From 7e196aa1a011da35a04cb1f161e186563aa8d8db Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jul 2016 12:18:34 +0000 Subject: [PATCH 0001/1050] clk: sunxi: pll2: Fix return value check in sun4i_pll2_setup() In case of error, the functions clk_register_composite() and clk_register_divider() return ERR_PTR() and never return NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Signed-off-by: Maxime Ripard --- drivers/clk/sunxi/clk-a10-pll2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sunxi/clk-a10-pll2.c b/drivers/clk/sunxi/clk-a10-pll2.c index 0ee1f363e4be..d8eab90ae661 100644 --- a/drivers/clk/sunxi/clk-a10-pll2.c +++ b/drivers/clk/sunxi/clk-a10-pll2.c @@ -73,7 +73,7 @@ static void __init sun4i_pll2_setup(struct device_node *node, SUN4I_PLL2_PRE_DIV_WIDTH, CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO, &sun4i_a10_pll2_lock); - if (!prediv_clk) { + if (IS_ERR(prediv_clk)) { pr_err("Couldn't register the prediv clock\n"); goto err_free_array; } @@ -106,7 +106,7 @@ static void __init sun4i_pll2_setup(struct device_node *node, &mult->hw, &clk_multiplier_ops, &gate->hw, &clk_gate_ops, CLK_SET_RATE_PARENT); - if (!base_clk) { + if (IS_ERR(base_clk)) { pr_err("Couldn't register the base multiplier clock\n"); goto err_free_multiplier; } From fbe359f12ce40e3126bea959b135656e4a305897 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jul 2016 12:21:47 +0000 Subject: [PATCH 0002/1050] clk: sunxi: Fix return value check in sun8i_a23_mbus_setup() In case of error, the function of_io_request_and_map() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Signed-off-by: Maxime Ripard --- drivers/clk/sunxi/clk-sun8i-mbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi/clk-sun8i-mbus.c b/drivers/clk/sunxi/clk-sun8i-mbus.c index 411d3033a96e..b200ebf159ee 100644 --- a/drivers/clk/sunxi/clk-sun8i-mbus.c +++ b/drivers/clk/sunxi/clk-sun8i-mbus.c @@ -48,7 +48,7 @@ static void __init sun8i_a23_mbus_setup(struct device_node *node) return; reg = of_io_request_and_map(node, 0, of_node_full_name(node)); - if (!reg) { + if (IS_ERR(reg)) { pr_err("Could not get registers for sun8i-mbus-clk\n"); goto err_free_parents; } From 156ad0d7eae4825a0d94a4bf68571e97302f2501 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 26 Jul 2016 15:04:23 +0800 Subject: [PATCH 0003/1050] clk: sunxi-ng: Fix inverted test condition in ccu_helper_wait_for_lock The condition passed to read*_poll_timeout() is the break condition, i.e. wait for this condition to happen and return success. The original code assumed the opposite, resulting in a spurious warning when the PLL clock rate was changed even though the PLL never lost its lock, as far as the readout indicated. This was verified by checking the read-out register value.
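For illustration, a minimal sketch of the read*_poll_timeout() contract at play here (not part of the patch; base, reg_off and lock_mask are hypothetical names):

	u32 reg;
	int err;

	/* Re-reads the register into 'reg' until the break condition
	 * (reg & lock_mask) becomes true, polling every 100 us; returns
	 * 0 on success, or -ETIMEDOUT if the lock bit never appears
	 * within 70 ms.
	 */
	err = readl_relaxed_poll_timeout(base + reg_off, reg,
					 reg & lock_mask, 100, 70000);
	if (err)
		pr_warn("PLL failed to lock\n");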
Fixes: 1d80c14248d6 ("clk: sunxi-ng: Add common infrastructure") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index fc17b5295e16..51d4bac97ab3 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -31,7 +31,7 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock) return; WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg, - !(reg & lock), 100, 70000)); + reg & lock, 100, 70000)); } int sunxi_ccu_probe(struct device_node *node, void __iomem *reg, From 06421a7821f9040467e0db2702309b72aa2b7af4 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 26 Jul 2016 15:04:24 +0800 Subject: [PATCH 0004/1050] clk: sunxi-ng: nk: Make ccu_nk_find_best static make C=2 reports: CHECK drivers/clk/sunxi-ng/ccu_nk.c drivers/clk/sunxi-ng/ccu_nk.c:17:6: warning: symbol 'ccu_nk_find_best' was not declared. Should it be static? ccu_nk_find_best is only used within ccu_nk.c. So make it static to get rid of this warning. Fixes: adbfb0056e03 ("clk: sunxi-ng: Add N-K-factor clock support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_nk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu_nk.c b/drivers/clk/sunxi-ng/ccu_nk.c index 4470ffc8cf0d..d6fafb397489 100644 --- a/drivers/clk/sunxi-ng/ccu_nk.c +++ b/drivers/clk/sunxi-ng/ccu_nk.c @@ -14,9 +14,9 @@ #include "ccu_gate.h" #include "ccu_nk.h" -void ccu_nk_find_best(unsigned long parent, unsigned long rate, - unsigned int max_n, unsigned int max_k, - unsigned int *n, unsigned int *k) +static void ccu_nk_find_best(unsigned long parent, unsigned long rate, + unsigned int max_n, unsigned int max_k, + unsigned int *n, unsigned int *k) { unsigned long best_rate = 0; unsigned int best_k = 0, best_n = 0; From 62148f0930a8e9bd5c5614f8387222f0220d7d47 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 2 Aug 2016 08:11:00 -0300 Subject: [PATCH 0005/1050] [media] cec: rename cec_devnode fhs_lock to just lock This lock will be used to protect more than just the fhs list. So rename it to just 'lock'. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 12 ++++++------ drivers/staging/media/cec/cec-api.c | 8 ++++---- drivers/staging/media/cec/cec-core.c | 6 +++--- include/media/cec.h | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index b2393bbacb26..9dcb784b8d6a 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -124,10 +124,10 @@ static void cec_queue_event(struct cec_adapter *adap, u64 ts = ktime_get_ns(); struct cec_fh *fh; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) cec_queue_event_fh(fh, ev, ts); - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* @@ -191,12 +191,12 @@ static void cec_queue_msg_monitor(struct cec_adapter *adap, u32 monitor_mode = valid_la ? 
CEC_MODE_MONITOR : CEC_MODE_MONITOR_ALL; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower >= monitor_mode) cec_queue_msg_fh(fh, msg); } - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* @@ -207,12 +207,12 @@ static void cec_queue_msg_followers(struct cec_adapter *adap, { struct cec_fh *fh; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower == CEC_MODE_FOLLOWER) cec_queue_msg_fh(fh, msg); } - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* Notify userspace of an adapter state change. */ diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 7be7615a0fdf..4e2696a34ddb 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -508,14 +508,14 @@ static int cec_open(struct inode *inode, struct file *filp) filp->private_data = fh; - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); /* Queue up initial state events */ ev_state.state_change.phys_addr = adap->phys_addr; ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask; cec_queue_event_fh(fh, &ev_state, 0); list_add(&fh->list, &devnode->fhs); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); return 0; } @@ -540,9 +540,9 @@ static int cec_release(struct inode *inode, struct file *filp) cec_monitor_all_cnt_dec(adap); mutex_unlock(&adap->lock); - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); list_del(&fh->list); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); /* Unhook pending transmits from this filehandle. */ mutex_lock(&adap->lock); diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c index 112a5fae12f5..73792d078462 100644 --- a/drivers/staging/media/cec/cec-core.c +++ b/drivers/staging/media/cec/cec-core.c @@ -117,7 +117,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, /* Initialization */ INIT_LIST_HEAD(&devnode->fhs); - mutex_init(&devnode->fhs_lock); + mutex_init(&devnode->lock); /* Part 1: Find a free minor number */ mutex_lock(&cec_devnode_lock); @@ -181,10 +181,10 @@ static void cec_devnode_unregister(struct cec_devnode *devnode) if (!devnode->registered || devnode->unregistered) return; - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); list_for_each_entry(fh, &devnode->fhs, list) wake_up_interruptible(&fh->wait); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); devnode->registered = false; devnode->unregistered = true; diff --git a/include/media/cec.h b/include/media/cec.h index dc7854b855f3..fdb5d600e4bb 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -57,8 +57,8 @@ struct cec_devnode { int minor; bool registered; bool unregistered; - struct mutex fhs_lock; struct list_head fhs; + struct mutex lock; }; struct cec_adapter; From 2ab25d35a91098ef0f42d478cc37f6a5591a4ab0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 2 Aug 2016 08:13:57 -0300 Subject: [PATCH 0006/1050] [media] cec: improve locking - The global lock was used in cec_get_device when it should have used the devnode lock. - cec_put_device also took the global lock, but since the release function takes that lock as well this could lead to a deadlock. Just don't take the lock here since there is no reason for it. 
- cec_devnode_register() should take the global lock when clearing the bit in the global bitmap. - In cec_devnode_unregister() place the devnode->(un)register tests and assignments under the devnode lock as well: this has to be in a critical block. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-core.c | 29 +++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c index 73792d078462..3b1e4d2b190d 100644 --- a/drivers/staging/media/cec/cec-core.c +++ b/drivers/staging/media/cec/cec-core.c @@ -51,31 +51,29 @@ int cec_get_device(struct cec_devnode *devnode) { /* * Check if the cec device is available. This needs to be done with - * the cec_devnode_lock held to prevent an open/unregister race: + * the devnode->lock held to prevent an open/unregister race: * without the lock, the device could be unregistered and freed between * the devnode->registered check and get_device() calls, leading to * a crash. */ - mutex_lock(&cec_devnode_lock); + mutex_lock(&devnode->lock); /* * return ENXIO if the cec device has been removed * already or if it is not registered anymore. */ if (!devnode->registered) { - mutex_unlock(&cec_devnode_lock); + mutex_unlock(&devnode->lock); return -ENXIO; } /* and increase the device refcount */ get_device(&devnode->dev); - mutex_unlock(&cec_devnode_lock); + mutex_unlock(&devnode->lock); return 0; } void cec_put_device(struct cec_devnode *devnode) { - mutex_lock(&cec_devnode_lock); put_device(&devnode->dev); - mutex_unlock(&cec_devnode_lock); } /* Called when the last user of the cec device exits. */ @@ -84,11 +82,10 @@ static void cec_devnode_release(struct device *cd) struct cec_devnode *devnode = to_cec_devnode(cd); mutex_lock(&cec_devnode_lock); - /* Mark device node number as free */ clear_bit(devnode->minor, cec_devnode_nums); - mutex_unlock(&cec_devnode_lock); + cec_delete_adapter(to_cec_adapter(devnode)); } @@ -160,7 +157,9 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, cdev_del: cdev_del(&devnode->cdev); clr_bit: + mutex_lock(&cec_devnode_lock); clear_bit(devnode->minor, cec_devnode_nums); + mutex_unlock(&cec_devnode_lock); return ret; } @@ -177,17 +176,21 @@ static void cec_devnode_unregister(struct cec_devnode *devnode) { struct cec_fh *fh; - /* Check if devnode was never registered or already unregistered */ - if (!devnode->registered || devnode->unregistered) - return; - mutex_lock(&devnode->lock); + + /* Check if devnode was never registered or already unregistered */ + if (!devnode->registered || devnode->unregistered) { + mutex_unlock(&devnode->lock); + return; + } + list_for_each_entry(fh, &devnode->fhs, list) wake_up_interruptible(&fh->wait); - mutex_unlock(&devnode->lock); devnode->registered = false; devnode->unregistered = true; + mutex_unlock(&devnode->lock); + device_del(&devnode->dev); cdev_del(&devnode->cdev); put_device(&devnode->dev); From 9ebf1945d757433a089ab3ee940673503e3e11ec Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 1 Aug 2016 07:29:34 -0300 Subject: [PATCH 0007/1050] [media] cec-funcs.h: fix typo: && should be & Fix typo where logical AND was used instead of bitwise AND. 
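To see why this matters, a standalone C sketch (not part of the patch; major stands in for the 16-bit channel number):

	__u16 major = 0x1234;
	__u8 wrong = major && 0xff;	/* logical AND: 1 for any nonzero major */
	__u8 right = major & 0xff;	/* bitwise AND: the low byte, here 0x34 */

With &&, every nonzero major collapses to 1, so the transmitted byte is almost always wrong; & extracts the intended low byte.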
Reported-by: David Binderman Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/cec-funcs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 82c3d3b7269d..9e054aa168f3 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -227,7 +227,7 @@ static inline void cec_set_digital_service_id(__u8 *msg, if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) { *msg++ = (digital->channel.channel_number_fmt << 2) | (digital->channel.major >> 8); - *msg++ = digital->channel.major && 0xff; + *msg++ = digital->channel.major & 0xff; *msg++ = digital->channel.minor >> 8; *msg++ = digital->channel.minor & 0xff; *msg++ = 0; @@ -1277,7 +1277,7 @@ static inline void cec_msg_user_control_pressed(struct cec_msg *msg, msg->len += 4; msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) | (ui_cmd->channel_identifier.major >> 8); - msg->msg[4] = ui_cmd->channel_identifier.major && 0xff; + msg->msg[4] = ui_cmd->channel_identifier.major & 0xff; msg->msg[5] = ui_cmd->channel_identifier.minor >> 8; msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff; break; From 31dc8b7302f1e48952ec8e90cd49dca843146cd0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 08:01:38 -0300 Subject: [PATCH 0008/1050] [media] cec-funcs.h: add reply argument for Record On/Off A reply parameter is added to the cec_msg_record_on/off functions in cec-funcs.h. The standard mandates that Record Status shall be replied to Record On, and it may be replied to Record Off. Signed-off-by: Johan Fjeldtvedt Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/cec-funcs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 9e054aa168f3..8af613e67633 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -162,10 +162,11 @@ static inline void cec_msg_standby(struct cec_msg *msg) /* One Touch Record Feature */ -static inline void cec_msg_record_off(struct cec_msg *msg) +static inline void cec_msg_record_off(struct cec_msg *msg, bool reply) { msg->len = 2; msg->msg[1] = CEC_MSG_RECORD_OFF; + msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0; } struct cec_op_arib_data { @@ -323,6 +324,7 @@ static inline void cec_msg_record_on_phys_addr(struct cec_msg *msg, } static inline void cec_msg_record_on(struct cec_msg *msg, + bool reply, const struct cec_op_record_src *rec_src) { switch (rec_src->type) { @@ -346,6 +348,7 @@ static inline void cec_msg_record_on(struct cec_msg *msg, rec_src->ext_phys_addr.phys_addr); break; } + msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0; } static inline void cec_ops_record_on(const struct cec_msg *msg, From 277f963cea4ec87144c6713377322fe3bf172a5e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 06:00:53 -0300 Subject: [PATCH 0009/1050] [media] cec: improve dqevent documentation The documentation for the cec_event_state_change struct was incomplete. This patch documents what happens in the corner cases. 
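For context, a minimal userspace sketch of consuming this event (an editorial illustration, not part of the patch; assumes <linux/cec.h> and an open CEC file descriptor fd):

	struct cec_event ev;

	if (ioctl(fd, CEC_DQEVENT, &ev) == 0 &&
	    ev.event == CEC_EVENT_STATE_CHANGE) {
		/* Per the corner cases documented above, an invalid
		 * physical address implies log_addr_mask == 0.
		 */
		if (ev.state_change.phys_addr == CEC_PHYS_ADDR_INVALID)
			printf("unconfigured, mask=0x%x\n",
			       ev.state_change.log_addr_mask);
	}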
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/cec/cec-ioc-dqevent.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst index 7a6d6d00ce19..2e1e73928396 100644 --- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst +++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst @@ -64,7 +64,8 @@ it is guaranteed that the state did change in between the two events. - ``phys_addr`` - - The current physical address. + - The current physical address. This is ``CEC_PHYS_ADDR_INVALID`` if no + valid physical address is set. - .. row 2 @@ -72,7 +73,10 @@ it is guaranteed that the state did change in between the two events. - ``log_addr_mask`` - - The current set of claimed logical addresses. + - The current set of claimed logical addresses. This is 0 if no logical + addresses are claimed or if ``phys_addr`` is ``CEC_PHYS_ADDR_INVALID``. + If bit 15 is set (``1 << CEC_LOG_ADDR_UNREGISTERED``) then this device + has the unregistered logical address. In that case all other bits are 0. From dcceb1eaf210096831b14471bc87678375b086ed Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 09:24:45 -0300 Subject: [PATCH 0010/1050] [media] cec: add CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK flag Currently if none of the requested logical addresses can be claimed, the framework will fall back to the Unregistered logical address. Add a flag to enable this explicitly. By default it will just go back to the unconfigured state. Usually Unregistered is not something you want since the functionality is very limited. Unless the application has support for this, it will fail to work correctly. So require that the application explicitly requests this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../uapi/cec/cec-ioc-adap-g-log-addrs.rst | 21 ++++++++++++++++++- drivers/staging/media/cec/cec-adap.c | 4 ++++ drivers/staging/media/cec/cec-api.c | 2 +- include/linux/cec.h | 5 ++++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst index 04ee90099676..201d4839931c 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst @@ -144,7 +144,7 @@ logical address types are already defined will return with error ``EBUSY``. - ``flags`` - - Flags. No flags are defined yet, so set this to 0. + - Flags. See :ref:`cec-log-addrs-flags` for a list of available flags. - .. row 7 @@ -201,6 +201,25 @@ logical address types are already defined will return with error ``EBUSY``. give the CEC framework more information about the device type, even though the framework won't use it directly in the CEC message. +.. _cec-log-addrs-flags: + +.. flat-table:: Flags for struct cec_log_addrs + :header-rows: 0 + :stub-columns: 0 + :widths: 3 1 4 + + + - .. _`CEC-LOG-ADDRS-FL-ALLOW-UNREG-FALLBACK`: + + - ``CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK`` + + - 1 + + - By default if no logical address of the requested type can be claimed, then + it will go back to the unconfigured state. If this flag is set, then it will + fallback to the Unregistered logical address. Note that if the Unregistered + logical address was explicitly requested, then this flag has no effect. + .. _cec-versions: .. 
flat-table:: CEC Versions diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 9dcb784b8d6a..2458a6c87642 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1047,6 +1047,10 @@ static int cec_config_thread_func(void *arg) dprintk(1, "could not claim LA %d\n", i); } + if (adap->log_addrs.log_addr_mask == 0 && + !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK)) + goto unconfigure; + configured: if (adap->log_addrs.log_addr_mask == 0) { /* Fall back to unregistered */ diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 4e2696a34ddb..6f58ee85eea4 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -162,7 +162,7 @@ static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh, return -ENOTTY; if (copy_from_user(&log_addrs, parg, sizeof(log_addrs))) return -EFAULT; - log_addrs.flags = 0; + log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK; mutex_lock(&adap->lock); if (!adap->is_configuring && (!log_addrs.num_log_addrs || !adap->is_configured) && diff --git a/include/linux/cec.h b/include/linux/cec.h index b3e22893a002..851968e803fa 100644 --- a/include/linux/cec.h +++ b/include/linux/cec.h @@ -364,7 +364,7 @@ struct cec_caps { * @num_log_addrs: how many logical addresses should be claimed. Set by the * caller. * @vendor_id: the vendor ID of the device. Set by the caller. - * @flags: set to 0. + * @flags: flags. * @osd_name: the OSD name of the device. Set by the caller. * @primary_device_type: the primary device type for each logical address. * Set by the caller. @@ -389,6 +389,9 @@ struct cec_log_addrs { __u8 features[CEC_MAX_LOG_ADDRS][12]; }; +/* Allow a fallback to unregistered */ +#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK (1 << 0) + /* Events */ /* Event that occurs when the adapter state changes */ From 0c1d61b0e4ed68d125b21fed375c38b6e3c2a658 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 14 Aug 2016 08:27:09 -0300 Subject: [PATCH 0011/1050] [media] cec: set unclaimed addresses to CEC_LOG_ADDR_INVALID Up to 4 logical addresses can be claimed. Make sure that any unclaimed logical addresses are set to CEC_LOG_ADDR_INVALID as per the documentation. Take special care in the unregistered case: when falling back to unregistered num_log_addrs may be > 1, so mark those as invalid. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 2458a6c87642..6cc7d7904446 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1056,6 +1056,8 @@ configured: /* Fall back to unregistered */ las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED; las->log_addr_mask = 1 << las->log_addr[0]; + for (i = 1; i < las->num_log_addrs; i++) + las->log_addr[i] = CEC_LOG_ADDR_INVALID; } adap->is_configured = true; adap->is_configuring = false; @@ -1074,6 +1076,8 @@ configured: cec_report_features(adap, i); cec_report_phys_addr(adap, i); } + for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) + las->log_addr[i] = CEC_LOG_ADDR_INVALID; mutex_lock(&adap->lock); adap->kthread_config = NULL; mutex_unlock(&adap->lock); From 260ff1144a9dd1afb85cf5da462672d68412cbc4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 19 Jul 2016 08:44:32 -0300 Subject: [PATCH 0012/1050] [media] cec: add item to TODO Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/TODO | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO index a10d4f82b954..13224694a8ae 100644 --- a/drivers/staging/media/cec/TODO +++ b/drivers/staging/media/cec/TODO @@ -12,6 +12,7 @@ Hopefully this will happen later in 2016. Other TODOs: +- There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that? - Add a flag to inhibit passing CEC RC messages to the rc subsystem. Applications should be able to choose this when calling S_LOG_ADDRS. - If the reply field of cec_msg is set then when the reply arrives it From 3e92d8b238e48dfb539e8112bb2cc463e35e1b71 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 13:32:07 -0300 Subject: [PATCH 0013/1050] [media] cec: ignore messages when log_addr_mask == 0 Most CEC adapters will still receive broadcast messages, even if no logical addresses are claimed. But those messages should only be passed on for monitoring purposes, not for processing by either the kernel or userspace, if userspace didn't call CEC_ADAP_S_LOG_ADDRS first. So if adap->log_addrs.log_addr_mask is 0, then just return before passing the received message on to the processing code. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 6cc7d7904446..e980ac9c9279 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -851,6 +851,9 @@ void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg) if (!valid_la || msg->len <= 1) return; + if (adap->log_addrs.log_addr_mask == 0) + return; + /* * Process the message on the protocol level.
If is_reply is true, * then cec_receive_notify() won't pass on the reply to the listener(s) From 73b14977549e4e1214413e7da2d0e97a9947bf8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 14 Aug 2016 06:45:54 -0300 Subject: [PATCH 0014/1050] [media] mtk-vcodec: add HAS_DMA dependency This fixes this kbuild test robot error: tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 329f4152911c276b074bec75a0443f88821afdb7 commit: c1023ba74fc77dc56dc317bd98f5060aab889ac1 [media] drivers/media/platform/Kconfig: fix VIDEO_MEDIATEK_VCODEC dependency config: m32r-allyesconfig (attached as .config) compiler: m32r-linux-gcc (GCC) 4.9.0 reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout c1023ba74fc77dc56dc317bd98f5060aab889ac1 # save the attached .config to linux build tree make.cross ARCH=m32r All errors (new ones prefixed by >>): drivers/media/v4l2-core/videobuf2-dma-contig.c: In function 'vb2_dc_get_userptr': >> >> drivers/media/v4l2-core/videobuf2-dma-contig.c:486:2: error: implicit declaration of function 'dma_get_cache_alignment' [-Werror=implicit-function-declaration] unsigned long dma_align = dma_get_cache_alignment(); ^ cc1: some warnings being treated as errors This driver depends on HAS_DMA for dma_get_cache_alignment(). Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index f25344bc7912..552b635cfce7 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -169,7 +169,7 @@ config VIDEO_MEDIATEK_VPU config VIDEO_MEDIATEK_VCODEC tristate "Mediatek Video Codec driver" depends on MTK_IOMMU || COMPILE_TEST - depends on VIDEO_DEV && VIDEO_V4L2 + depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA depends on ARCH_MEDIATEK || COMPILE_TEST select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV From 1e6e97541ab51b65019bd823506af81ebb3730fc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 06:44:27 -0300 Subject: [PATCH 0015/1050] [media] pulse8-cec: set correct Signal Free Time Don't hardcode the signal free time to 3 bit periods, instead use the value for the signal free time as passed in by the CEC framework. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/pulse8-cec/pulse8-cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c index 94f8590492dc..28f853c80b19 100644 --- a/drivers/staging/media/pulse8-cec/pulse8-cec.c +++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c @@ -388,7 +388,7 @@ static int pulse8_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, int err; cmd[0] = MSGCODE_TRANSMIT_IDLETIME; - cmd[1] = 3; + cmd[1] = signal_free_time; err = pulse8_send_and_wait(pulse8, cmd, 2, MSGCODE_COMMAND_ACCEPTED, 1); cmd[0] = MSGCODE_TRANSMIT_ACK_POLARITY; From 31f58e31dc0e170e117a83584103921269b7581b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 06:46:06 -0300 Subject: [PATCH 0016/1050] [media] pulse8-cec: fix error handling Support more error codes and fix a bug where MSGCODE_TRANSMIT_FAILED_LINE was mapped to CEC_TX_STATUS_ARB_LOST, which is wrong. Thanks to Pulse-Eight for providing me with the information needed to handle this correctly (I hope). 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/pulse8-cec/pulse8-cec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c index 28f853c80b19..ed8bd95ad6d0 100644 --- a/drivers/staging/media/pulse8-cec/pulse8-cec.c +++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c @@ -114,14 +114,11 @@ static void pulse8_irq_work_handler(struct work_struct *work) cec_transmit_done(pulse8->adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); break; - case MSGCODE_TRANSMIT_FAILED_LINE: - cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ARB_LOST, - 1, 0, 0, 0); - break; case MSGCODE_TRANSMIT_FAILED_ACK: cec_transmit_done(pulse8->adap, CEC_TX_STATUS_NACK, 0, 1, 0, 0); break; + case MSGCODE_TRANSMIT_FAILED_LINE: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ERROR, @@ -170,6 +167,9 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: schedule_work(&pulse8->work); break; + case MSGCODE_HIGH_ERROR: + case MSGCODE_LOW_ERROR: + case MSGCODE_RECEIVE_FAILED: case MSGCODE_TIMEOUT_ERROR: break; case MSGCODE_COMMAND_ACCEPTED: From 8ac6a1a53e9f195e8c4336a7edfba2e102fc14bb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 18 Aug 2016 04:13:42 -0300 Subject: [PATCH 0017/1050] [media] cec-edid: check for IEEE identifier The cec_get_edid_spa_location() function did not verify that the IEEE identifier in the Vendor Specific Data Block matched the HDMI-LLC identifier. This could result in the wrong VSDB block being returned. For example, for HDMI 2.0 EDIDs there is also a HDMI Forum VSDB. So check the IEEE identifier as well. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec-edid.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/cec-edid.c b/drivers/media/cec-edid.c index 70018247bdda..5719b991e340 100644 --- a/drivers/media/cec-edid.c +++ b/drivers/media/cec-edid.c @@ -70,7 +70,10 @@ static unsigned int cec_get_edid_spa_location(const u8 *edid, unsigned int size) u8 tag = edid[i] >> 5; u8 len = edid[i] & 0x1f; - if (tag == 3 && len >= 5 && i + len <= end) + if (tag == 3 && len >= 5 && i + len <= end && + edid[i + 1] == 0x03 && + edid[i + 2] == 0x0c && + edid[i + 3] == 0x00) return i + 4; i += len + 1; } while (i < end); From 4808f721627c2a23b5d749f9bbd20d4529ea2b8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 20 Aug 2016 07:54:38 -0300 Subject: [PATCH 0018/1050] [media] cec-funcs.h: add missing vendor-specific messages The cec-funcs.h header was missing support for these three vendor-specific messages: CEC_MSG_VENDOR_COMMAND CEC_MSG_VENDOR_COMMAND_WITH_ID CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN Add wrappers for these messages. I originally postponed adding these wrappers due to the fact that the argument is just a byte array which cec-ctl couldn't handle at the time, and then I just forgot to add them once the CEC framework was finalized. It wasn't until an attempt to transmit a vendor specific command was made that I realized that these wrappers were missing. 
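As a usage sketch of one of the new wrappers (an editorial illustration, not part of the patch; assumes <linux/cec.h>, <linux/cec-funcs.h> and an open, configured CEC file descriptor fd; the vendor ID and payload bytes are made up):

	struct cec_msg msg;
	const __u8 cmd[3] = { 0x01, 0x02, 0x03 };	/* hypothetical payload */

	cec_msg_init(&msg, CEC_LOG_ADDR_PLAYBACK_1, CEC_LOG_ADDR_TV);
	/* Caps the payload at 11 bytes and prepends the 3-byte vendor ID. */
	cec_msg_vendor_command_with_id(&msg, 0x000c03, sizeof(cmd), cmd);
	ioctl(fd, CEC_TRANSMIT, &msg);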
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/cec-funcs.h | 69 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 8af613e67633..138bbf721e70 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -1144,6 +1144,75 @@ static inline void cec_msg_give_device_vendor_id(struct cec_msg *msg, msg->reply = reply ? CEC_MSG_DEVICE_VENDOR_ID : 0; } +static inline void cec_msg_vendor_command(struct cec_msg *msg, + __u8 size, const __u8 *vendor_cmd) +{ + if (size > 14) + size = 14; + msg->len = 2 + size; + msg->msg[1] = CEC_MSG_VENDOR_COMMAND; + memcpy(msg->msg + 2, vendor_cmd, size); +} + +static inline void cec_ops_vendor_command(const struct cec_msg *msg, + __u8 *size, + const __u8 **vendor_cmd) +{ + *size = msg->len - 2; + + if (*size > 14) + *size = 14; + *vendor_cmd = msg->msg + 2; +} + +static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg, + __u32 vendor_id, __u8 size, + const __u8 *vendor_cmd) +{ + if (size > 11) + size = 11; + msg->len = 5 + size; + msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID; + msg->msg[2] = vendor_id >> 16; + msg->msg[3] = (vendor_id >> 8) & 0xff; + msg->msg[4] = vendor_id & 0xff; + memcpy(msg->msg + 5, vendor_cmd, size); +} + +static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg, + __u32 *vendor_id, __u8 *size, + const __u8 **vendor_cmd) +{ + *size = msg->len - 5; + + if (*size > 11) + *size = 11; + *vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4]; + *vendor_cmd = msg->msg + 5; +} + +static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg, + __u8 size, + const __u8 *rc_code) +{ + if (size > 14) + size = 14; + msg->len = 2 + size; + msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN; + memcpy(msg->msg + 2, rc_code, size); +} + +static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg, + __u8 *size, + const __u8 **rc_code) +{ + *size = msg->len - 2; + + if (*size > 14) + *size = 14; + *rc_code = msg->msg + 2; +} + static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg) { msg->len = 2; From 0d06108c65e572085b2d1f7c8273f417cad68734 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Sun, 14 Aug 2016 23:31:13 -0300 Subject: [PATCH 0019/1050] [media] vcodec:mediatek: code refine for v4l2 Encoder driver This patch removes an unused header include and an unused define from the header files. Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h | 1 - drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h index 94f0a425be42..3a8e6958adae 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h @@ -23,7 +23,6 @@ #include #include -#include "mtk_vcodec_util.h" #define MTK_VCODEC_DRV_NAME "mtk_vcodec_drv" #define MTK_VCODEC_ENC_NAME "mtk-vcodec-enc" diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h index 33e890f5aa9c..12131855b46a 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h @@ -16,7 +16,6 @@ #define _MTK_VCODEC_INTR_H_ #define MTK_INST_IRQ_RECEIVED 0x1 -#define MTK_INST_WORK_THREAD_ABORT_DONE 0x2 struct
mtk_vcodec_ctx; From ad34f5412d2a04a894b2cd2912538ae2e5d64e76 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Sun, 14 Aug 2016 23:47:20 -0300 Subject: [PATCH 0020/1050] [media] vcodec:mediatek: Fix fops_vcodec_release flow for V4L2 Encoder This patch fixes the release flow so that mtk_vcodec_enc_release is called after v4l2_m2m_ctx_release. Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 7 ++++++- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 3ed3f2d31df5..3b0691f2deb4 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -1288,5 +1288,10 @@ int mtk_venc_lock(struct mtk_vcodec_ctx *ctx) void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx) { - venc_if_deinit(ctx); + int ret = venc_if_deinit(ctx); + + if (ret) + mtk_v4l2_err("venc_if_deinit failed=%d", ret); + + ctx->state = MTK_STATE_FREE; } diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index c7806ecda2dd..5cd2151431bf 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -218,11 +218,15 @@ static int fops_vcodec_release(struct file *file) mtk_v4l2_debug(1, "[%d] encoder", ctx->id); mutex_lock(&dev->dev_mutex); + /* + * Call v4l2_m2m_ctx_release to make sure the worker thread is not + * running after venc_if_deinit. + */ + v4l2_m2m_ctx_release(ctx->m2m_ctx); mtk_vcodec_enc_release(ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); v4l2_ctrl_handler_free(&ctx->ctrl_hdl); - v4l2_m2m_ctx_release(ctx->m2m_ctx); list_del_init(&ctx->list); dev->num_instances--; From 91ae0e1ec6ec91cd297933886b424f9a4a8acbd4 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:08:03 -0300 Subject: [PATCH 0021/1050] [media] vcodec:mediatek: Fix visible_height larger than coded_height issue in s_fmt_out The original code added an extra 32 lines to visible_height. That was incorrect: the 32 lines should have been added to coded_height. The purpose was that user space could calculate the real buffer size needed by using coded_width * coded_height.
But this method makes the v4l2-compliance test fail, since g_fmt != s_fmt(g_fmt). So remove the extension of visible_height or coded_height; user space should just use sizeimage to get the real buffer size needed. Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 3b0691f2deb4..9b0187ecfa6a 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -487,7 +487,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv, struct mtk_q_data *q_data; int ret, i; struct mtk_video_fmt *fmt; - unsigned int pitch_w_div16; struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); @@ -530,15 +529,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv, q_data->coded_width = f->fmt.pix_mp.width; q_data->coded_height = f->fmt.pix_mp.height; - pitch_w_div16 = DIV_ROUND_UP(q_data->visible_width, 16); - if (pitch_w_div16 % 8 != 0) { - /* Adjust returned width/height, so application could correctly - * allocate hw required memory - */ - q_data->visible_height += 32; - vidioc_try_fmt(f, q_data->fmt); - } - q_data->field = f->fmt.pix_mp.field; ctx->colorspace = f->fmt.pix_mp.colorspace; ctx->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc; From 16060f7ef660a11f282909b01fb6096e21cf5389 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:15:44 -0300 Subject: [PATCH 0022/1050] [media] vcodec:mediatek: Add timestamp and timecode copy for V4L2 Encoder This patch adds copying of the timestamp and timecode from the src buffer to the dst buffer. Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../platform/mtk-vcodec/mtk_vcodec_enc.c | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 9b0187ecfa6a..0ca230e0a812 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -868,7 +868,8 @@ static int mtk_venc_encode_header(void *priv) { struct mtk_vcodec_ctx *ctx = priv; int ret; - struct vb2_buffer *dst_buf; + struct vb2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2; struct mtk_vcodec_mem bs_buf; struct venc_done_result enc_result; @@ -901,6 +902,15 @@ static int mtk_venc_encode_header(void *priv) mtk_v4l2_err("venc_if_encode failed=%d", ret); return -EINVAL; } + src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + if (src_buf) { + src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf); + dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf); + dst_buf->timestamp = src_buf->timestamp; + dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode; + } else { + mtk_v4l2_err("No timestamp for the header buffer."); + } ctx->state = MTK_STATE_HEADER; dst_buf->planes[0].bytesused = enc_result.bs_size; @@ -993,7 +1003,7 @@ static void mtk_venc_worker(struct work_struct *work) struct mtk_vcodec_mem bs_buf; struct venc_done_result enc_result; int ret, i; - struct vb2_v4l2_buffer *vb2_v4l2; + struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2; /* check dst_buf, dst_buf may be removed in device_run * to stored encdoe header so we need check dst_buf and @@ -1033,9 +1043,14 @@ static void mtk_venc_worker(struct
work_struct *work) ret = venc_if_encode(ctx, VENC_START_OPT_ENCODE_FRAME, &frm_buf, &bs_buf, &enc_result); - vb2_v4l2 = container_of(dst_buf, struct vb2_v4l2_buffer, vb2_buf); + src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf); + dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf); + + dst_buf->timestamp = src_buf->timestamp; + dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode; + if (enc_result.is_key_frm) - vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME; + dst_vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME; if (ret) { v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), From 158d6071bc0aad6663109d2fe9249c3cf570d423 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:26:02 -0300 Subject: [PATCH 0023/1050] [media] vcodec:mediatek: change H264 profile default to profile high This patch changes the default H264 profile from V4L2_MPEG_VIDEO_H264_PROFILE_MAIN to V4L2_MPEG_VIDEO_H264_PROFILE_HIGH. Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 0ca230e0a812..2c5719ac23b2 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -1222,7 +1222,7 @@ int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx) 0, V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE); v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, - 0, V4L2_MPEG_VIDEO_H264_PROFILE_MAIN); + 0, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH); v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL, V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0); From 2d683b6dad73b5636297ac4978f73f2c638a0b19 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:33:32 -0300 Subject: [PATCH 0024/1050] [media] vcodec:mediatek: Refine H264 encoder driver This patch: 1. removes fields and functions that are no longer used 2. adds support for V4L2_MPEG_VIDEO_H264_LEVEL_4_2 Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../platform/mtk-vcodec/venc/venc_h264_if.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c index 9a600525b3c1..63d4be4ff327 100644 --- a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c +++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c @@ -61,6 +61,8 @@ enum venc_h264_bs_mode { /* * struct venc_h264_vpu_config - Structure for h264 encoder configuration + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @input_fourcc: input fourcc * @bitrate: target bitrate (in bps) * @pic_w: picture width.
Picture size is visible stream resolution, in pixels, @@ -94,13 +96,13 @@ struct venc_h264_vpu_config { /* * struct venc_h264_vpu_buf - Structure for buffer information - * @align: buffer alignment (in bytes) + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @iova: IO virtual address * @vpua: VPU side memory addr which is used by RC_CODE * @size: buffer size (in bytes) */ struct venc_h264_vpu_buf { - u32 align; u32 iova; u32 vpua; u32 size; @@ -108,6 +110,8 @@ struct venc_h264_vpu_buf { /* * struct venc_h264_vsi - Structure for VPU driver control and info share + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * This structure is allocated in VPU side and shared to AP side. * @config: h264 encoder configuration * @work_bufs: working buffer information in VPU side @@ -150,12 +154,6 @@ struct venc_h264_inst { struct mtk_vcodec_ctx *ctx; }; -static inline void h264_write_reg(struct venc_h264_inst *inst, u32 addr, - u32 val) -{ - writel(val, inst->hw_base + addr); -} - static inline u32 h264_read_reg(struct venc_h264_inst *inst, u32 addr) { return readl(inst->hw_base + addr); } @@ -214,6 +212,8 @@ static unsigned int h264_get_level(struct venc_h264_inst *inst, return 40; case V4L2_MPEG_VIDEO_H264_LEVEL_4_1: return 41; + case V4L2_MPEG_VIDEO_H264_LEVEL_4_2: + return 42; default: mtk_vcodec_debug(inst, "unsupported level %d", level); return 31; From 19d6837a52f1683cf448265952d559a44a7df924 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:37:19 -0300 Subject: [PATCH 0025/1050] [media] vcodec:mediatek: Refine VP8 encoder driver This patch removes fields and functions that are no longer used. Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../media/platform/mtk-vcodec/venc/venc_vp8_if.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c index 60bbcd2a0510..6d9758479f9a 100644 --- a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c +++ b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c @@ -56,6 +56,8 @@ enum venc_vp8_vpu_work_buf { /* * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @input_fourcc: input fourcc * @bitrate: target bitrate (in bps) * @pic_w: picture width. Picture size is visible stream resolution, in pixels, @@ -83,14 +85,14 @@ struct venc_vp8_vpu_config { }; /* - * struct venc_vp8_vpu_buf -Structure for buffer information - * @align: buffer alignment (in bytes) + * struct venc_vp8_vpu_buf - Structure for buffer information + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @iova: IO virtual address * @vpua: VPU side memory addr which is used by RC_CODE * @size: buffer size (in bytes) */ struct venc_vp8_vpu_buf { - u32 align; u32 iova; u32 vpua; u32 size; @@ -98,6 +100,8 @@ struct venc_vp8_vpu_buf { /* * struct venc_vp8_vsi - Structure for VPU driver control and info share + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * This structure is allocated in VPU side and shared to AP side.
* @config: vp8 encoder configuration * @work_bufs: working buffer information in VPU side @@ -138,12 +142,6 @@ struct venc_vp8_inst { struct mtk_vcodec_ctx *ctx; }; -static inline void vp8_enc_write_reg(struct venc_vp8_inst *inst, u32 addr, - u32 val) -{ - writel(val, inst->hw_base + addr); -} - static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr) { return readl(inst->hw_base + addr); From a7d4b8f2565ad0dfdff9a222d1d87990c73b36e8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 16 Aug 2016 14:38:24 +0200 Subject: [PATCH 0026/1050] KVM: s390: don't use current->thread.fpu.* when accessing registers As the meaning of these variables and pointers seems to change more frequently, let's directly access our save area, instead of going via current->thread. Right now, this is broken for set/get_fpu. They simply overwrite the host registers, as the pointers to the current save area were turned into the static host save area. Cc: stable@vger.kernel.org # 4.7 Fixes: 3f6813b9a5e0 ("s390/fpu: allocate 'struct fpu' with the task_struct") Reported-by: Hao QingFeng Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f142215ed30d..607ec91966c7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2231,9 +2231,10 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -EINVAL; current->thread.fpu.fpc = fpu->fpc; if (MACHINE_HAS_VX) - convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, + (freg_t *) fpu->fprs); else - memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); + memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } @@ -2242,9 +2243,10 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) /* make sure we have the latest values */ save_fpu_regs(); if (MACHINE_HAS_VX) - convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + convert_vx_to_fp((freg_t *) fpu->fprs, + (__vector128 *) vcpu->run->s.regs.vrs); else - memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); fpu->fpc = current->thread.fpu.fpc; return 0; } From 936523441bb64cdc9a5b263e8fd2782e70313a57 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 6 Aug 2016 15:50:52 +0200 Subject: [PATCH 0027/1050] batman-adv: Add missing refcnt for last_candidate batadv_find_router dereferences last_bonding_candidate from orig_node without making sure that it has a valid reference. This reference has to be retrieved by increasing the reference counter while holding neigh_list_lock. The lock is required to avoid that batadv_last_bonding_replace removes the current last_bonding_candidate, reduces the reference counter and maybe destroys the object in this process. 
Fixes: f3b3d9018975 ("batman-adv: add bonding again") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7602c001e92b..3d199478c405 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -469,6 +469,29 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, return 0; } +/** + * batadv_last_bonding_get - Get last_bonding_candidate of orig_node + * @orig_node: originator node whose last bonding candidate should be retrieved + * + * Return: last bonding candidate of router or NULL if not found + * + * The object is returned with refcounter increased by 1. + */ +static struct batadv_orig_ifinfo * +batadv_last_bonding_get(struct batadv_orig_node *orig_node) +{ + struct batadv_orig_ifinfo *last_bonding_candidate; + + spin_lock_bh(&orig_node->neigh_list_lock); + last_bonding_candidate = orig_node->last_bonding_candidate; + + if (last_bonding_candidate) + kref_get(&last_bonding_candidate->refcount); + spin_unlock_bh(&orig_node->neigh_list_lock); + + return last_bonding_candidate; +} + /** * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node * @orig_node: originator node whose bonding candidates should be replaced @@ -539,7 +562,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * router - obviously there are no other candidates. */ rcu_read_lock(); - last_candidate = orig_node->last_bonding_candidate; + last_candidate = batadv_last_bonding_get(orig_node); if (last_candidate) last_cand_router = rcu_dereference(last_candidate->router); @@ -631,6 +654,9 @@ next: batadv_orig_ifinfo_put(next_candidate); } + if (last_candidate) + batadv_orig_ifinfo_put(last_candidate); + return router; } From 1e5d343b8f23770e8ac5d31f5c439826bdb35148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 23 Aug 2016 03:13:03 +0200 Subject: [PATCH 0028/1050] batman-adv: fix elp packet data reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The skb_reserve() call only reserved headroom for the mac header, but not the elp packet header itself. Fixing this by using skb_put()'ing towards the skb tail instead of skb_push()'ing towards the skb head. 
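For illustration, the intended skb geometry (an editorial sketch, not part of the patch; payload_len stands in for BATADV_ELP_HLEN):

	skb = dev_alloc_skb(ETH_HLEN + NET_IP_ALIGN + payload_len);
	/* Only creates headroom by advancing data/tail; it accounts
	 * for no payload bytes.
	 */
	skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
	/* Grows the data area at the tail and returns a pointer to the
	 * newly added payload_len bytes.
	 */
	elp_buff = skb_put(skb, payload_len);

skb_push() would instead have consumed the headroom that skb_reserve() had set aside for the mac header, whereas skb_put() claims the bytes from the tailroom.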
Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 7d170010beb9..ee08540ce503 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -335,7 +335,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) goto out; skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN); - elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); + elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); elp_packet = (struct batadv_elp_packet *)elp_buff; memset(elp_packet, 0, BATADV_ELP_HLEN); From 29617e1cfc2aa869154f5ed2580b756ec2c3cb28 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 25 Aug 2016 10:56:48 -0400 Subject: [PATCH 0029/1050] MAINTAINERS: Add myself as reviewer for Samsung Exynos support I've been helping reviewing and testing Exynos SoC support patches for the last couple of years. But it would be easier for me if I'm cc'ed for patches, so I'm adding myself as reviewer for this entry. Signed-off-by: Javier Martinez Canillas Acked-by: Kukjin Kim Acked-by: Sylwester Nawrocki Signed-off-by: Krzysztof Kozlowski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c9cd8d34b753..8a8a48512f4b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1614,6 +1614,7 @@ N: rockchip ARM/SAMSUNG EXYNOS ARM ARCHITECTURES M: Kukjin Kim M: Krzysztof Kozlowski +R: Javier Martinez Canillas L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained From 6654674cb7b5953ac04fc9d7f5f511676ae97e29 Mon Sep 17 00:00:00 2001 From: Jorik Jonker Date: Sat, 27 Aug 2016 21:04:33 +0200 Subject: [PATCH 0030/1050] clk: sunxi-ng: Fix wrong reset register offsets The reset register offsets for UART*, I2C* and SCR were off by a few bytes. 
Signed-off-by: Jorik Jonker Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun8i-h3.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 9af359544110..267f99523fbe 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -783,14 +783,14 @@ static struct ccu_reset_map sun8i_h3_ccu_resets[] = { [RST_BUS_I2S1] = { 0x2d0, BIT(13) }, [RST_BUS_I2S2] = { 0x2d0, BIT(14) }, - [RST_BUS_I2C0] = { 0x2d4, BIT(0) }, - [RST_BUS_I2C1] = { 0x2d4, BIT(1) }, - [RST_BUS_I2C2] = { 0x2d4, BIT(2) }, - [RST_BUS_UART0] = { 0x2d4, BIT(16) }, - [RST_BUS_UART1] = { 0x2d4, BIT(17) }, - [RST_BUS_UART2] = { 0x2d4, BIT(18) }, - [RST_BUS_UART3] = { 0x2d4, BIT(19) }, - [RST_BUS_SCR] = { 0x2d4, BIT(20) }, + [RST_BUS_I2C0] = { 0x2d8, BIT(0) }, + [RST_BUS_I2C1] = { 0x2d8, BIT(1) }, + [RST_BUS_I2C2] = { 0x2d8, BIT(2) }, + [RST_BUS_UART0] = { 0x2d8, BIT(16) }, + [RST_BUS_UART1] = { 0x2d8, BIT(17) }, + [RST_BUS_UART2] = { 0x2d8, BIT(18) }, + [RST_BUS_UART3] = { 0x2d8, BIT(19) }, + [RST_BUS_SCR] = { 0x2d8, BIT(20) }, }; static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = { From 8a07fed44b126f48020f122b9e6bf05d8c48f281 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 Aug 2016 10:25:58 +0100 Subject: [PATCH 0031/1050] drm/i915/dvo: Remove dangling call to drm_encoder_cleanup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we hit the error path, we have never called drm_encoder_init() and so have nothing to cleanup. Doing so hits a null dereference: [ 10.066261] BUG: unable to handle kernel NULL pointer dereference at 00000104 [ 10.066273] IP: [] mutex_lock+0xa/0x15 [ 10.066287] *pde = 00000000 [ 10.066295] Oops: 0002 [#1] [ 10.066302] Modules linked in: i915(+) video i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm iTCO_wdt iTCO_vendor_support ppdev evdev snd_intel8x0 snd_ac97_codec ac97_bus psmouse snd_pcm snd_timer snd pcspkr uhci_hcd ehci_pci soundcore sr_mod ehci_hcd serio_raw i2c_i801 usbcore i2c_smbus cdrom lpc_ich mfd_core rng_core e100 mii floppy parport_pc parport acpi_cpufreq button processor usb_common eeprom lm85 hwmon_vid autofs4 [ 10.066378] CPU: 0 PID: 132 Comm: systemd-udevd Not tainted 4.8.0-rc3-00013-gef0e1ea #34 [ 10.066389] Hardware name: MicroLink /D865GLC , BIOS BF86510A.86A.0077.P25.0508040031 08/04/2005 [ 10.066401] task: f62db800 task.stack: f5970000 [ 10.066409] EIP: 0060:[] EFLAGS: 00010286 CPU: 0 [ 10.066417] EIP is at mutex_lock+0xa/0x15 [ 10.066424] EAX: 00000104 EBX: 00000104 ECX: 00000000 EDX: 80000000 [ 10.066432] ESI: 00000000 EDI: 00000104 EBP: f5be8000 ESP: f5971b58 [ 10.066439] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 [ 10.066446] CR0: 80050033 CR2: 00000104 CR3: 35945000 CR4: 000006d0 [ 10.066453] Stack: [ 10.066459] f503d740 f824dddf 00000000 f61170c0 f61170c0 f82371ae f850f40e 00000001 [ 10.066476] f61170c0 f5971bcc f5be8000 f9c2d401 00000001 f8236fcc 00000001 00000000 [ 10.066491] f5144014 f5be8104 00000008 f9c5267c 00000007 f61170c0 f5144400 f9c4ff00 [ 10.066507] Call Trace: [ 10.066526] [] ? drm_modeset_lock_all+0x27/0xb3 [drm] [ 10.066545] [] ? drm_encoder_cleanup+0x1a/0x132 [drm] [ 10.066559] [] ? drm_atomic_helper_connector_reset+0x3f/0x5c [drm_kms_helper] [ 10.066644] [] ? intel_dvo_init+0x569/0x788 [i915] [ 10.066663] [] ? drm_encoder_init+0x43/0x20b [drm] [ 10.066734] [] ? intel_modeset_init+0x1436/0x17dd [i915] [ 10.066791] [] ? 
i915_driver_load+0x85a/0x15d3 [i915] [ 10.066846] [] ? i915_driver_open+0x5/0x5 [i915] [ 10.066857] [] ? firmware_map_add_entry.part.2+0xc/0xc [ 10.066868] [] ? pci_device_probe+0x8e/0x11c [ 10.066878] [] ? driver_probe_device+0x1db/0x62e [ 10.066888] [] ? kernfs_new_node+0x29/0x9c [ 10.066897] [] ? pci_match_device+0xd9/0x161 [ 10.066905] [] ? kernfs_create_dir_ns+0x42/0x88 [ 10.066914] [] ? __driver_attach+0xe6/0x11b [ 10.066924] [] ? kobject_add_internal+0x1bb/0x44f [ 10.066933] [] ? driver_probe_device+0x62e/0x62e [ 10.066941] [] ? bus_for_each_dev+0x46/0x7f [ 10.066950] [] ? driver_attach+0x1a/0x34 [ 10.066958] [] ? driver_probe_device+0x62e/0x62e [ 10.066966] [] ? bus_add_driver+0x217/0x32a [ 10.066975] [] ? 0xf8403000 [ 10.066982] [] ? driver_register+0x5f/0x108 [ 10.066991] [] ? do_one_initcall+0x49/0x1f6 [ 10.067000] [] ? pick_next_task_fair+0x14b/0x2a3 [ 10.067008] [] ? __schedule+0x15c/0x4fe [ 10.067016] [] ? preempt_schedule_common+0x19/0x3c [ 10.067027] [] ? do_init_module+0x17/0x230 [ 10.067035] [] ? _cond_resched+0x12/0x1a [ 10.067044] [] ? kmem_cache_alloc+0x8f/0x11f [ 10.067052] [] ? do_init_module+0x17/0x230 [ 10.067060] [] ? kfree+0x137/0x203 [ 10.067068] [] ? do_init_module+0x76/0x230 [ 10.067078] [] ? load_module+0x2a39/0x333f [ 10.067087] [] ? SyS_finit_module+0x96/0xd5 [ 10.067096] [] ? vm_mmap_pgoff+0x79/0xa0 [ 10.067105] [] ? do_fast_syscall_32+0xb5/0x1b0 [ 10.067114] [] ? sysenter_past_esp+0x47/0x75 [ 10.067121] Code: c8 f7 76 c1 e8 8e cc d2 ff e9 45 fe ff ff 66 90 66 90 66 90 66 90 90 ff 00 7f 05 e8 4e 0c 00 00 c3 53 89 c3 e8 75 ec ff ff 89 d8 08 79 05 e8 fa 0a 00 00 5b c3 53 89 c3 85 c0 74 1b 8b 03 83 [ 10.067180] EIP: [] mutex_lock+0xa/0x15 SS:ESP 0068:f5971b58 [ 10.067190] CR2: 0000000000000104 [ 10.067222] ---[ end trace 049f1f09da45a856 ]--- Reported-by: Meelis Roos Fixes: 580d8ed522e0 ("drm/i915: Give encoders useful names") Reviewed-by: David Weinehall Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160823092558.14931-1-chris@chris-wilson.co.uk (cherry picked from commit 8f76aa0ebe0b7787afe768d9df80031e832d2520) --- drivers/gpu/drm/i915/intel_dvo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 47bdf9dad0d3..b9e5a63a7c9e 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -554,7 +554,6 @@ void intel_dvo_init(struct drm_device *dev) return; } - drm_encoder_cleanup(&intel_encoder->base); kfree(intel_dvo); kfree(intel_connector); } From a93295a7e118b5c58391906e47183775b1eb7cb1 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 15 Aug 2016 10:47:39 +0300 Subject: [PATCH 0032/1050] mtd: nand: omap2: Don't call dma_release_channel() if dma_request_chan() failed dma_request_chan() can fail returning an error pointer. In this case prevent calling dma_release_channel() to prevent a ERR_PTR() dereference. As error path can be called even with no DMA configuration, info->dma can be NULL so don't call dma_release_channel() for that case either. 
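As a minimal sketch of the resulting cleanup rule (assuming only a channel pointer that may be NULL, an ERR_PTR() value, or a valid channel; the helper name is illustrative):

#include <linux/dmaengine.h>
#include <linux/err.h>

static void example_dma_teardown(struct dma_chan *chan)
{
	/*
	 * chan may be NULL (DMA never configured) or an ERR_PTR()
	 * value (dma_request_chan() failed); release only a real
	 * channel.
	 */
	if (!IS_ERR_OR_NULL(chan))
		dma_release_channel(chan);
}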
Fixes: de3bfc4a1616: ("mtd: nand: omap2: fix return value check in omap_nand_probe()") Reported-by: Dan Carpenter Signed-off-by: Roger Quadros Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/omap2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index a59361c36f40..5513bfd9cdc9 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -2169,7 +2169,7 @@ scan_tail: return 0; return_error: - if (info->dma) + if (!IS_ERR_OR_NULL(info->dma)) dma_release_channel(info->dma); if (nand_chip->ecc.priv) { nand_bch_free(nand_chip->ecc.priv); From b030485220caf862c71db6fb8b8ad016ce7f7565 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 21 Aug 2016 03:27:45 -0400 Subject: [PATCH 0033/1050] ARM: EXYNOS: Clear OF_POPULATED flag from PMU node in IRQ init callback The Exynos PMU node is an interrupt, clock and PMU (Power Management Unit) controller, and these functionalities are supported by different drivers that matches the same compatible strings. Since commit 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") the OF core flags interrupt controllers registered with the IRQCHIP_DECLARE() macro as OF_POPULATED, so platform devices with the same compatible string as the interrupt controller will not be registered. This prevents the PMU platform device to be registered so the Exynos PMU driver is never probed. This breaks (among other things) Suspend-to-RAM. Fix this by clearing the OF_POPULATED flag in the PMU IRQ init callback, to allow the Exynos PMU platform driver to be probed. The patch is based on Philipp Zabel's "ARM: imx6: mark GPC node as not populated after irq init to probe pm domain driver". Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski --- arch/arm/mach-exynos/suspend.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 3750575c73c5..06332f626565 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -255,6 +255,12 @@ static int __init exynos_pmu_irq_init(struct device_node *node, return -ENOMEM; } + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the Exynos PMU platform device won't be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); + return 0; } From 313a61d30761217ce4383018de1cc0d5d503a376 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 30 Aug 2016 13:57:38 -0700 Subject: [PATCH 0034/1050] drm/vc4: Allow some more signals to be packed with uniform resets. The intent was to make sure people don't sneak in a small immediate or something to change the interpretation of the uniform update args, but these signals are just fine. Fixes a validation failure in the current X server on some Render operation. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_validate_shaders.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 46527e989ce3..2543cf5b8b51 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -309,8 +309,14 @@ validate_uniform_address_write(struct vc4_validated_shader_info *validated_shade * of uniforms on each side. However, this scheme is easy to * validate so it's all we allow for now. 
 */
-
-	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
+	case QPU_SIG_NONE:
+	case QPU_SIG_SCOREBOARD_UNLOCK:
+	case QPU_SIG_COLOR_LOAD:
+	case QPU_SIG_LOAD_TMU0:
+	case QPU_SIG_LOAD_TMU1:
+		break;
+	default:
 		DRM_ERROR("uniforms address change must be "
 			  "normal math\n");
 		return false;

From 8e018c21da3febb558586b48c8db0d6d66cb6593 Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Thu, 25 Aug 2016 10:09:39 -0700
Subject: [PATCH 0035/1050] raid5-cache: fix a deadlock in superblock write

There is a potential deadlock in superblock write. Discard could zero data,
so before discard we must make sure the superblock is updated to the new
log tail. Updating the superblock (either directly calling md_update_sb()
or depending on the md thread) must hold the reconfig mutex. On the other
hand, raid5_quiesce is called with reconfig_mutex held. The first step of
raid5_quiesce() is waiting for all IO to finish, hence waiting for the
reclaim thread, while the reclaim thread is calling this function and
waiting for the reconfig mutex. So there is a deadlock. We work around this
issue with a trylock. The downside of the solution is that we could miss a
discard if we can't take the reconfig mutex. But this should happen rarely
(mainly during raid array stop), so a missed discard shouldn't be a big
problem.

Cc: NeilBrown
Signed-off-by: Shaohua Li
---
 drivers/md/raid5-cache.c | 46 +++++++++++++---------------------------
 1 file changed, 15 insertions(+), 31 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 51f76ddbe265..1b1ab4a1d132 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -96,7 +96,6 @@ struct r5l_log {
 	spinlock_t no_space_stripes_lock;

 	bool need_cache_flush;
-	bool in_teardown;
 };

 /*
@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 	mddev = log->rdev->mddev;
 	/*
-	 * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and
-	 * wait for this thread to finish. This thread waits for
-	 * MD_CHANGE_PENDING clear, which is supposed to be done in
-	 * md_check_recovery(). md_check_recovery() tries to get
-	 * reconfig_mutex. Since r5l_quiesce already holds the mutex,
-	 * md_check_recovery() fails, so the PENDING never get cleared. The
-	 * in_teardown check workaround this issue.
+	 * Discard could zero data, so before discard we must make sure the
+	 * superblock is updated to the new log tail. Updating the superblock
+	 * (either directly calling md_update_sb() or depending on the md
+	 * thread) must hold the reconfig mutex. On the other hand,
+	 * raid5_quiesce is called with reconfig_mutex held. The first step
+	 * of raid5_quiesce() is waiting for all IO to finish, hence waiting
+	 * for the reclaim thread, while the reclaim thread is calling this
+	 * function and waiting for the reconfig mutex. So there is a
+	 * deadlock. We work around this issue with a trylock.
+	 * FIXME: we could miss a discard if we can't take the reconfig mutex
 	 */
-	if (!log->in_teardown) {
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
-			log->in_teardown);
-		/*
-		 * r5l_quiesce could run after in_teardown check and hold
-		 * mutex first. Superblock might get updated twice.
- */ - if (log->in_teardown) - md_update_sb(mddev, 1); - } else { - WARN_ON(!mddev_is_locked(mddev)); - md_update_sb(mddev, 1); - } + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); + if (!mddev_trylock(mddev)) + return; + md_update_sb(mddev, 1); + mddev_unlock(mddev); /* discard IO error really doesn't matter, ignore it */ if (log->last_checkpoint < end) { @@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state) if (!log || state == 2) return; if (state == 0) { - log->in_teardown = 0; /* * This is a special case for hotadd. In suspend, the array has * no journal. In resume, journal is initialized as well as the @@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state) log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); } else if (state == 1) { - /* - * at this point all stripes are finished, so io_unit is at - * least in STRIPE_END state - */ - log->in_teardown = 1; /* make sure r5l_write_super_and_discard_space exits */ mddev = log->rdev->mddev; wake_up(&mddev->sb_wait); From ad5b0f7685dbfc4730987cd16af3c5ebe8133f10 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 30 Aug 2016 10:29:33 -0700 Subject: [PATCH 0036/1050] raid5: guarantee enough stripes to avoid reshape hang If there aren't enough stripes, reshape will hang. We have a check for this in new reshape, but miss it for reshape resume, hence we could see hang in reshape resume. This patch forces enough stripes existed if reshape resumes. Reviewed-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index da583bb43c84..b95c54c0e596 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) } conf->min_nr_stripes = NR_STRIPES; + if (mddev->reshape_position != MaxSector) { + int stripes = max_t(int, + ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4, + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4); + conf->min_nr_stripes = max(NR_STRIPES, stripes); + if (conf->min_nr_stripes != NR_STRIPES) + printk(KERN_INFO + "md/raid:%s: force stripe size %d for reshape\n", + mdname(mddev), conf->min_nr_stripes); + } memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); From 6af7e4f77259ee946103387372cb159f2e99a6d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 1 Sep 2016 08:52:29 -0500 Subject: [PATCH 0037/1050] PCI: Mark Haswell Power Control Unit as having non-compliant BARs The Haswell Power Control Unit has a non-PCI register (CONFIG_TDP_NOMINAL) where BAR 0 is supposed to be. This is erratum HSE43 in the spec update referenced below: The PCIe* Base Specification indicates that Configuration Space Headers have a base address register at offset 0x10. Due to this erratum, the Power Control Unit's CONFIG_TDP_NOMINAL CSR (Bus 1; Device 30; Function 3; Offset 0x10) is located where a base register is expected. Mark the PCU as having non-compliant BARs so we don't try to probe any of them. There are no other BARs on this device. Rename the quirk so it's not Broadwell-specific. 
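The mechanism is an early header fixup that runs before the PCI core sizes any BARs; a sketch of its shape (mirroring the hunk below, with an illustrative function name):

#include <linux/pci.h>

static void example_invalid_bar(struct pci_dev *dev)
{
	/* tell the PCI core not to probe or size this device's BARs */
	dev->non_compliant_bars = 1;
}
/* 0x2fc0 is the Haswell PCU function described above */
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, example_invalid_bar);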
Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-datasheet-vol-2.html (section 5.4, Device 30 Function 3) Link: https://bugzilla.kernel.org/show_bug.cgi?id=153881 Reported-by: Paul Menzel Tested-by: Prarit Bhargava Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe --- arch/x86/pci/fixup.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 837ea36a837d..6d52b94f4bb9 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -553,15 +553,21 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); /* - * Broadwell EP Home Agent BARs erroneously return non-zero values when read. + * Device [8086:2fc0] + * Erratum HSE43 + * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html * - * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html - * entry BDF2. + * Devices [8086:6f60,6fa0,6fc0] + * Erratum BDF2 + * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html */ -static void pci_bdwep_bar(struct pci_dev *dev) +static void pci_invalid_bar(struct pci_dev *dev) { dev->non_compliant_bars = 1; } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar); From c2f321126e31cd69365e65ecd4a7c774e4fc71d2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Aug 2016 21:50:22 +0200 Subject: [PATCH 0038/1050] ARM: shmobile: fix regulator quirk for Gen2 The current implementation only works if the da9xxx devices are added before their drivers are registered. Only then it can apply the fixes to both devices. Otherwise, the driver for the first device gets probed before the fix for the second device can be applied. This is what fails when using the IP core switcher or when having the i2c master driver as a module. So, we need to disable both da9xxx once we detected one of them. We now use i2c_transfer with hardcoded i2c_messages and device addresses, so we don't need the da9xxx client devices to be instantiated. Because the fixup is used on specific boards only, the addresses are not going to change. 
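In outline, such a raw transfer looks like the sketch below (illustrative register and address values; the first buffer byte selects the starting register, the following bytes are written to consecutive registers):

#include <linux/i2c.h>
#include <linux/kernel.h>

/* first byte sets the register pointer, the rest are register values */
static u8 example_irq_clr[] = { 0x54, 0xff, 0xff };

static int example_mask_irqs(struct i2c_adapter *adap)
{
	struct i2c_msg msg = {
		.addr = 0x68,	/* hardcoded PMIC slave address */
		.len  = ARRAY_SIZE(example_irq_clr),
		.buf  = example_irq_clr,
	};

	/* i2c_transfer() returns the number of messages transferred */
	return i2c_transfer(adap, &msg, 1) == 1 ? 0 : -EIO;
}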
Fixes: 663fbb52159cca ("ARM: shmobile: R-Car Gen2: Add da9063/da9210 regulator quirk") Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven (r8a7791/koelsch) Tested-by: Kuninori Morimoto Signed-off-by: Simon Horman --- .../mach-shmobile/regulator-quirk-rcar-gen2.c | 62 ++++++++----------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index 62437b57813e..73e3adbc1330 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -41,40 +41,27 @@ #define REGULATOR_IRQ_MASK BIT(2) /* IRQ2, active low */ +/* start of DA9210 System Control and Event Registers */ +#define DA9210_REG_MASK_A 0x54 + static void __iomem *irqc; -static const u8 da9063_mask_regs[] = { - DA9063_REG_IRQ_MASK_A, - DA9063_REG_IRQ_MASK_B, - DA9063_REG_IRQ_MASK_C, - DA9063_REG_IRQ_MASK_D, +/* first byte sets the memory pointer, following are consecutive reg values */ +static u8 da9063_irq_clr[] = { DA9063_REG_IRQ_MASK_A, 0xff, 0xff, 0xff, 0xff }; +static u8 da9210_irq_clr[] = { DA9210_REG_MASK_A, 0xff, 0xff }; + +static struct i2c_msg da9xxx_msgs[2] = { + { + .addr = 0x58, + .len = ARRAY_SIZE(da9063_irq_clr), + .buf = da9063_irq_clr, + }, { + .addr = 0x68, + .len = ARRAY_SIZE(da9210_irq_clr), + .buf = da9210_irq_clr, + }, }; -/* DA9210 System Control and Event Registers */ -#define DA9210_REG_MASK_A 0x54 -#define DA9210_REG_MASK_B 0x55 - -static const u8 da9210_mask_regs[] = { - DA9210_REG_MASK_A, - DA9210_REG_MASK_B, -}; - -static void da9xxx_mask_irqs(struct i2c_client *client, const u8 regs[], - unsigned int nregs) -{ - unsigned int i; - - dev_info(&client->dev, "Masking %s interrupt sources\n", client->name); - - for (i = 0; i < nregs; i++) { - int error = i2c_smbus_write_byte_data(client, regs[i], ~0); - if (error) { - dev_err(&client->dev, "i2c error %d\n", error); - return; - } - } -} - static int regulator_quirk_notify(struct notifier_block *nb, unsigned long action, void *data) { @@ -93,12 +80,15 @@ static int regulator_quirk_notify(struct notifier_block *nb, client = to_i2c_client(dev); dev_dbg(dev, "Detected %s\n", client->name); - if ((client->addr == 0x58 && !strcmp(client->name, "da9063"))) - da9xxx_mask_irqs(client, da9063_mask_regs, - ARRAY_SIZE(da9063_mask_regs)); - else if (client->addr == 0x68 && !strcmp(client->name, "da9210")) - da9xxx_mask_irqs(client, da9210_mask_regs, - ARRAY_SIZE(da9210_mask_regs)); + if ((client->addr == 0x58 && !strcmp(client->name, "da9063")) || + (client->addr == 0x68 && !strcmp(client->name, "da9210"))) { + int ret; + + dev_info(&client->dev, "clearing da9063/da9210 interrupts\n"); + ret = i2c_transfer(client->adapter, da9xxx_msgs, ARRAY_SIZE(da9xxx_msgs)); + if (ret != ARRAY_SIZE(da9xxx_msgs)) + dev_err(&client->dev, "i2c error %d\n", ret); + } mon = ioread32(irqc + IRQC_MONITOR); if (mon & REGULATOR_IRQ_MASK) From a41bd25ae67d3e4052c7f00ee9f2b4ba9219309e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Aug 2016 18:42:35 +0200 Subject: [PATCH 0039/1050] sunrpc: fix UDP memory accounting The commit f9b2ee714c5c ("SUNRPC: Move UDP receive data path into a workqueue context"), as a side effect, moved the skb_free_datagram() call outside the scope of the related socket lock, but UDP sockets require such lock to be held for proper memory accounting. Fix it by replacing skb_free_datagram() with skb_free_datagram_locked(). 
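A sketch of the call-site shape after the fix (hedged: the locked variant takes the socket lock internally around the memory accounting, which is why it is safe from the workqueue context that no longer holds the lock):

#include <linux/skbuff.h>
#include <net/sock.h>

static void example_consume_skb(struct sock *sk, struct sk_buff *skb)
{
	/* safe from a context that does not hold the socket lock */
	skb_free_datagram_locked(sk, skb);
}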
Fixes: f9b2ee714c5c ("SUNRPC: Move UDP receive data path into a workqueue context")
Reported-and-tested-by: Jan Stancek
Signed-off-by: Paolo Abeni
Cc: stable@vger.kernel.org # 4.4+
Signed-off-by: Trond Myklebust
---
 net/sunrpc/xprtsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8ede3bc52481..bf168838a029 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1074,7 +1074,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
 		skb = skb_recv_datagram(sk, 0, 1, &err);
 		if (skb != NULL) {
 			xs_udp_data_read_skb(&transport->xprt, sk, skb);
-			skb_free_datagram(sk, skb);
+			skb_free_datagram_locked(sk, skb);
 			continue;
 		}
 		if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))

From c49edecd513693ea7530ab18efbd7d6d5b7cbf90 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Sat, 3 Sep 2016 12:05:31 -0400
Subject: [PATCH 0040/1050] NFS: Fix error reporting in nfs_file_write()

When doing O_DSYNC writes, the actual write errors are reported through
generic_write_sync(), so we must test the result.

Reported-by: J. R. Okajima
Fixes: 18290650b1c8 ("NFS: Move buffered I/O locking into nfs_file_write()")
Signed-off-by: Trond Myklebust
---
 fs/nfs/file.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7d620970f2e1..ca699ddc11c1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -657,7 +657,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	if (result <= 0)
 		goto out;
-	written = generic_write_sync(iocb, result);
+	result = generic_write_sync(iocb, result);
+	if (result < 0)
+		goto out;
+	written = result;
 	iocb->ki_pos += written;

 	/* Return error values */

From bf0291dd2267a2b9a4cd74d65249553d11bb45d6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Sat, 3 Sep 2016 10:39:51 -0400
Subject: [PATCH 0041/1050] pNFS: Ensure LAYOUTGET and LAYOUTRETURN are properly serialised

According to RFC5661, the client is responsible for serialising LAYOUTGET
and LAYOUTRETURN to avoid ambiguity. Consider the case where we send both
in parallel.

		Client				Server
		======				======
		LAYOUTGET(seqid=X)
		LAYOUTRETURN(seqid=X)
						LAYOUTGET return seqid=X+1
						LAYOUTRETURN return seqid=X+2
		Process LAYOUTRETURN
		Forget layout stateid
		Process LAYOUTGET
		Set seqid=X+1

The client processes the layoutget/layoutreturn in the wrong order, and
since the result of the layoutreturn was to clear the only existing layout
segment, the client forgets the layout stateid. When the LAYOUTGET comes
in, it is treated as having a completely new stateid, and so the client
sets the wrong sequence id...

The fix is to check whether there are outstanding LAYOUTGET requests before
we send the LAYOUTRETURN (note that LAYOUTGET will already wait if it sees
an outstanding LAYOUTRETURN).
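In outline, the guard this adds (a sketch mirroring the hunk below; the types come from fs/nfs/pnfs.h, and plh_outstanding counts in-flight LAYOUTGET calls):

static bool example_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
{
	/* don't start a LAYOUTRETURN while LAYOUTGETs are in flight */
	if (atomic_read(&lo->plh_outstanding) != 0)
		return false;
	/* only one LAYOUTRETURN may be outstanding at a time */
	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		return false;
	return true;
}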
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6daf034645c8..519ad320f5cd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -899,6 +899,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, enum pnfs_iomode *iomode) { + /* Serialise LAYOUTGET/LAYOUTRETURN */ + if (atomic_read(&lo->plh_outstanding) != 0) + return false; if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return false; pnfs_get_layout_hdr(lo); From 2a59a0411671ef9daf17ba21da57809c696f4119 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 11:20:04 -0400 Subject: [PATCH 0042/1050] pNFS: Fix pnfs_set_layout_stateid() to clear NFS_LAYOUT_INVALID_STID If the layout was marked as invalid, we want to ensure to initialise the layout header fields correctly. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 519ad320f5cd..cd8b5fca33f6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -768,17 +768,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier) { u32 oldseq, newseq, new_barrier = 0; - bool invalid = !pnfs_layout_is_valid(lo); oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) { + + if (!pnfs_layout_is_valid(lo)) { + nfs4_stateid_copy(&lo->plh_stateid, new); + lo->plh_barrier = newseq; + pnfs_clear_layoutreturn_info(lo); + clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + return; + } + if (pnfs_seqid_is_newer(newseq, oldseq)) { nfs4_stateid_copy(&lo->plh_stateid, new); /* * Because of wraparound, we want to keep the barrier @@ -790,7 +805,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, new_barrier = be32_to_cpu(new->seqid); else if (new_barrier == 0) return; - if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) + if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) lo->plh_barrier = new_barrier; } @@ -886,14 +901,6 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - static bool pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, @@ -1801,16 +1808,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) */ pnfs_mark_layout_stateid_invalid(lo, &free_me); - nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); - lo->plh_barrier = be32_to_cpu(res->stateid.seqid); + pnfs_set_layout_stateid(lo, &res->stateid, true); } pnfs_get_lseg(lseg); pnfs_layout_insert_lseg(lo, lseg, &free_me); - if (!pnfs_layout_is_valid(lo)) { - pnfs_clear_layoutreturn_info(lo); - clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - } if (res->return_on_close) From 52ec7be2e27392201adf77892ba883f68df88c99 Mon Sep 17 
00:00:00 2001
From: Trond Myklebust
Date: Sat, 3 Sep 2016 11:05:28 -0400
Subject: [PATCH 0043/1050] pNFS: Clear out all layout segments if the server unsets lrp->res.lrs_present

If the server fails to set lrp->res.lrs_present in the LAYOUTRETURN reply,
then that means it believes the client holds no more layout state for that
file, and that the layout stateid is now invalid.

Signed-off-by: Trond Myklebust
---
 fs/nfs/nfs4proc.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f5aecaabcb7c..c380d2ee4137 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8190,10 +8190,13 @@ static void nfs4_layoutreturn_release(void *calldata)
 	dprintk("--> %s\n", __func__);
 	spin_lock(&lo->plh_inode->i_lock);
-	pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
-			be32_to_cpu(lrp->args.stateid.seqid));
-	if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))
+	if (lrp->res.lrs_present) {
+		pnfs_mark_matching_lsegs_invalid(lo, &freeme,
+				&lrp->args.range,
+				be32_to_cpu(lrp->args.stateid.seqid));
 		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+	} else
+		pnfs_mark_layout_stateid_invalid(lo, &freeme);
 	pnfs_clear_layoutreturn_waitbit(lo);
 	spin_unlock(&lo->plh_inode->i_lock);
 	nfs4_sequence_free_slot(&lrp->res.seq_res);

From 609e941a6bcd7ceb1cbb561941c997f6465e8698 Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Thu, 1 Sep 2016 06:43:46 -0700
Subject: [PATCH 0044/1050] iw_cxgb4: call dev_put() on l2t allocation failure

Reviewed-by: Sagi Grimberg
Reviewed-by: Christoph Hellwig
Signed-off-by: Steve Wise
Signed-off-by: Sagi Grimberg
---
 drivers/infiniband/hw/cxgb4/cm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index a3a67216bce6..57b5bb56cdbf 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2077,8 +2077,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
 		}
 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev,
 					rt_tos2priority(tos));
-		if (!ep->l2t)
+		if (!ep->l2t) {
+			dev_put(pdev);
 			goto out;
+		}
 		ep->mtu = pdev->mtu;
 		ep->tx_chan = cxgb4_port_chan(pdev);
 		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,

From 37eb816c0867b1b0db273d22b530780a0a083980 Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Thu, 1 Sep 2016 06:44:52 -0700
Subject: [PATCH 0045/1050] iw_cxgb4: block module unload until all ep resources are released

Otherwise an endpoint can still be closing down, causing a use-after-free
crash. Also, WARN_ON() if ULPs have failed to destroy various resources
during device removal.
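A minimal sketch of the drain pattern used here (assuming an idr of live endpoints and a wait queue head in the device structure, as added below; names are illustrative and locking around the idr is elided for brevity):

#include <linux/idr.h>
#include <linux/wait.h>

/* release side: wake the unload path when the last id disappears */
static void example_remove_ep(struct idr *idr, wait_queue_head_t *wq, int id)
{
	idr_remove(idr, id);
	if (idr_is_empty(idr))
		wake_up(wq);
}

/* unload side: block until every endpoint has been released */
static void example_drain_eps(struct idr *idr, wait_queue_head_t *wq)
{
	wait_event(*wq, idr_is_empty(idr));
	idr_destroy(idr);
}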
Fixes: ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref") Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/infiniband/hw/cxgb4/cm.c | 2 ++ drivers/infiniband/hw/cxgb4/device.c | 5 +++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 57b5bb56cdbf..5621270664f4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -314,6 +314,8 @@ static void remove_ep_tid(struct c4iw_ep *ep) spin_lock_irqsave(&ep->com.dev->lock, flags); _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + if (idr_is_empty(&ep->com.dev->hwtid_idr)) + wake_up(&ep->com.dev->wait); spin_unlock_irqrestore(&ep->com.dev->lock, flags); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae2e8b23d2dd..63561aa9c69e 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); idr_destroy(&ctx->dev->cqidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); idr_destroy(&ctx->dev->qpidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); idr_destroy(&ctx->dev->mmidr); + wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); @@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + init_waitqueue_head(&devp->wait); devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f6f34a75af27..0e9cd4479bb3 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -263,6 +263,7 @@ struct c4iw_dev { struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; + wait_queue_head_t wait; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) From cdbecc8d24b642b67ae79a0acc2ff18d3d0e677e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 09:12:25 -0700 Subject: [PATCH 0046/1050] nvme_rdma: keep a ref on the ctrl during delete/flush Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ab545fb347a0..15b0c1d025f5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1351,9 +1351,15 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) ret = 1; } - /* Queue controller deletion */ + /* + * Queue controller deletion. 
Keep a reference until all
+	 * work is flushed since delete_work will free the ctrl mem
+	 */
+	kref_get(&ctrl->ctrl.kref);
 	queue_work(nvme_rdma_wq, &ctrl->delete_work);
 	flush_work(&ctrl->delete_work);
+	nvme_put_ctrl(&ctrl->ctrl);
+
 	return ret;
 }

@@ -1700,15 +1706,19 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
 static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-	int ret;
+	int ret = 0;

+	/*
+	 * Keep a reference until all work is flushed since
+	 * __nvme_rdma_del_ctrl can free the ctrl mem
+	 */
+	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
+		return -EBUSY;
 	ret = __nvme_rdma_del_ctrl(ctrl);
-	if (ret)
-		return ret;
-
-	flush_work(&ctrl->delete_work);
-
-	return 0;
+	if (!ret)
+		flush_work(&ctrl->delete_work);
+	nvme_put_ctrl(&ctrl->ctrl);
+	return ret;
 }

 static void nvme_rdma_remove_ctrl_work(struct work_struct *work)

From f361e5a01ed35c0f9a00816d76a910d8a5cb4547 Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 2 Sep 2016 09:01:27 -0700
Subject: [PATCH 0047/1050] nvme-rdma: destroy nvme queue rdma resources on connect failure

After address resolution, the nvme_rdma_queue rdma resources are allocated.
If rdma route resolution or the connect fails, or the controller reconnect
times out and gives up, then the rdma resources need to be freed. Otherwise,
rdma resources are leaked.

Reviewed-by: Christoph Hellwig
Reviewed-by: Sagi Grimberg
Signed-off-by: Steve Wise
Signed-off-by: Sagi Grimberg
---
 drivers/nvme/host/rdma.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 15b0c1d025f5..a9d43f09963f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -82,6 +82,7 @@ struct nvme_rdma_request {

 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_CONNECTED = (1 << 0),
+	NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
 };

 struct nvme_rdma_queue {
@@ -480,9 +481,14 @@ out_err:

 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
+	struct nvme_rdma_device *dev;
+	struct ib_device *ibdev;

+	if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
+		return;
+
+	dev = queue->device;
+	ibdev = dev->dev;
 	rdma_destroy_qp(queue->cm_id);
 	ib_free_cq(queue->ib_cq);

@@ -533,6 +539,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
 		ret = -ENOMEM;
 		goto out_destroy_qp;
 	}
+	set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);

 	return 0;

@@ -590,6 +597,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 	return 0;

 out_destroy_cm_id:
+	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
 	return ret;
 }
@@ -652,7 +660,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
 	return 0;

 out_free_queues:
-	for (; i >= 1; i--)
+	for (i--; i >= 1; i--)
 		nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);

 	return ret;

From 334a8f37115bf35e38617315a360a91ac4f2b2c6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Sun, 4 Sep 2016 12:46:35 -0400
Subject: [PATCH 0048/1050] pNFS: Don't forget the layout stateid if there are outstanding LAYOUTGETs

If there are outstanding LAYOUTGET rpc calls, then we want to ensure that
we keep the layout stateid around so that we don't inadvertently pick up
an old/misordered sequence id.
The race is as follows: Client Server ====== ====== LAYOUTGET(seqid) LAYOUTGET(seqid) return LAYOUTGET(seqid+1) return LAYOUTGET(seqid+2) process LAYOUTGET(seqid+2) forget layout process LAYOUTGET(seqid+1) If it forgets the layout stateid before processing seqid+1, then the client will not check the layout->plh_barrier, and so will set the stateid with seqid+1. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cd8b5fca33f6..2c93a85eda51 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); if (list_empty(&lo->plh_segs)) { - set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + if (atomic_read(&lo->plh_outstanding) == 0) + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); From cc2187a6e037bc64404f63c6d650ff263c2200c0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 4 Sep 2016 11:37:36 +0200 Subject: [PATCH 0049/1050] x86/microcode/AMD: Fix load of builtin microcode with randomized memory We do not need to add the randomization offset when the microcode is built in. Reported-and-tested-by: Emanuel Czirai Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20160904093736.GA11939@pd.tnic Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b816971f5da4..620ab06bcf45 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -54,6 +54,7 @@ static LIST_HEAD(pcache); */ static u8 *container; static size_t container_size; +static bool ucode_builtin; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; @@ -281,18 +282,22 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp, void __init load_ucode_amd_bsp(unsigned int family) { struct cpio_data cp; + bool *builtin; void **data; size_t *size; #ifdef CONFIG_X86_32 data = (void **)__pa_nodebug(&ucode_cpio.data); size = (size_t *)__pa_nodebug(&ucode_cpio.size); + builtin = (bool *)__pa_nodebug(&ucode_builtin); #else data = &ucode_cpio.data; size = &ucode_cpio.size; + builtin = &ucode_builtin; #endif - if (!load_builtin_amd_microcode(&cp, family)) + *builtin = load_builtin_amd_microcode(&cp, family); + if (!*builtin) cp = find_ucode_in_initrd(); if (!(cp.data && cp.size)) @@ -373,7 +378,8 @@ void load_ucode_amd_ap(void) return; /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); @@ -439,7 +445,8 @@ int __init save_microcode_in_initrd_amd(void) container = cont_va; /* Add CONFIG_RANDOMIZE_MEMORY offset. 
*/ - container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + container += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); From d7127b5e5fa0551be21b86640f1648b224e36d43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Aug 2016 08:16:00 +0200 Subject: [PATCH 0050/1050] locking/barriers: Don't use sizeof(void) in lockless_dereference() My previous commit: 112dc0c8069e ("locking/barriers: Suppress sparse warnings in lockless_dereference()") caused sparse to complain that (in radix-tree.h) we use sizeof(void) since that rcu_dereference()s a void *. Really, all we need is to have the expression *p in here somewhere to make sure p is a pointer type, and sizeof(*p) was the thing that came to my mind first to make sure that's done without really doing anything at runtime. Another thing I had considered was using typeof(*p), but obviously we can't just declare a typeof(*p) variable either, since that may end up being void. Declaring a variable as typeof(*p)* gets around that, and still checks that typeof(*p) is valid, so do that. This type construction can't be done for _________p1 because that will actually be used and causes sparse address space warnings, so keep a separate unused variable for it. Reported-by: Fengguang Wu Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kbuild-all@01.org Fixes: 112dc0c8069e ("locking/barriers: Suppress sparse warnings in lockless_dereference()") Link: http://lkml.kernel.org/r/1472192160-4049-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 436aa4e42221..668569844d37 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -527,13 +527,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. * - * The seemingly unused size_t variable is to validate @p is indeed a pointer - * type by making sure it can be dereferenced. + * The seemingly unused variable ___typecheck_p validates that @p is + * indeed a pointer type by using a pointer to typeof(*p) as the type. + * Taking a pointer to typeof(*p) again is needed in case p is void *. */ #define lockless_dereference(p) \ ({ \ typeof(p) _________p1 = READ_ONCE(p); \ - size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ + typeof(*(p)) *___typecheck_p __maybe_unused; \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) From 58763148758057ffc447bf990321d3ea86d199a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Aug 2016 10:15:03 +0200 Subject: [PATCH 0051/1050] perf/core: Remove WARN from perf_event_read() This effectively reverts commit: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI") ... and puts in a comment explaining why we ignore the return value. 
Reported-by: Vegard Nossum
Signed-off-by: Peter Zijlstra (Intel)
Cc: David Carrillo-Cisneros
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Fixes: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI")
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf7b942..07ac8596a728 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group)
 			.group = group,
 			.ret = 0,
 		};
-		ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
-		/* The event must have been read from an online CPU: */
-		WARN_ON_ONCE(ret);
-		ret = ret ? : data.ret;
+		/*
+		 * Purposely ignore the smp_call_function_single() return
+		 * value.
+		 *
+		 * If event->oncpu isn't a valid CPU it means the event got
+		 * scheduled out and that will have updated the event count.
+		 *
+		 * Therefore, either way, we'll have an up-to-date event count
+		 * after this.
+		 */
+		(void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+		ret = data.ret;
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
 		unsigned long flags;

From 135e8c9250dd5c8c9aae5984fde6f230d0cbfeaf Mon Sep 17 00:00:00 2001
From: Balbir Singh
Date: Mon, 5 Sep 2016 13:16:40 +1000
Subject: [PATCH 0052/1050] sched/core: Fix a race between try_to_wake_up() and a woken up task

The origin of the issue I've seen is related to a missing memory barrier
between the check for task->state and the check for task->on_rq.

The task being woken up is already awake from a schedule() and is doing
the following:

	do {
		schedule()
		set_current_state(TASK_(UN)INTERRUPTIBLE);
	} while (!cond);

The waker actually gets stuck doing the following in try_to_wake_up():

	while (p->on_cpu)
		cpu_relax();

Analysis:

The instance I've seen involves the following race:

 CPU1					CPU2

 while () {
   if (cond)
     break;
   do {
     schedule();
     set_current_state(TASK_UN..)
   } while (!cond);
					wakeup_routine()
					  spin_lock_irqsave(wait_lock)
   raw_spin_lock_irqsave(wait_lock)	  wake_up_process()
 }					  try_to_wake_up()
 set_current_state(TASK_RUNNING);	  ..
 list_del(&waiter.list);

CPU2 wakes up CPU1, but before it can get the wait_lock and set current
state to TASK_RUNNING the following occurs:

 CPU3
 wakeup_routine()
 raw_spin_lock_irqsave(wait_lock)
 if (!list_empty)
   wake_up_process()
 try_to_wake_up()
 raw_spin_lock_irqsave(p->pi_lock)
 ..
 if (p->on_rq && ttwu_wakeup())
 ..
 while (p->on_cpu)
   cpu_relax()
 ..

CPU3 tries to wake up the task on CPU1 again since it finds it on the
wait_queue, CPU1 is spinning on wait_lock, but immediately after CPU2,
CPU3 got it.

CPU3 checks the state of p on CPU1, it is TASK_UNINTERRUPTIBLE and the
task is spinning on the wait_lock. Interestingly since p->on_rq is checked
under pi_lock, I've noticed that try_to_wake_up() finds p->on_rq to be 0.
This was the most confusing bit of the analysis, but p->on_rq is changed
under the runqueue lock, rq_lock, so the p->on_rq check is not reliable
without this fix IMHO. The race is visible (based on the analysis) only
when ttwu_queue() does a remote wakeup via ttwu_queue_remote. In which
case the p->on_rq change is not done under the pi_lock.

The result is that after a while the entire system locks up on the
raw_spin_lock_irqsave(wait_lock) and the holder spins infinitely.

Reproduction of the issue:

The issue can be reproduced after a long run on my system with 80 threads,
with available memory tweaked to be very low, while running the memory
stress-ng mmapfork test. It usually takes a long time to reproduce. I am
trying to work on a test case that can reproduce the issue faster, but
that's work in progress. I am still testing the changes in a loop and the
tests seem OK thus far.

Big thanks to Benjamin and Nick for helping debug this as well. Ben helped
catch the missing barrier, Nick caught every missing bit in my theory.

Signed-off-by: Balbir Singh
[ Updated comment to clarify matching barriers. Many architectures do
  not have a full barrier in switch_to() so that cannot be relied upon. ]
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Benjamin Herrenschmidt
Cc: Alexey Kardashevskiy
Cc: Linus Torvalds
Cc: Nicholas Piggin
Cc: Nicholas Piggin
Cc: Oleg Nesterov
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc:
Link: http://lkml.kernel.org/r/e02cce7b-d9ca-1ad0-7a61-ea97c7582b37@gmail.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/core.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2a906f20fba7..44817c640e99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2016,6 +2016,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	success = 1; /* we're going to change ->state */
 	cpu = task_cpu(p);

+	/*
+	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
+	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
+	 * in smp_cond_load_acquire() below.
+	 *
+	 * sched_ttwu_pending()			try_to_wake_up()
+	 *   [S] p->on_rq = 1;			[L] P->state
+	 *       UNLOCK rq->lock  -----.
+	 *                              \
+	 *				 +--- RMB
+	 * schedule()                   /
+	 *       LOCK rq->lock    -----'
+	 *       UNLOCK rq->lock
+	 *
+	 * [task p]
+	 *   [S] p->state = UNINTERRUPTIBLE	[L] p->on_rq
+	 *
+	 * Pairs with the UNLOCK+LOCK on rq->lock from the
+	 * last wakeup of our task and the schedule that got our task
+	 * current.
+	 */
+	smp_rmb();
 	if (p->on_rq && ttwu_remote(p, wake_flags))
 		goto stat;

From d4c4fed08f31f3746000c46cb1b20bed2959547a Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Mon, 15 Aug 2016 09:05:45 -0600
Subject: [PATCH 0053/1050] efi: Make for_each_efi_memory_desc_in_map() cope with running on Xen

While commit 55f1ea15216 ("efi: Fix for_each_efi_memory_desc_in_map() for
empty memmaps") made an attempt to deal with empty memory maps, it didn't
address the case where the map field never gets set, as is apparently the
case when running under Xen.
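The fix below simply checks the loop cursor for NULL before doing any pointer arithmetic. A userspace model of the same guard (illustrative types, not the EFI code):

#include <stddef.h>

struct desc { unsigned long phys_addr; unsigned long num_pages; };

/* if map is NULL the cursor starts NULL and && short-circuits */
#define for_each_desc(map, md, map_end, desc_size)			\
	for ((md) = (map);						\
	     (md) && ((char *)(md) + (desc_size)) <= (char *)(map_end);	\
	     (md) = (struct desc *)((char *)(md) + (desc_size)))

With map == NULL the loop body never runs, instead of computing an address from a NULL base as the unguarded version would.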
Reported-by:
Tested-by:
Cc: Vitaly Kuznetsov
Cc: Jiri Slaby
Cc: Mark Rutland
Cc: # v4.7+
Signed-off-by: Jan Beulich
[ Guard the loop with a NULL check instead of pointer underflow ]
Signed-off-by: Matt Fleming
---
 include/linux/efi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 7f5a58225385..23cd3ced8c1a 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -946,7 +946,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm,
 /* Iterate through an efi_memory_map */
 #define for_each_efi_memory_desc_in_map(m, md)				   \
 	for ((md) = (m)->map;						   \
-	     ((void *)(md) + (m)->desc_size) <= (m)->map_end;		   \
+	     (md) && ((void *)(md) + (m)->desc_size) <= (m)->map_end;	   \
 	     (md) = (void *)(md) + (m)->desc_size)

 /**

From 4af9ed578a50cd331a725322cfd9d555251ce788 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda
Date: Tue, 30 Aug 2016 12:41:37 +0200
Subject: [PATCH 0054/1050] efi: Fix handling of error value in fdt_find_uefi_params

of_get_flat_dt_subnode_by_name() can return a negative value in case of
error. Assigning the result to an unsigned variable and checking whether
the variable is less than zero is incorrect, because the check is always
false. The patch fixes it by using a signed variable to check the result.

The problem has been detected using the semantic patch
scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci

Signed-off-by: Andrzej Hajda
Cc: Bartlomiej Zolnierkiewicz
Cc: Marek Szyprowski
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: "H. Peter Anvin"
Cc: Shawn Lin
Cc: Mark Rutland
Cc:
Signed-off-by: Matt Fleming
---
 drivers/firmware/efi/efi.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 5a2631af7410..7dd2e2d37231 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -657,9 +657,12 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname,
 		}

 		if (subnode) {
-			node = of_get_flat_dt_subnode_by_name(node, subnode);
-			if (node < 0)
+			int err = of_get_flat_dt_subnode_by_name(node, subnode);
+
+			if (err < 0)
 				return 0;
+
+			node = err;
 		}

 		return __find_uefi_params(node, info, dt_params[i].params);

From dadb57abc37499f565b23933dbf49b435c3ba8af Mon Sep 17 00:00:00 2001
From: Jeffrey Hugo
Date: Mon, 29 Aug 2016 14:38:51 -0600
Subject: [PATCH 0055/1050] efi/libstub: Allocate headspace in efi_get_memory_map()

efi_get_memory_map() allocates a buffer to store the memory map that it
retrieves. This buffer may need to be reused by the client after
ExitBootServices() is called, at which point allocations are no longer
permitted. To support this use case, provide the allocated buffer size back
to the client, and allocate some additional headroom to account for any
reasonable growth in the map that is likely to happen between the call to
efi_get_memory_map() and the client reusing the buffer.
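The headroom logic in miniature (a userspace model, not the stub code: firmware_get_map() stands in for GetMemoryMap(), returning nonzero and storing the required byte count in *size when the buffer is too small):

#include <stdlib.h>

#define SLACK_SLOTS 8

static void *get_map_with_headroom(int (*firmware_get_map)(void *, size_t *),
				   size_t desc_size, size_t *buff_size)
{
	size_t size = desc_size * 32;
	void *buf;

	for (;;) {
		*buff_size = size;
		buf = malloc(size);
		if (!buf)
			return NULL;
		/* success AND at least SLACK_SLOTS spare entries? */
		if (firmware_get_map(buf, &size) == 0 &&
		    (*buff_size - size) / desc_size >= SLACK_SLOTS)
			return buf;
		free(buf);
		/* grow to the reported size plus spare slots, then retry */
		size += desc_size * SLACK_SLOTS;
	}
}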
Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 18 ++-- .../firmware/efi/libstub/efi-stub-helper.c | 96 +++++++++++++------ drivers/firmware/efi/libstub/fdt.c | 17 +++- drivers/firmware/efi/libstub/random.c | 12 ++- include/linux/efi.h | 15 ++- 5 files changed, 110 insertions(+), 48 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff574dad95cc..c5b7c7b4f0d7 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1008,7 +1008,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { struct efi_info *efi = &boot_params->efi_info; - unsigned long map_sz, key, desc_size; + unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; const char *signature; @@ -1019,14 +1019,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params, bool called_exit = false; u8 nr_entries; int i; + struct efi_boot_memmap map; - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; + nr_desc = 0; + e820ext = NULL; + e820ext_size = 0; + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; get_map: - status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size, - &desc_version, &key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3bd127f95315..29368ac69221 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -41,6 +41,8 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +#define EFI_MMAP_NR_SLACK_SLOTS 8 + struct file_info { efi_file_handle_t *handle; u64 size; @@ -63,49 +65,62 @@ void efi_printk(efi_system_table_t *sys_table_arg, char *str) } } +static inline bool mmap_has_headroom(unsigned long buff_size, + unsigned long map_size, + unsigned long desc_size) +{ + unsigned long slack = buff_size - map_size; + + return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS; +} + efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - efi_memory_desc_t **map, - unsigned long *map_size, - unsigned long *desc_size, - u32 *desc_ver, - unsigned long *key_ptr) + struct efi_boot_memmap *map) { efi_memory_desc_t *m = NULL; efi_status_t status; unsigned long key; u32 desc_version; - *map_size = sizeof(*m) * 32; + *map->desc_size = sizeof(*m); + *map->map_size = *map->desc_size * 32; + *map->buff_size = *map->map_size; again: - /* - * Add an additional efi_memory_desc_t because we're doing an - * allocation which may be in a new descriptor region. 
- */ - *map_size += sizeof(*m); status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - *map_size, (void **)&m); + *map->map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; - *desc_size = 0; + *map->desc_size = 0; key = 0; - status = efi_call_early(get_memory_map, map_size, m, - &key, desc_size, &desc_version); - if (status == EFI_BUFFER_TOO_SMALL) { + status = efi_call_early(get_memory_map, map->map_size, m, + &key, map->desc_size, &desc_version); + if (status == EFI_BUFFER_TOO_SMALL || + !mmap_has_headroom(*map->buff_size, *map->map_size, + *map->desc_size)) { efi_call_early(free_pool, m); + /* + * Make sure there is some entries of headroom so that the + * buffer can be reused for a new map after allocations are + * no longer permitted. Its unlikely that the map will grow to + * exceed this headroom once we are ready to trigger + * ExitBootServices() + */ + *map->map_size += *map->desc_size * EFI_MMAP_NR_SLACK_SLOTS; + *map->buff_size = *map->map_size; goto again; } if (status != EFI_SUCCESS) efi_call_early(free_pool, m); - if (key_ptr && status == EFI_SUCCESS) - *key_ptr = key; - if (desc_ver && status == EFI_SUCCESS) - *desc_ver = desc_version; + if (map->key_ptr && status == EFI_SUCCESS) + *map->key_ptr = key; + if (map->desc_ver && status == EFI_SUCCESS) + *map->desc_ver = desc_version; fail: - *map = m; + *map->map = m; return status; } @@ -113,13 +128,20 @@ fail: unsigned long get_dram_base(efi_system_table_t *sys_table_arg) { efi_status_t status; - unsigned long map_size; + unsigned long map_size, buff_size; unsigned long membase = EFI_ERROR; struct efi_memory_map map; efi_memory_desc_t *md; + struct efi_boot_memmap boot_map; - status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map, - &map_size, &map.desc_size, NULL, NULL); + boot_map.map = (efi_memory_desc_t **)&map.map; + boot_map.map_size = &map_size; + boot_map.desc_size = &map.desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) return membase; @@ -144,15 +166,22 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr, unsigned long max) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; efi_memory_desc_t *map; efi_status_t status; unsigned long nr_pages; u64 max_addr = 0; int i; + struct efi_boot_memmap boot_map; - status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size, - NULL, NULL); + boot_map.map = ↦ + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) goto fail; @@ -230,14 +259,21 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; efi_memory_desc_t *map; efi_status_t status; unsigned long nr_pages; int i; + struct efi_boot_memmap boot_map; - status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size, - NULL, NULL); + boot_map.map = ↦ + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) goto 
fail; diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index e58abfa953cc..bec0fa8d8746 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -175,13 +175,21 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, unsigned long fdt_addr, unsigned long fdt_size) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; u32 desc_ver; unsigned long mmap_key; efi_memory_desc_t *memory_map, *runtime_map; unsigned long new_fdt_size; efi_status_t status; int runtime_entry_count = 0; + struct efi_boot_memmap map; + + map.map = &runtime_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = &desc_ver; + map.key_ptr = &mmap_key; + map.buff_size = &buff_size; /* * Get a copy of the current memory map that we will use to prepare @@ -189,8 +197,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * subsequent allocations adding entries, since they could not affect * the number of EFI_MEMORY_RUNTIME regions. */ - status = efi_get_memory_map(sys_table, &runtime_map, &map_size, - &desc_size, &desc_ver, &mmap_key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n"); return status; @@ -199,6 +206,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi(sys_table, "Exiting boot services and installing virtual address map...\n"); + map.map = &memory_map; /* * Estimate size of new FDT, and allocate memory for it. We * will allocate a bigger buffer if this ends up being too @@ -218,8 +226,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * we can get the memory map key needed for * exit_boot_services(). 
*/ - status = efi_get_memory_map(sys_table, &memory_map, &map_size, - &desc_size, &desc_ver, &mmap_key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) goto fail_free_new_fdt; diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 53f6d3fe6d86..0c9f58c5ba50 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -73,12 +73,20 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, unsigned long random_seed) { unsigned long map_size, desc_size, total_slots = 0, target_slot; + unsigned long buff_size; efi_status_t status; efi_memory_desc_t *memory_map; int map_offset; + struct efi_boot_memmap map; - status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size, - &desc_size, NULL, NULL); + map.map = &memory_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = NULL; + map.key_ptr = NULL; + map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &map); if (status != EFI_SUCCESS) return status; diff --git a/include/linux/efi.h b/include/linux/efi.h index 23cd3ced8c1a..943fee524176 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -118,6 +118,15 @@ typedef struct { u32 imagesize; } efi_capsule_header_t; +struct efi_boot_memmap { + efi_memory_desc_t **map; + unsigned long *map_size; + unsigned long *desc_size; + u32 *desc_ver; + unsigned long *key_ptr; + unsigned long *buff_size; +}; + /* * EFI capsule flags */ @@ -1371,11 +1380,7 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, efi_loaded_image_t *image, int *cmd_line_len); efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - efi_memory_desc_t **map, - unsigned long *map_size, - unsigned long *desc_size, - u32 *desc_ver, - unsigned long *key_ptr); + struct efi_boot_memmap *map); efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, From fc07716ba803483be91bc4b2344f9c84985e6f07 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:52 -0600 Subject: [PATCH 0056/1050] efi/libstub: Introduce ExitBootServices helper The spec allows ExitBootServices to fail with EFI_INVALID_PARAMETER if a race condition has occurred where the EFI has updated the memory map after the stub grabbed a reference to the map. The spec defines a retry procedure with specific requirements to handle this scenario. This scenario was previously observed on x86 - commit d3768d885c6c ("x86, efi: retry ExitBootServices() on failure") but the current fix is not spec compliant and the scenario is now observed on the Qualcomm Technologies QDF2432 via the FDT stub which does not handle the error and thus causes boot failures. The user will notice the boot failure as the kernel is not executed and the system may drop back to a UEFI shell, but will be unresponsive to input and will require a power cycle to recover. Add a helper to the stub library that correctly adheres to the spec in the case of EFI_INVALID_PARAMETER from ExitBootServices and can be universally used across all stub implementations.
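For orientation, a minimal sketch of how a stub would drive the new helper (illustrative only, assuming the efi_boot_memmap plumbing from the previous patch; sample_priv_func and sample_exit_boot are invented names, and the real call sites are converted in the follow-up patches below):

static efi_status_t sample_priv_func(efi_system_table_t *sys_table_arg,
                                     struct efi_boot_memmap *map,
                                     void *priv)
{
        /* Inspect or record the final memory map here; the helper may
         * call this more than once if ExitBootServices() is retried. */
        return EFI_SUCCESS;
}

static efi_status_t sample_exit_boot(efi_system_table_t *sys_table,
                                     void *handle)
{
        unsigned long map_size, desc_size, buff_size, mmap_key;
        efi_memory_desc_t *memory_map;
        struct efi_boot_memmap map;
        u32 desc_ver;

        map.map = &memory_map;
        map.map_size = &map_size;
        map.desc_size = &desc_size;
        map.desc_ver = &desc_ver;
        map.key_ptr = &mmap_key;
        map.buff_size = &buff_size;

        /* Obtains the map, runs the callback, calls ExitBootServices(),
         * and performs the spec-mandated retry on EFI_INVALID_PARAMETER. */
        return efi_exit_boot_services(sys_table, handle, &map, NULL,
                                      sample_priv_func);
}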
Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- .../firmware/efi/libstub/efi-stub-helper.c | 73 +++++++++++++++++++ include/linux/efi.h | 10 +++ 2 files changed, 83 insertions(+) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 29368ac69221..aded10662020 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -740,3 +740,76 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, *cmd_line_len = options_bytes; return (char *)cmdline_addr; } + +/* + * Handle calling ExitBootServices according to the requirements set out by the + * spec. Obtains the current memory map, and returns that info after calling + * ExitBootServices. The client must specify a function to perform any + * processing of the memory map data prior to ExitBootServices. A client + * specific structure may be passed to the function via priv. The client + * function may be called multiple times. + */ +efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg, + void *handle, + struct efi_boot_memmap *map, + void *priv, + efi_exit_boot_map_processing priv_func) +{ + efi_status_t status; + + status = efi_get_memory_map(sys_table_arg, map); + + if (status != EFI_SUCCESS) + goto fail; + + status = priv_func(sys_table_arg, map, priv); + if (status != EFI_SUCCESS) + goto free_map; + + status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + + if (status == EFI_INVALID_PARAMETER) { + /* + * The memory map changed between efi_get_memory_map() and + * exit_boot_services(). Per the UEFI Spec v2.6, Section 6.4: + * EFI_BOOT_SERVICES.ExitBootServices, we need to get the + * updated map and try again. The spec implies one retry + * should be sufficient, which is confirmed against the EDK2 + * implementation. Per the spec, we can only invoke + * get_memory_map() and exit_boot_services() - we cannot alloc, + * so efi_get_memory_map() cannot be used, and we must reuse + * the buffer. For all practical purposes, the headroom in the + * buffer should account for any changes in the map so the call + * to get_memory_map() is expected to succeed here.
+ */ + *map->map_size = *map->buff_size; + status = efi_call_early(get_memory_map, + map->map_size, + *map->map, + map->key_ptr, + map->desc_size, + map->desc_ver); + + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + status = priv_func(sys_table_arg, map, priv); + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + } + + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + return EFI_SUCCESS; + +free_map: + efi_call_early(free_pool, *map->map); +fail: + return status; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 943fee524176..0148a3046b48 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1462,4 +1462,14 @@ extern void efi_call_virt_check_flags(unsigned long flags, const char *call); arch_efi_call_virt_teardown(); \ }) +typedef efi_status_t (*efi_exit_boot_map_processing)( + efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv); + +efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table, + void *handle, + struct efi_boot_memmap *map, + void *priv, + efi_exit_boot_map_processing priv_func); #endif /* _LINUX_EFI_H */ From ed9cc156c42ff0c0bf9b1d09df48a12bf0873473 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:53 -0600 Subject: [PATCH 0057/1050] efi/libstub: Use efi_exit_boot_services() in FDT The FDT code directly calls ExitBootServices. This is inadvisable as the UEFI spec details a complex set of errors, race conditions, and API interactions that the caller of ExitBootServices must get correct. The FDT code does not handle EFI_INVALID_PARAMETER as required by the spec, which causes intermittent boot failures on the Qualcomm Technologies QDF2432. Call the efi_exit_boot_services() helper instead, which handles the EFI_INVALID_PARAMETER scenario properly. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efi/libstub/fdt.c | 37 ++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index bec0fa8d8746..a6a93116a8f0 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -152,6 +152,27 @@ fdt_set_fail: #define EFI_FDT_ALIGN EFI_PAGE_SIZE #endif +struct exit_boot_struct { + efi_memory_desc_t *runtime_map; + int *runtime_entry_count; +}; + +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + struct exit_boot_struct *p = priv; + /* + * Update the memory map with virtual addresses. The function will also + * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME + * entries so that we can pass it straight to SetVirtualAddressMap() + */ + efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, + p->runtime_map, p->runtime_entry_count); + + return EFI_SUCCESS; +} + /* * Allocate memory for a new FDT, then add EFI, commandline, and * initrd related fields to the FDT.
This routine increases the @@ -183,6 +204,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, efi_status_t status; int runtime_entry_count = 0; struct efi_boot_memmap map; + struct exit_boot_struct priv; map.map = &runtime_map; map.map_size = &map_size; @@ -257,16 +279,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, } } - /* - * Update the memory map with virtual addresses. The function will also - * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME - * entries so that we can pass it straight into SetVirtualAddressMap() - */ - efi_get_virtmap(memory_map, map_size, desc_size, runtime_map, - &runtime_entry_count); - - /* Now we are ready to exit_boot_services.*/ - status = sys_table->boottime->exit_boot_services(handle, mmap_key); + sys_table->boottime->free_pool(memory_map); + priv.runtime_map = runtime_map; + priv.runtime_entry_count = &runtime_entry_count; + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; From d64934019f6cc39202e2f78063709f61ca5cb364 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:54 -0600 Subject: [PATCH 0058/1050] x86/efi: Use efi_exit_boot_services() The eboot code directly calls ExitBootServices. This is inadvisable as the UEFI spec details a complex set of errors, race conditions, and API interactions that the caller of ExitBootServices must get correct. The eboot code attempts allocations after calling ExitBootServices, which is not permitted per the spec. Call the efi_exit_boot_services() helper instead, which handles the allocation scenario properly. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 134 +++++++++++++++---------------- 1 file changed, 66 insertions(+), 68 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c5b7c7b4f0d7..94dd4a31f5b3 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1004,85 +1004,87 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, return status; } +struct exit_boot_struct { + struct boot_params *boot_params; + struct efi_info *efi; + struct setup_data *e820ext; + __u32 e820ext_size; + bool is64; +}; + +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + static bool first = true; + const char *signature; + __u32 nr_desc; + efi_status_t status; + struct exit_boot_struct *p = priv; + + if (first) { + nr_desc = *map->buff_size / *map->desc_size; + if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) { + u32 nr_e820ext = nr_desc - + ARRAY_SIZE(p->boot_params->e820_map); + + status = alloc_e820ext(nr_e820ext, &p->e820ext, + &p->e820ext_size); + if (status != EFI_SUCCESS) + return status; + } + first = false; + } + + signature = p->is64 ?
EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; + memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); + + p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_memdesc_size = *map->desc_size; + p->efi->efi_memdesc_version = *map->desc_ver; + p->efi->efi_memmap = (unsigned long)*map->map; + p->efi->efi_memmap_size = *map->map_size; + +#ifdef CONFIG_X86_64 + p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; +#endif + + return EFI_SUCCESS; +} + static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { - struct efi_info *efi = &boot_params->efi_info; unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; - const char *signature; __u32 e820ext_size; - __u32 nr_desc, prev_nr_desc; efi_status_t status; __u32 desc_version; - bool called_exit = false; - u8 nr_entries; - int i; struct efi_boot_memmap map; + struct exit_boot_struct priv; - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; - map.map = &mem_map; - map.map_size = &map_sz; - map.desc_size = &desc_size; - map.desc_ver = &desc_version; - map.key_ptr = &key; - map.buff_size = &buff_size; - -get_map: - status = efi_get_memory_map(sys_table, &map); + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; + priv.boot_params = boot_params; + priv.efi = &boot_params->efi_info; + priv.e820ext = NULL; + priv.e820ext_size = 0; + priv.is64 = is64; + /* Might as well exit boot services now */ + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); if (status != EFI_SUCCESS) return status; - prev_nr_desc = nr_desc; - nr_desc = map_sz / desc_size; - if (nr_desc > prev_nr_desc && - nr_desc > ARRAY_SIZE(boot_params->e820_map)) { - u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map); - - status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size); - if (status != EFI_SUCCESS) - goto free_mem_map; - - efi_call_early(free_pool, mem_map); - goto get_map; /* Allocated memory, get map again */ - } - - signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; - memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); - - efi->efi_systab = (unsigned long)sys_table; - efi->efi_memdesc_size = desc_size; - efi->efi_memdesc_version = desc_version; - efi->efi_memmap = (unsigned long)mem_map; - efi->efi_memmap_size = map_sz; - -#ifdef CONFIG_X86_64 - efi->efi_systab_hi = (unsigned long)sys_table >> 32; - efi->efi_memmap_hi = (unsigned long)mem_map >> 32; -#endif - - /* Might as well exit boot services now */ - status = efi_call_early(exit_boot_services, handle, key); - if (status != EFI_SUCCESS) { - /* - * ExitBootServices() will fail if any of the event - * handlers change the memory map. In which case, we - * must be prepared to retry, but only once so that - * we're guaranteed to exit on repeated failures instead - * of spinning forever. - */ - if (called_exit) - goto free_mem_map; - - called_exit = true; - efi_call_early(free_pool, mem_map); - goto get_map; - } - + e820ext = priv.e820ext; + e820ext_size = priv.e820ext_size; /* Historic? 
*/ boot_params->alt_mem_k = 32 * 1024; @@ -1091,10 +1093,6 @@ get_map: return status; return EFI_SUCCESS; - -free_mem_map: - efi_call_early(free_pool, mem_map); - return status; } /* From 4d21cef3ea00ba3ac508eb61fb8db70e3e31df67 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Sep 2016 12:33:49 +0200 Subject: [PATCH 0059/1050] KVM: s390: vsie: fix riccbd We store the address of riccbd at the wrong location, overwriting gvrd. This means that our nested guest will not be able to use runtime instrumentation. It also causes a memory leak if our KVM guest actually sets gvrd. This was not noticed until now, as KVM guests never make use of gvrd and runtime instrumentation wasn't completely tested yet. Reported-by: Fan Zhang Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Cornelia Huck --- arch/s390/kvm/vsie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c106488b4137..d8673e243f13 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -584,7 +584,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* Validity 0x0044 will be checked by SIE */ if (rc) goto unpin; - scb_s->gvrd = hpa; + scb_s->riccbd = hpa; } return 0; unpin: From e12c8f36f3f7a60d55938c5aed5999278fa92bcb Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 30 Aug 2016 16:14:00 +0800 Subject: [PATCH 0060/1050] KVM: lapic: adjust preemption timer correctly when TSC goes backward MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TSC_OFFSET will be adjusted if KVM discovers a backwards TSC during vCPU load. The preemption timer, which relies on the guest TSC to reprogram its preemption timer value, is also reprogrammed if the vCPU is scheduled in on a different pCPU. However, the current implementation reprograms the preemption timer before TSC_OFFSET is adjusted to the right value, resulting in the preemption timer firing prematurely. This patch fixes it by adjusting TSC_OFFSET before reprogramming the preemption timer when the TSC has gone backwards. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Yunhong Jiang Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19f9f9e05c2a..699f8726539a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2743,16 +2743,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); - if (kvm_lapic_hv_timer_in_use(vcpu) && - kvm_x86_ops->set_hv_timer(vcpu, - kvm_get_lapic_tscdeadline_msr(vcpu))) - kvm_lapic_switch_to_sw_timer(vcpu); if (check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; } + if (kvm_lapic_hv_timer_in_use(vcpu) && + kvm_x86_ops->set_hv_timer(vcpu, + kvm_get_lapic_tscdeadline_msr(vcpu))) + kvm_lapic_switch_to_sw_timer(vcpu); /* * On a host with synchronized TSC, there is no need to update * kvmclock on vcpu->cpu migration From 5210d393ef84e5d2a4854671a9af2d97fd1b8dd4 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 2 Sep 2016 20:49:12 +0800 Subject: [PATCH 0061/1050] netfilter: nf_tables_trace: fix endianness when dumping chain policy The NFTA_TRACE_POLICY attribute is big endian, but we forgot to call htonl to convert it. Fortunately, this attribute is parsed as big endian in libnftnl.
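For context, a small sketch of the byte-order convention involved (illustrative only; the local variable is invented for the example): nla_put_be32() stores its argument verbatim and the attribute is declared big endian, so a host-order value must go through htonl() at the wire boundary:

        u32 policy = NF_ACCEPT;         /* host byte order */

        /* Convert to network (big endian) byte order before emitting. */
        if (nla_put_be32(skb, NFTA_TRACE_POLICY, htonl(policy)))
                goto nla_put_failure;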
Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 39eb1cc62e91..fa24a5b398b1 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -237,7 +237,7 @@ void nft_trace_notify(struct nft_traceinfo *info) break; case NFT_TRACETYPE_POLICY: if (nla_put_be32(skb, NFTA_TRACE_POLICY, - info->basechain->policy)) + htonl(info->basechain->policy))) goto nla_put_failure; break; } From 79d102cbfd2e9d94257fcc7c82807ef1cdf80322 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Sep 2016 17:30:07 +0200 Subject: [PATCH 0062/1050] perf/x86/intel/cqm: Check cqm/mbm enabled state in event init Yanqiu Zhang reported a kernel panic when using an mbm event on a system where CQM is detected but without mbm event support, for example with perf: # perf stat -e 'intel_cqm/event=3/' -a BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] update_sample+0xbc/0xe0 ... [] __intel_mbm_event_init+0x18/0x20 [] flush_smp_call_function_queue+0x7b/0x160 [] generic_smp_call_function_single_interrupt+0x13/0x60 [] smp_call_function_interrupt+0x27/0x40 [] call_function_interrupt+0x8c/0xa0 ... The reason is that we currently allow an mbm event to be initialized even if mbm support is not detected. Add checks for both cqm and mbm events and their supported state to cqm's event_init(). Fixes: 33c3cc7acfd9 ("perf/x86/mbm: Add Intel Memory B/W Monitoring enumeration and init") Reported-by: Yanqiu Zhang Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Vikas Shivappa Cc: Tony Luck Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1473089407-21857-1-git-send-email-jolsa@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/cqm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 783c49ddef29..8f82b02934fa 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info); static void init_mbm_sample(u32 rmid, u32 evt_type); static void __intel_mbm_event_count(void *info); +static bool is_cqm_event(int e) +{ + return (e == QOS_L3_OCCUP_EVENT_ID); +} + static bool is_mbm_event(int e) { return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID); @@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event) (event->attr.config > QOS_MBM_LOCAL_EVENT_ID)) return -EINVAL; + if ((is_cqm_event(event->attr.config) && !cqm_enabled) || + (is_mbm_event(event->attr.config) && !mbm_enabled)) + return -EINVAL; + /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || From d2896d4b55b2e32b423072a4124d7da4dc1e6cb1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Aug 2016 09:01:17 +0100 Subject: [PATCH 0063/1050] arm: KVM: Fix idmap overlap detection when the kernel is idmap'ed We're trying hard to detect when the HYP idmap overlaps with the HYP VA, as it makes the teardown of a CPU dangerous. But there is one case where an overlap is completely safe, which is when the whole of the kernel is idmap'ed, which is likely to happen on 32bit when RAM is at 0x80000000 and we're using a 2G/2G VA split. In that case, we can proceed safely.
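To make the safe case concrete, a worked example (the constants assume CONFIG_VMSPLIT_2G and are illustrative): with a 2G/2G split, PAGE_OFFSET is 0x80000000; if RAM also starts at physical address 0x80000000, the kernel's virtual and physical addresses coincide, so the physical address of __hyp_idmap_text_start equals its virtual address. The idmap page then necessarily lands inside the HYP VA range, but since hyp_idmap_start == (unsigned long)__hyp_idmap_text_start the mapping is a plain identity mapping, which is exactly the exemption the check below adds.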
Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 29d0b23af2a9..a3faafe70081 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1714,7 +1714,8 @@ int kvm_mmu_init(void) kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && - hyp_idmap_start < kern_hyp_va(~0UL)) { + hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { /* * The idmap page is intersecting with the VA space, * it is not safe to continue further. From d31ed3f05763644840c654a384eaefa94c097ba2 Mon Sep 17 00:00:00 2001 From: Jan Leupold Date: Wed, 6 Jul 2016 13:22:35 +0200 Subject: [PATCH 0064/1050] drm: atmel-hlcdc: Fix vertical scaling The code is applying the same scaling for the X and Y components, thus making the scaling feature only functional when both components have the same scaling factor. Do the s/_w/_h/ replacement where appropriate to fix vertical scaling. Signed-off-by: Jan Leupold Fixes: 1a396789f65a2 ("drm: add Atmel HLCDC Display Controller support") Cc: Signed-off-by: Boris Brezillon --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 016c191221f3..52c527f6642a 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -320,19 +320,19 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane, u32 *coeff_tab = heo_upscaling_ycoef; u32 max_memsize; - if (state->crtc_w < state->src_w) + if (state->crtc_h < state->src_h) coeff_tab = heo_downscaling_ycoef; for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++) atmel_hlcdc_layer_update_cfg(&plane->layer, 33 + i, 0xffffffff, coeff_tab[i]); - factor = ((8 * 256 * state->src_w) - (256 * 4)) / - state->crtc_w; + factor = ((8 * 256 * state->src_h) - (256 * 4)) / + state->crtc_h; factor++; - max_memsize = ((factor * state->crtc_w) + (256 * 4)) / + max_memsize = ((factor * state->crtc_h) + (256 * 4)) / 2048; - if (max_memsize > state->src_w) + if (max_memsize > state->src_h) factor--; factor_reg |= (factor << 16) | 0x80000000; } From 1ba7db07ccc2825669d6e376632316813a072887 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 11 Jul 2016 12:19:40 +0200 Subject: [PATCH 0065/1050] drm/atmel-hlcdc: Make ->reset() implementation static The atmel_hlcdc_crtc_reset() function is never used outside the file and can be static. This avoids a warning from sparse. 
Signed-off-by: Thierry Reding --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index a978381ef95b..9b17a66cf0e1 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -387,7 +387,7 @@ void atmel_hlcdc_crtc_irq(struct drm_crtc *c) atmel_hlcdc_crtc_finish_page_flip(drm_crtc_to_atmel_hlcdc_crtc(c)); } -void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc) +static void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc) { struct atmel_hlcdc_crtc_state *state; From cd28e716c6869d2f06e64bcd679d0a45dd8a6295 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Tue, 6 Sep 2016 12:04:11 +0800 Subject: [PATCH 0066/1050] drm/i915: enable vGPU detection for all vGPU capability is handled by the GVT-g host driver, so there is no need for an extra HW check for vGPU detection. We will actually support vGPU from BDW onwards. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang Reviewed-by: Joonas Lahtinen Acked-by: Chris Wilson Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160906040412.1274-2-zhenyuw@linux.intel.com (cherry picked from commit 8ef89995c735f978d5dfcb3ca6bce70d41728c91) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vgpu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index f6acb5a0e701..b81cfb3b22ec 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -65,9 +65,6 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); - if (!IS_HASWELL(dev_priv)) - return; - magic = __raw_i915_read64(dev_priv, vgtif_reg(magic)); if (magic != VGT_MAGIC) return; From 557b1a8cae25e36ac2f125d93f003e60a7d0d014 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 6 Sep 2016 12:04:12 +0800 Subject: [PATCH 0067/1050] drm/i915: disable 48bit full PPGTT when vGPU is active Disable 48bit full PPGTT on vGPU too for now. Signed-off-by: Zhi Wang Signed-off-by: Zhenyu Wang Reviewed-by: Joonas Lahtinen Acked-by: Chris Wilson Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160906040412.1274-3-zhenyuw@linux.intel.com (cherry picked from commit e320d40022128845dfff900422ea9fd69f576c98) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7a30af79d799..f38ceffd82c3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -122,8 +122,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; - if (intel_vgpu_active(dev_priv)) - has_full_ppgtt = false; /* emulation is too hard */ + if (intel_vgpu_active(dev_priv)) { + /* emulation is too hard */ + has_full_ppgtt = false; + has_full_48bit_ppgtt = false; + } if (!has_aliasing_ppgtt) return 0; @@ -158,7 +161,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 0; } - if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) + if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) return has_full_48bit_ppgtt ? 3 : 2; else return has_aliasing_ppgtt ?
1 : 0; From d1a6cba576fc7c43e476538fe5aa72fe04bd80e1 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:31:02 +0800 Subject: [PATCH 0068/1050] netfilter: nft_chain_route: re-route before skb is queued to userspace Imagine the following situation: a user adds the nft rules below, and queues the packets to userspace for further checking: # ip rule add fwmark 0x0/0x1 lookup eth0 # ip rule add fwmark 0x1/0x1 lookup eth1 # nft add table filter # nft add chain filter output {type route hook output priority 0 \;} # nft add rule filter output mark set 0x1 # nft add rule filter output queue num 0 But after we reinject the skbuff, the packet will be sent via the wrong route, i.e. in this case, the packet will be routed via the eth0 table, not the eth1 table. This is because we skip re-routing when the verdict is NF_QUEUE, even if the mark was changed. Actually, we should not touch the sk_buff if the verdict is NF_DROP or NF_STOLEN, and when re-routing fails, return NF_DROP with the error code. This is consistent with the mangle table in iptables. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_chain_route_ipv4.c | 11 +++++++---- net/ipv6/netfilter/nft_chain_route_ipv6.c | 10 +++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 2375b0a8be46..30493beb611a 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, __be32 saddr, daddr; u_int8_t tos; const struct iphdr *iph; + int err; /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || @@ -46,15 +47,17 @@ static unsigned int nf_route_table_hook(void *priv, tos = iph->tos; ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE) { + if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || - iph->tos != tos) - if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) - ret = NF_DROP; + iph->tos != tos) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } } return ret; } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 71d995ff3108..2535223ba956 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, struct in6_addr saddr, daddr; u_int8_t hop_limit; u32 mark, flowlabel; + int err; /* malformed packet, drop it */ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) @@ -46,13 +47,16 @@ static unsigned int nf_route_table_hook(void *priv, flowlabel = *((u32 *)ipv6_hdr(skb)); ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE && + if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(state->net, skb) == 0 ?
ret : NF_DROP; + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { + err = ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } return ret; } From 78d506e1b7071b24850fd5ac22b896c459b0a04c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Sep 2016 11:22:49 -0400 Subject: [PATCH 0069/1050] xprtrdma: Revert 3d4cf35bd4fa ("xprtrdma: Reply buffer exhaustion...") Receive buffer exhaustion, if it were to actually occur, would be catastrophic. However, when there are no reply buffers to post, that means all of them have already been posted and are waiting for incoming replies. By design, there can never be more RPCs in flight than there are available receive buffers. A receive buffer can be left posted after an RPC exits without a received reply; say, due to a credential problem or a soft timeout. This does not result in fewer posted receive buffers than there are pending RPCs, and there is already logic in xprtrdma to deal appropriately with this case. It also looks like the "+ 2" that was removed was accidentally accommodating the number of extra receive buffers needed for receiving backchannel requests. That will need to be addressed by another patch. Fixes: 3d4cf35bd4fa ("xprtrdma: Reply buffer exhaustion can be...") Signed-off-by: Chuck Lever Reviewed-by: Anna Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 536d0be3f61b..fefcba982415 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -923,7 +923,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests; i++) { + for (i = 0; i < buf->rb_max_requests + 2; i++) { struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); @@ -1076,6 +1076,8 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) /* * Get a set of request/reply buffers. + * + * Reply buffer (if available) is attached to send buffer upon return. */ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) @@ -1094,13 +1096,13 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) out_reqbuf: spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of request buffers (%p)\n", buffers); + pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; out_repbuf: - list_add(&req->rl_free, &buffers->rb_send_bufs); spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers); - return NULL; + pr_warn("RPC: %s: out of reply buffers\n", __func__); + req->rl_reply = NULL; + return req; } /* From 05c974669ecec510a85d8534099bb75404e82c41 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Sep 2016 11:22:58 -0400 Subject: [PATCH 0070/1050] xprtrdma: Fix receive buffer accounting An RPC can terminate before its reply arrives, if a credential problem or a soft timeout occurs. After this happens, xprtrdma reports it is out of Receive buffers. A Receive buffer is posted before each RPC is sent, and returned to the buffer pool when a reply is received. If no reply is received for an RPC, that Receive buffer remains posted. But xprtrdma tries to post another when the next RPC is sent. If this happens a few dozen times, there are no receive buffers left to be posted at send time. 
I don't see a way for a transport connection to recover at that point, and it will spit warnings and unnecessarily delay RPCs on occasion for its remaining lifetime. Commit 1e465fd4ff47 ("xprtrdma: Replace send and receive arrays") removed a little bit of logic to detect this case and not provide a Receive buffer so no more buffers are posted, and then transport operation continues correctly. We didn't understand what that logic did, and it wasn't commented, so it was removed as part of the overhaul to support backchannel requests. Restore it, but be wary of the need to keep extra Receives posted to deal with backchannel requests. Fixes: 1e465fd4ff47 ("xprtrdma: Replace send and receive arrays") Signed-off-by: Chuck Lever Reviewed-by: Anna Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 41 +++++++++++++++++++++++---------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fefcba982415..799cce6cbe45 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include /* try_module_get()/module_put() */ @@ -923,7 +924,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests + 2; i++) { + for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); @@ -1018,6 +1019,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rep = rpcrdma_buffer_get_rep_locked(buf); rpcrdma_destroy_rep(ia, rep); } + buf->rb_send_count = 0; spin_lock(&buf->rb_reqslock); while (!list_empty(&buf->rb_allreqs)) { @@ -1032,6 +1034,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); + buf->rb_recv_count = 0; rpcrdma_destroy_mrs(buf); } @@ -1074,6 +1077,23 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) spin_unlock(&buf->rb_mwlock); } +static struct rpcrdma_rep * +rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) +{ + /* If an RPC previously completed without a reply (say, a + * credential problem or a soft timeout occurs) then hold off + * on supplying more Receive buffers until the number of new + * pending RPCs catches up to the number of posted Receives. + */ + if (unlikely(buffers->rb_send_count < buffers->rb_recv_count)) + return NULL; + + if (unlikely(list_empty(&buffers->rb_recv_bufs))) + return NULL; + buffers->rb_recv_count++; + return rpcrdma_buffer_get_rep_locked(buffers); +} + /* * Get a set of request/reply buffers. 
* @@ -1087,10 +1107,9 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) spin_lock(&buffers->rb_lock); if (list_empty(&buffers->rb_send_bufs)) goto out_reqbuf; + buffers->rb_send_count++; req = rpcrdma_buffer_get_req_locked(buffers); - if (list_empty(&buffers->rb_recv_bufs)) - goto out_repbuf; - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); return req; @@ -1098,11 +1117,6 @@ out_reqbuf: spin_unlock(&buffers->rb_lock); pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; -out_repbuf: - spin_unlock(&buffers->rb_lock); - pr_warn("RPC: %s: out of reply buffers\n", __func__); - req->rl_reply = NULL; - return req; } /* @@ -1119,9 +1133,12 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) req->rl_reply = NULL; spin_lock(&buffers->rb_lock); + buffers->rb_send_count--; list_add_tail(&req->rl_free, &buffers->rb_send_bufs); - if (rep) + if (rep) { + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + } spin_unlock(&buffers->rb_lock); } @@ -1135,8 +1152,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; spin_lock(&buffers->rb_lock); - if (!list_empty(&buffers->rb_recv_bufs)) - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); } @@ -1150,6 +1166,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; spin_lock(&buffers->rb_lock); + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); spin_unlock(&buffers->rb_lock); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 670fad57153a..a71b0f5897d8 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -321,6 +321,7 @@ struct rpcrdma_buffer { char *rb_pool; spinlock_t rb_lock; /* protect buf lists */ + int rb_send_count, rb_recv_count; struct list_head rb_send_bufs; struct list_head rb_recv_bufs; u32 rb_max_requests; From ecc6569f3503b39f45bc6b86197b5e0a8533fb72 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 23:08:11 +0800 Subject: [PATCH 0071/1050] netfilter: gre: Use consistent GRE_* macros instead of ones defined by netfilter. There are already some GRE_* macros in kernel, so it is unnecessary to define these macros. 
Also remove some useless macros. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- .../linux/netfilter/nf_conntrack_proto_gre.h | 22 ++----------------- include/uapi/linux/if_tunnel.h | 1 + net/ipv4/netfilter/nf_nat_proto_gre.c | 4 ++-- net/netfilter/nf_conntrack_proto_gre.c | 4 ++-- 4 files changed, 7 insertions(+), 24 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index df78dc2b5524..0189747f2691 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -1,29 +1,11 @@ #ifndef _CONNTRACK_PROTO_GRE_H #define _CONNTRACK_PROTO_GRE_H #include +#include +#include /* GRE PROTOCOL HEADER */ -/* GRE Version field */ -#define GRE_VERSION_1701 0x0 -#define GRE_VERSION_PPTP 0x1 - -/* GRE Protocol field */ -#define GRE_PROTOCOL_PPTP 0x880B - -/* GRE Flags */ -#define GRE_FLAG_C 0x80 -#define GRE_FLAG_R 0x40 -#define GRE_FLAG_K 0x20 -#define GRE_FLAG_S 0x10 -#define GRE_FLAG_A 0x80 - -#define GRE_IS_C(f) ((f)&GRE_FLAG_C) -#define GRE_IS_R(f) ((f)&GRE_FLAG_R) -#define GRE_IS_K(f) ((f)&GRE_FLAG_K) -#define GRE_IS_S(f) ((f)&GRE_FLAG_S) -#define GRE_IS_A(f) ((f)&GRE_FLAG_A) - /* GRE is a mess: Four different standards */ struct gre_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 9865c8caedde..fb7337d6b985 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -39,6 +39,7 @@ #define GRE_IS_REC(f) ((f) & GRE_REC) #define GRE_IS_ACK(f) ((f) & GRE_ACK) +#define GRE_VERSION_0 __cpu_to_be16(0x0000) #define GRE_VERSION_1 __cpu_to_be16(0x0001) #define GRE_PROTO_PPP __cpu_to_be16(0x880b) #define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 9414923f1e15..93198d71dbb6 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -104,11 +104,11 @@ gre_manip_pkt(struct sk_buff *skb, if (maniptype != NF_NAT_MANIP_DST) return true; switch (greh->version) { - case GRE_VERSION_1701: + case ntohs(GRE_VERSION_0): /* We do not currently NAT any GREv0 packets.
* Try to behave like "nf_nat_proto_unknown" */ break; - case GRE_VERSION_PPTP: + case ntohs(GRE_VERSION_1): pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a96451a7af20..deb239a014e4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -200,7 +200,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, /* first only delinearize old RFC1701 GRE header */ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); - if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) { /* try to behave like "nf_conntrack_proto_generic" */ tuple->src.u.all = 0; tuple->dst.u.all = 0; @@ -212,7 +212,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, if (!pgrehdr) return true; - if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + if (grehdr->protocol != GRE_PROTO_PPP) { pr_debug("GRE_VERSION_PPTP but unknown proto\n"); return false; } From c579a9e7d58f66030a144c7a33cc9bdf827a4b6d Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 23:08:47 +0800 Subject: [PATCH 0072/1050] netfilter: gre: Use consistent GRE and PPTP header structure instead of the ones defined by netfilter There are two existing structures which define the GRE and PPTP headers. So use these two structures instead of the ones defined by netfilter, to keep consistent with other code. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- .../linux/netfilter/nf_conntrack_proto_gre.h | 42 ------------------- net/ipv4/netfilter/nf_nat_proto_gre.c | 13 +++--- net/netfilter/nf_conntrack_proto_gre.c | 12 +++--- 3 files changed, 13 insertions(+), 54 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 0189747f2691..dee0acd0dd31 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -4,48 +4,6 @@ #include #include -/* GRE PROTOCOL HEADER */ - -/* GRE is a mess: Four different standards */ -struct gre_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 rec:3, - srr:1, - seq:1, - key:1, - routing:1, - csum:1, - version:3, - reserved:4, - ack:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 csum:1, - routing:1, - key:1, - seq:1, - srr:1, - rec:3, - ack:1, - reserved:4, - version:3; -#else -#error "Adjust your defines" -#endif - __be16 protocol; -}; - -/* modified GRE header for PPTP */ -struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __be16 payload_len; /* size of ppp payload, not inc. gre header */ - __be16 call_id; /* peer's call_id for this session */ - __be32 seq; /* sequence number. Present if S==1
Present if S==1 */ - __be32 ack; /* seq number of highest packet received by */ - /* sender in this session */ -}; - struct nf_ct_gre { unsigned int stream_timeout; unsigned int timeout; diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 93198d71dbb6..edf05002d674 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -88,8 +88,8 @@ gre_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct gre_hdr *greh; - struct gre_hdr_pptp *pgreh; + const struct gre_base_hdr *greh; + struct pptp_gre_header *pgreh; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ @@ -97,18 +97,19 @@ gre_manip_pkt(struct sk_buff *skb, return false; greh = (void *)skb->data + hdroff; - pgreh = (struct gre_hdr_pptp *)greh; + pgreh = (struct pptp_gre_header *)greh; /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype != NF_NAT_MANIP_DST) return true; - switch (greh->version) { - case ntohs(GRE_VERSION_0): + + switch (greh->flags & GRE_VERSION) { + case GRE_VERSION_0: /* We do not currently NAT any GREv0 packets. * Try to behave like "nf_nat_proto_unknown" */ break; - case ntohs(GRE_VERSION_1): + case GRE_VERSION_1: pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index deb239a014e4..9a715f88b2f1 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -192,15 +192,15 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { - const struct gre_hdr_pptp *pgrehdr; - struct gre_hdr_pptp _pgrehdr; + const struct pptp_gre_header *pgrehdr; + struct pptp_gre_header _pgrehdr; __be16 srckey; - const struct gre_hdr *grehdr; - struct gre_hdr _grehdr; + const struct gre_base_hdr *grehdr; + struct gre_base_hdr _grehdr; /* first only delinearize old RFC1701 GRE header */ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); - if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) { + if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) { /* try to behave like "nf_conntrack_proto_generic" */ tuple->src.u.all = 0; tuple->dst.u.all = 0; @@ -213,7 +213,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, return true; if (grehdr->protocol != GRE_PROTO_PPP) { - pr_debug("GRE_VERSION_PPTP but unknown proto\n"); + pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol)); return false; } From 68cb9fe47ea661bffd48c8ca35790be26935e1c5 Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Tue, 30 Aug 2016 18:48:19 +0200 Subject: [PATCH 0073/1050] netfilter: nf_ct_sip: correct parsing of continuation lines in SIP headers Current parsing methods for SIP headers do not properly manage continuation lines: in case of Call-ID header the first character of Call-ID header value is truncated. As a result IPVS SIP persistence engine hashes over a call-id that is not exactly the one present in the originale message. Example: "Call-ID: \r\n abcdeABCDE1234" results in extracted call-id equal to "bcdeABCDE1234". In above example Call-ID is represented as a string in C language. 
Obviously, in the real message the first bytes after the colon (":") are "20 0d 0a 20". The proposed fix is in the nf_conntrack_sip module. Since the sip_follow_continuation() function walks past the leading spaces or tabs of the continuation line, sip_skip_whitespace() should simply return the output of sip_follow_continuation(). Otherwise another iteration of the for loop is done and dptr is incremented by one, pointing to the second character of the first word in the header. Below is an extract of relevant SIP ABNF syntax. Call-ID = ( "Call-ID" / "i" ) HCOLON callid callid = word [ "@" word ] HCOLON = *( SP / HTAB ) ":" SWS SWS = [LWS] ; sep whitespace LWS = [*WSP CRLF] 1*WSP ; linear whitespace WSP = SP / HTAB word = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" / "(" / ")" / "<" / ">" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" / "{" / "}" ) Signed-off-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 7d77217de6a3..251a9a44d189 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -334,8 +334,7 @@ static const char *sip_skip_whitespace(const char *dptr, const char *limit) if (*dptr != '\r' && *dptr != '\n') break; dptr = sip_follow_continuation(dptr, limit); - if (dptr == NULL) - return NULL; + break; } return dptr; } From f0608ceaa79d99d24e97517f9a9a0fed2b9698b4 Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Tue, 30 Aug 2016 18:48:24 +0200 Subject: [PATCH 0074/1050] netfilter: nf_ct_sip: correct allowed characters in Call-ID SIP header Current parsing methods for the SIP Call-ID header do not correctly check all characters allowed by RFC 3261. In particular, the "," character is allowed instead of the "'" character. As a result, Call-ID headers like the following are discarded by the IPVS SIP persistence engine. Call-ID: -.!%*_+`'~()<>:\"/[]?{} The above example is composed using all non-alphanumeric characters listed in RFC 3261 for the Call-ID header syntax. The proposed fix is in the nf_conntrack_sip module; the function iswordc() checks this range: (c >= '(' && c <= '/') which includes these characters: ()*+,-./ All of them are allowed except ","; meanwhile, "'" is not included in the list. Below is an extract of relevant SIP ABNF syntax. Call-ID = ( "Call-ID" / "i" ) HCOLON callid callid = word [ "@" word ] HCOLON = *( SP / HTAB ) ":" SWS SWS = [LWS] ; sep whitespace LWS = [*WSP CRLF] 1*WSP ; linear whitespace WSP = SP / HTAB word = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" / "(" / ")" / "<" / ">" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" / "{" / "}" ) Signed-off-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 251a9a44d189..d8035351aff5 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -83,9 +83,10 @@ static int digits_len(const struct nf_conn *ct, const char *dptr, static int iswordc(const char c) { if (isalnum(c) || c == '!' || c == '"' || c == '%' || - (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' || + (c >= '(' && c <= '+') || c == ':' || c == '<' || c == '>' || c == '?'
|| (c >= '[' && c <= ']') || c == '_' || c == '`' || - c == '{' || c == '}' || c == '~') + c == '{' || c == '}' || c == '~' || (c >= '-' && c <= '/') || + c == '\'') return 1; return 0; } From 723eb299de62ce75dbd31e7ee25d45887c32a602 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 1 Sep 2016 18:58:29 +0800 Subject: [PATCH 0075/1050] netfilter: ftp: Remove the useless dlen==0 condition check in find_pattern The caller function "help" has already made sure that datalen cannot be zero before invoking find_pattern, by the following code: if (dataoff >= skb->len) { pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, skb->len); return NF_ACCEPT; } datalen = skb->len - dataoff; And the later code "ends_in_nl = (fb_ptr[datalen - 1] == '\n');" uses datalen directly without checking whether it is zero. So it is unnecessary to check it in find_pattern too. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index b6934b5edf7a..d49a2d410813 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -301,8 +301,6 @@ static int find_pattern(const char *data, size_t dlen, size_t i = plen; pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); - if (dlen == 0) - return 0; if (dlen <= plen) { /* Short packet: try for partial? */ From ddb075b0cdbca140d6c6503db9cc9990d3461308 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 1 Sep 2016 18:59:02 +0800 Subject: [PATCH 0076/1050] netfilter: ftp: Remove the useless code There is some debug code in find_pattern which is commented out with #if 0. Now remove it. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index d49a2d410813..e3ed20060878 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -309,19 +309,8 @@ static int find_pattern(const char *data, size_t dlen, else return 0; } - if (strncasecmp(data, pattern, plen) != 0) { -#if 0 - size_t i; - - pr_debug("ftp: string mismatch\n"); - for (i = 0; i < plen; i++) { - pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n", - i, data[i], data[i], - pattern[i], pattern[i]); - } -#endif + if (strncasecmp(data, pattern, plen) != 0) return 0; - } pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip From 0d9932b2875f568d679f2af33ce610da3903ac11 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Fri, 2 Sep 2016 15:05:57 +0200 Subject: [PATCH 0077/1050] netfilter: nft_numgen: rename until attribute to modulus The _until_ attribute is renamed to _modulus_ as the behaviour is similar to other expressions with number limits (e.g. nft_hash). Renaming is possible because there isn't a kernel release yet with these changes.
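For reference, the renamed attribute surfaces in the nft command line roughly as follows (a hedged example: the exact grammar depends on the nft userspace version, and the addresses are invented for illustration): # nft add rule ip nat prerouting dnat to numgen inc mod 2 map { 0 : 192.168.10.100, 1 : 192.168.20.200 } Here "mod 2" maps onto NFTA_NG_MODULUS and "inc" selects NFT_NG_INCREMENTAL.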
Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++-- net/netfilter/nft_numgen.c | 30 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 28ce01d79707..24161e25576d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1126,13 +1126,13 @@ enum nft_trace_types { * enum nft_ng_attributes - nf_tables number generator expression netlink attributes * * @NFTA_NG_DREG: destination register (NLA_U32) - * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32) + * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) * @NFTA_NG_TYPE: operation type (NLA_U32) */ enum nft_ng_attributes { NFTA_NG_UNSPEC, NFTA_NG_DREG, - NFTA_NG_UNTIL, + NFTA_NG_MODULUS, NFTA_NG_TYPE, __NFTA_NG_MAX }; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 294745ecb0fc..f51a3ede3932 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); struct nft_ng_inc { enum nft_registers dreg:8; - u32 until; + u32 modulus; atomic_t counter; }; @@ -34,7 +34,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, do { oval = atomic_read(&priv->counter); - nval = (oval + 1 < priv->until) ? oval + 1 : 0; + nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); memcpy(&regs->data[priv->dreg], &priv->counter, sizeof(u32)); @@ -42,7 +42,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, - [NFTA_NG_UNTIL] = { .type = NLA_U32 }, + [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, }; @@ -52,8 +52,8 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, { struct nft_ng_inc *priv = nft_expr_priv(expr); - priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); - if (priv->until == 0) + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) return -ERANGE; priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); @@ -64,11 +64,11 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, - u32 until, enum nft_ng_types type) + u32 modulus, enum nft_ng_types type) { if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until))) + if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) goto nla_put_failure; @@ -83,12 +83,12 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_inc *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL); } struct nft_ng_random { enum nft_registers dreg:8; - u32 until; + u32 modulus; }; static void nft_ng_random_eval(const struct nft_expr *expr, @@ -99,7 +99,7 @@ static void nft_ng_random_eval(const struct nft_expr *expr, struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), - priv->until); + priv->modulus); } static int nft_ng_random_init(const struct nft_ctx *ctx, @@ -108,8
+108,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, { struct nft_ng_random *priv = nft_expr_priv(expr); - priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); - if (priv->until == 0) + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) return -ERANGE; prandom_init_once(&nft_numgen_prandom_state); @@ -124,7 +124,7 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM); } static struct nft_expr_type nft_ng_type; @@ -149,8 +149,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { u32 type; - if (!tb[NFTA_NG_DREG] || - !tb[NFTA_NG_UNTIL] || + if (!tb[NFTA_NG_DREG] || + !tb[NFTA_NG_MODULUS] || !tb[NFTA_NG_TYPE]) return ERR_PTR(-EINVAL); From db6d857b819a00627a3bd911f49ee3156766bba8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 2 Sep 2016 21:00:58 +0200 Subject: [PATCH 0078/1050] netfilter: nft_quota: fix overquota logic Use xor to decide to break further rule evaluation or not, since the existing logic doesn't achieve the expected inversion. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 6eafbf987ed9..92b6ff16dbb3 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -33,7 +33,7 @@ static void nft_quota_eval(const struct nft_expr *expr, { struct nft_quota *priv = nft_expr_priv(expr); - if (nft_quota(priv, pkt) < 0 && !priv->invert) + if ((nft_quota(priv, pkt) < 0) ^ priv->invert) regs->verdict.code = NFT_BREAK; } From 22609b43b194917dce2188ae9a78bc40a14e67b5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 2 Sep 2016 21:00:59 +0200 Subject: [PATCH 0079/1050] netfilter: nft_quota: introduce nft_overquota() This patch renames the existing function to nft_overquota() and makes it return a boolean that tells us if we have exceeded our byte quota. Just a cleanup. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_quota.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 92b6ff16dbb3..c00104c07095 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -21,10 +21,10 @@ struct nft_quota { atomic64_t remain; }; -static inline long nft_quota(struct nft_quota *priv, - const struct nft_pktinfo *pkt) +static inline bool nft_overquota(struct nft_quota *priv, + const struct nft_pktinfo *pkt) { - return atomic64_sub_return(pkt->skb->len, &priv->remain); + return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0; } static void nft_quota_eval(const struct nft_expr *expr, @@ -33,7 +33,7 @@ static void nft_quota_eval(const struct nft_expr *expr, { struct nft_quota *priv = nft_expr_priv(expr); - if ((nft_quota(priv, pkt) < 0) ^ priv->invert) + if (nft_overquota(priv, pkt) ^ priv->invert) regs->verdict.code = NFT_BREAK; } From 1bcabc81ee94c0a65989128258f8c1d3e1c1b0ea Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Tue, 30 Aug 2016 18:52:22 +0200 Subject: [PATCH 0080/1050] netfilter: nf_ct_sip: allow tab character in SIP headers Current parsing methods for SIP headers do not allow the presence of tab characters between header name and header value.
As a result, Call-ID SIP headers like the following are discarded by the IPVS SIP persistence engine: "Call-ID\t: mycallid@abcde" "Call-ID:\tmycallid@abcde" In the above examples the Call-IDs are written as C string literals; in the real message there is obviously a byte 0x09 before/after the colon (":"). The proposed fix is in the nf_conntrack_sip module. Function sip_skip_whitespace() should skip tabs in addition to spaces, since in the SIP grammar whitespace (WSP) corresponds to space or tab. Below is an extract of the relevant SIP ABNF syntax. Call-ID = ( "Call-ID" / "i" ) HCOLON callid callid = word [ "@" word ] HCOLON = *( SP / HTAB ) ":" SWS SWS = [LWS] ; sep whitespace LWS = [*WSP CRLF] 1*WSP ; linear whitespace WSP = SP / HTAB word = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" / "(" / ")" / "<" / ">" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" / "{" / "}" ) Signed-off-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d8035351aff5..621b81c7bddc 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -330,7 +330,7 @@ static const char *sip_follow_continuation(const char *dptr, const char *limit) static const char *sip_skip_whitespace(const char *dptr, const char *limit) { for (; dptr < limit; dptr++) { - if (*dptr == ' ') + if (*dptr == ' ' || *dptr == '\t') continue; if (*dptr != '\r' && *dptr != '\n') break; From 0bd2223594a4dcddc1e34b15774a3a4776f7749e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 1 Sep 2016 14:25:43 +0100 Subject: [PATCH 0081/1050] crypto: cryptd - initialize child shash_desc on import When calling .import() on a cryptd ahash_request, the structure members that describe the child transform in the shash_desc need to be initialized like they are when calling .init(). Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/cryptd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 77207b41940c..0c654e59f215 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -631,9 +631,14 @@ static int cryptd_hash_export(struct ahash_request *req, void *out) static int cryptd_hash_import(struct ahash_request *req, const void *in) { - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct shash_desc *desc = cryptd_shash_desc(req); - return crypto_shash_import(&rctx->desc, in); + desc->tfm = ctx->child; + desc->flags = req->base.flags; + + return crypto_shash_import(desc, in); } static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, From fc2780b66b15092ac68272644a522c1624c48547 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Aug 2016 11:59:26 +0100 Subject: [PATCH 0082/1050] drm/i915: Add GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE to SNB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the CI test machines, SNB also uses the GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE value to report a bad GEN6_PCODE_MIN_FREQ_TABLE request.
[ 157.744641] WARNING: CPU: 5 PID: 9238 at drivers/gpu/drm/i915/intel_pm.c:7760 sandybridge_pcode_write+0x141/0x200 [i915] [ 157.744642] Missing switch case (16) in gen6_check_mailbox_status [ 157.744642] Modules linked in: snd_hda_intel i915 ax88179_178a usbnet mii x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core mei_me lpc_ich snd_pcm mei broadcom bcm_phy_lib tg3 ptp pps_core [last unloaded: vgem] [ 157.744658] CPU: 5 PID: 9238 Comm: drv_hangman Tainted: G U W 4.8.0-rc3-CI-CI_DRM_1589+ #1 [ 157.744658] Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 [ 157.744659] 0000000000000000 ffff88011f093a98 ffffffff81426415 ffff88011f093ae8 [ 157.744662] 0000000000000000 ffff88011f093ad8 ffffffff8107d2a6 00001e50810d3c9f [ 157.744663] ffff880128680000 0000000000000008 0000000000000000 ffff88012868a650 [ 157.744665] Call Trace: [ 157.744669] [] dump_stack+0x67/0x92 [ 157.744672] [] __warn+0xc6/0xe0 [ 157.744673] [] warn_slowpath_fmt+0x4a/0x50 [ 157.744685] [] sandybridge_pcode_write+0x141/0x200 [i915] [ 157.744697] [] intel_enable_gt_powersave+0x64a/0x1330 [i915] [ 157.744712] [] ? i9xx_emit_request+0x1b/0x80 [i915] [ 157.744725] [] __i915_add_request+0x1e3/0x370 [i915] [ 157.744738] [] i915_gem_do_execbuffer.isra.16+0xced/0x1b80 [i915] [ 157.744740] [] ? __might_fault+0x3e/0x90 [ 157.744752] [] i915_gem_execbuffer2+0xc2/0x2a0 [i915] [ 157.744753] [] drm_ioctl+0x207/0x4c0 [ 157.744765] [] ? i915_gem_execbuffer+0x360/0x360 [i915] [ 157.744767] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 157.744769] [] do_vfs_ioctl+0x8e/0x680 [ 157.744770] [] ? __might_fault+0x87/0x90 [ 157.744771] [] ? __might_fault+0x3e/0x90 [ 157.744773] [] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 157.744774] [] SyS_ioctl+0x3c/0x70 [ 157.744776] [] entry_SYSCALL_64_fastpath+0x1c/0xac Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97491 Fixes: 87660502f1a4 ("drm/i915/gen6+: Interpret mailbox error flags") Signed-off-by: Chris Wilson Cc: Lyude Cc: Matt Roper Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Ville Syrjälä Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20160826105926.3413-1-chris@chris-wilson.co.uk Acked-by: Mika Kuoppala (cherry picked from commit 7850d1c35344c7bd6a357240f2f9f60fc2c097b5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 53e13c10e4ea..2d2481392824 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7859,6 +7859,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) case GEN6_PCODE_ILLEGAL_CMD: return -ENXIO; case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: return -EOVERFLOW; case GEN6_PCODE_TIMEOUT: return -ETIMEDOUT; From 82469c59d222f839ded5cd282172258e026f9112 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 6 Sep 2016 17:39:13 -0300 Subject: [PATCH 0083/1050] nvme: Don't suspend admin queue that wasn't created This fixes a regression in my previous commit c21377f8366c ("nvme: Suspend all queues before deletion"), which provoked an Oops in the removal path when removing a device that became IO incapable very early at probe (i.e. after a failed EEH recovery). 
Turns out, if the error occurred very early at the probe path, before even configuring the admin queue, we might try to suspend the uninitialized admin queue, accessing bad memory. Fixes: c21377f8366c ("nvme: Suspend all queues before deletion") Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Jay Freyensee Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8dcf5a960951..be84a84a40f7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1693,7 +1693,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_suspend_queue(dev->queues[i]); if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { - nvme_suspend_queue(dev->queues[0]); + /* A device might become IO incapable very soon during + * probe, before the admin queue is configured. Thus, + * queue_count can be 0 here. + */ + if (dev->queue_count) + nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); From db91e2370e087967cb6b6425c092188767fb5e00 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Wed, 7 Sep 2016 19:49:34 +0930 Subject: [PATCH 0084/1050] tools/lguest: Don't bork the terminal in case of wrong args Running lguest without arguments or with a wrong argument name borks the terminal, because the cleanup handler is set up too late in the initialization process. Signed-off-by: Daniel Baluta Signed-off-by: Rusty Russell Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/lguest/lguest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index d9836c5eb694..11c8d9bc762e 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -3266,6 +3266,9 @@ int main(int argc, char *argv[]) } } + /* If we exit via err(), this kills all the threads, restores tty. */ + atexit(cleanup_devices); + /* We always have a console device, and it's always device 1. */ setup_console(); @@ -3369,9 +3372,6 @@ int main(int argc, char *argv[]) /* Ensure that we terminate if a device-servicing child dies. */ signal(SIGCHLD, kill_launcher); - /* If we exit via err(), this kills all the threads, restores tty. */ - atexit(cleanup_devices); - /* If requested, chroot to a directory */ if (chroot_path) { if (chroot(chroot_path) != 0) From c291b015158577be533dd5a959dfc09bab119eed Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 7 Sep 2016 10:21:33 +0800 Subject: [PATCH 0085/1050] x86/apic: Fix num_processors value in case of failure If the topology package map check of the APIC ID and the CPU is a failure, we don't generate the processor info for that APIC ID yet we increase disabled_cpus by one - which is buggy. Only increase num_processors once we are sure we don't fail. Signed-off-by: Dou Liyang Acked-by: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1473214893-16481-1-git-send-email-douly.fnst@cn.fujitsu.com [ Rewrote the changelog. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 50c95af0f017..f3e9b2df4b16 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2093,7 +2093,6 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } - num_processors++; if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed @@ -2116,10 +2115,13 @@ int generic_processor_info(int apicid, int version) pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", thiscpu, apicid); + disabled_cpus++; return -ENOSPC; } + num_processors++; + /* * Validate version */ From 7e9d2850a8db4e0d85a20bb692198bf2cc4be3b7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:00 +0200 Subject: [PATCH 0086/1050] ARM: dts: STiH410: Handle interconnect clock required by EHCI/OHCI (USB) The STiH4{07,10} platform contains some interconnect clocks which are used by various IPs. If these clocks aren't handled correctly by ST's EHCI/OHCI drivers, their hub won't be found, the following error will be shown and the result will be non-working USB: [ 97.221963] hub 2-1:1.0: hub_ext_port_status failed (err = -110) Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Acked-by: Patrice Chotard --- arch/arm/boot/dts/stih410.dtsi | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi index 18ed1ad10d32..40318869c733 100644 --- a/arch/arm/boot/dts/stih410.dtsi +++ b/arch/arm/boot/dts/stih410.dtsi @@ -41,7 +41,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a03c00 0x100>; interrupts = ; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -57,7 +58,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -71,7 +73,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a83c00 0x100>; interrupts = ; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; @@ -87,7 +90,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; From 78567f135d9bbbaf4538f63656d3e4d957c35fe9 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:00 +0200 Subject: [PATCH 0087/1050] ARM: dts: STiH407-family: Provide interconnect clock for consumption in ST SDHCI The STiH4{07,10} platform contains some interconnect clocks which are used by various IPs.
If these clocks aren't handled correctly by ST's SDHCI driver MMC will break and the following output can be observed: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Acked-by: Patrice Chotard --- arch/arm/boot/dts/stih407-family.dtsi | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index d294e82447a2..8b063ab10c19 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -550,8 +550,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mmc0>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_0>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; bus-width = <8>; non-removable; }; @@ -565,8 +566,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sd1>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_1>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_1>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; resets = <&softreset STIH407_MMC1_SOFTRESET>; bus-width = <4>; }; From a4497a86fb9b855c5ac8503fdc959393b00bb643 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 8 Sep 2016 10:15:28 -0400 Subject: [PATCH 0088/1050] x86, clock: Fix kvm guest tsc initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When booting a kvm guest on AMD with the latest kernel the following messages are displayed in the boot log: tsc: Unable to calibrate against PIT tsc: HPET/PMTIMER calibration failed aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID") introduced a change to account for a difference in cpu and tsc frequencies for Intel SKL processors. Before this change the native tsc set x86_platform.calibrate_tsc to native_calibrate_tsc() which is a hardware calibration of the tsc, and in tsc_init() executed tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; The kvm code changed x86_platform.calibrate_tsc to kvm_get_tsc_khz() and executed the same tsc_init() function. This meant that KVM guests did not execute the native hardware calibration function. After aa297292d708, there are separate native calibrations for cpu_khz and tsc_khz. 
The code sets x86_platform.calibrate_tsc to native_calibrate_tsc() which is now an Intel specific calibration function, and x86_platform.calibrate_cpu to native_calibrate_cpu() which is the "old" native_calibrate_tsc() function (i.e., the native hardware calibration function). tsc_init() now does cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); if (tsc_khz == 0) tsc_khz = cpu_khz; else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) cpu_khz = tsc_khz; The kvm code should not call the hardware initialization in native_calibrate_cpu(), as it isn't applicable for kvm and it didn't do that prior to aa297292d708. This patch resolves this issue by setting x86_platform.calibrate_cpu to kvm_get_tsc_khz(). v2: I had originally set x86_platform.calibrate_cpu to cpu_khz_from_cpuid(), however, pbonzini pointed out that the CPUID leaf in that function is not available in KVM. I have changed the function pointer to kvm_get_tsc_khz(). Fixes: aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID") Signed-off-by: Prarit Bhargava Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: Len Brown Cc: "Peter Zijlstra (Intel)" Cc: Borislav Petkov Cc: Adrian Hunter Cc: "Christopher S. Hall" Cc: David Woodhouse Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1d39bfbd26bb..3692249a70f1 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -289,6 +289,7 @@ void __init kvmclock_init(void) put_cpu(); x86_platform.calibrate_tsc = kvm_get_tsc_khz; + x86_platform.calibrate_cpu = kvm_get_tsc_khz; x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC From 015282c9eb6da05bfad6ff009078f91e06c0c98f Mon Sep 17 00:00:00 2001 From: Wenbo Wang Date: Thu, 8 Sep 2016 12:12:11 -0400 Subject: [PATCH 0089/1050] nvme/quirk: Add a delay before checking device ready for memblaze device Signed-off-by: Wenbo Wang Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index be84a84a40f7..60f7eab11865 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2117,6 +2117,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { 0, } From 47a7b0d8888c04c9746812820b6e60553cc77bbc Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 4 Sep 2016 22:17:28 -0400 Subject: [PATCH 0090/1050] md-cluster: make md-cluster work when compiled into the kernel md-cluster is compiled as a module by default; if it is built into the kernel instead, md-cluster does not work. [64782.630008] md/raid1:md127: active with 2 out of 2 mirrors [64782.630528] md-cluster module not found.
[64782.630530] md127: Could not setup cluster service (-2) Fixes: edb39c9 ("Introduce md_cluster_operations to handle cluster functions") Cc: stable@vger.kernel.org (v4.1+) Reported-by: Marc Smith Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 67642bacd597..915e84d631a2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); int md_setup_cluster(struct mddev *mddev, int nodes) { - int err; - - err = request_module("md-cluster"); - if (err) { - pr_err("md-cluster module not found.\n"); - return -ENOENT; - } - + if (!md_cluster_ops) + request_module("md-cluster"); spin_lock(&pers_lock); + /* ensure module won't be unloaded */ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { + pr_err("can't find md-cluster module or get it's reference.\n"); spin_unlock(&pers_lock); return -ENOENT; } From 2f30ea5090cbc57ea573cdc66421264b3de3fb0a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 8 Sep 2016 18:09:57 +0200 Subject: [PATCH 0091/1050] xfrm_user: propagate sec ctx allocation errors When we fail to attach the security context in xfrm_state_construct() we'll return 0 as error value which, in turn, will wrongly claim success to userland when, in fact, we won't be adding / updating the XFRM state. This is a regression introduced by commit fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl(), attach_sec_ctx(), and attach_one_addr()"). Fix it by propagating the error returned by security_xfrm_state_alloc() in this case. Fixes: fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl()...") Signed-off-by: Mathias Krause Cc: Thomas Graf Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cb65d916a345..08892091cfe3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -581,9 +581,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (err) goto error; - if (attrs[XFRMA_SEC_CTX] && - security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) - goto error; + if (attrs[XFRMA_SEC_CTX]) { + err = security_xfrm_state_alloc(x, + nla_data(attrs[XFRMA_SEC_CTX])); + if (err) + goto error; + } if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) From 1fb81e09d487656aa23f2acb1232c7f56b4c2367 Mon Sep 17 00:00:00 2001 From: "thomas.zeitlhofer+lkml@ze-it.at" Date: Wed, 7 Sep 2016 20:40:38 +0200 Subject: [PATCH 0092/1050] vti: use right inner_mode for inbound inter address family policy checks In case of inter address family tunneling (IPv6 over vti4 or IPv4 over vti6), the inbound policy checks in vti_rcv_cb() and vti6_rcv_cb() are using the wrong address family. As a result, all inbound inter address family traffic is dropped. Use the xfrm_ip2inner_mode() helper, as done in xfrm_input() (i.e., also increment LINUX_MIB_XFRMINSTATEMODEERROR in case of error), to select the inner_mode that contains the right address family for the inbound policy checks. 
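In outline, both vti_rcv_cb() and vti6_rcv_cb() now pick the inner mode as follows (condensed sketch of the change below, with the reasoning in comments):

	struct xfrm_mode *inner_mode = x->inner_mode;

	/* An AF_UNSPEC selector means the SA can carry either address
	 * family, so the static inner_mode cannot be trusted; derive it
	 * from the protocol of the decapsulated packet instead.
	 */
	if (x->sel.family == AF_UNSPEC) {
		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
		if (inner_mode == NULL)
			return -EINVAL;	/* accounted as XFRMINSTATEMODEERROR */
	}

	family = inner_mode->afinfo->family;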
Signed-off-by: Thomas Zeitlhofer Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 15 ++++++++++++++- net/ipv6/ip6_vti.c | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index cc701fa70b12..5d7944f394d9 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -88,6 +88,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; @@ -105,7 +106,19 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = inner_mode->afinfo->family; skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d90a11f14040..52a2f735881f 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -340,6 +340,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; u32 orig_mark = skb->mark; int ret; @@ -357,7 +358,19 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = inner_mode->afinfo->family; skb->mark = be32_to_cpu(t->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); From 34a3d4b2d1f1b7c81af79f6f93a6cef4c3a0f54a Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 8 Sep 2016 13:55:56 -0400 Subject: [PATCH 0093/1050] xfrm: fix header file comment reference to struct xfrm_replay_state_esn Reported-by: Paul Wouters Signed-off-by: Richard Guy Briggs Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 143338978b48..1fc62b239f1b 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -298,7 +298,7 @@ enum xfrm_attr_type_t { XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ XFRMA_MARK, /* struct xfrm_mark */ XFRMA_TFCPAD, /* __u32 */ - XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ + XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_state_esn */ XFRMA_SA_EXTRA_FLAGS, /* __u32 */ XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ From 293f293637b55db4f9f522a5a72514e98a541076 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 8 Sep 2016 16:25:49 +0100 Subject: [PATCH 0094/1050] kvm-arm: Unmap shadow pagetables properly On arm/arm64, we depend on the kvm_unmap_hva* callbacks (via mmu_notifiers::invalidate_*) to unmap the stage2 pagetables when the userspace buffer gets unmapped. 
However, when the Hypervisor process exits without explicit unmap of the guest buffers, the only notifier we get is kvm_arch_flush_shadow_all() (via mmu_notifier::release ) which does nothing on arm. Later this causes us to access pages that were already released [via exit_mmap() -> unmap_vmas()] when we actually get to unmap the stage2 pagetable [via kvm_arch_destroy_vm() -> kvm_free_stage2_pgd()]. This triggers crashes with CONFIG_DEBUG_PAGEALLOC, which unmaps any free'd pages from the linear map. [ 757.644120] Unable to handle kernel paging request at virtual address ffff800661e00000 [ 757.652046] pgd = ffff20000b1a2000 [ 757.655471] [ffff800661e00000] *pgd=00000047fffe3003, *pud=00000047fcd8c003, *pmd=00000047fcc7c003, *pte=00e8004661e00712 [ 757.666492] Internal error: Oops: 96000147 [#3] PREEMPT SMP [ 757.672041] Modules linked in: [ 757.675100] CPU: 7 PID: 3630 Comm: qemu-system-aar Tainted: G D 4.8.0-rc1 #3 [ 757.683240] Hardware name: AppliedMicro X-Gene Mustang Board/X-Gene Mustang Board, BIOS 3.06.15 Aug 19 2016 [ 757.692938] task: ffff80069cdd3580 task.stack: ffff8006adb7c000 [ 757.698840] PC is at __flush_dcache_area+0x1c/0x40 [ 757.703613] LR is at kvm_flush_dcache_pmd+0x60/0x70 [ 757.708469] pc : [] lr : [] pstate: 20000145 ... [ 758.357249] [] __flush_dcache_area+0x1c/0x40 [ 758.363059] [] unmap_stage2_range+0x458/0x5f0 [ 758.368954] [] kvm_free_stage2_pgd+0x34/0x60 [ 758.374761] [] kvm_arch_destroy_vm+0x20/0x68 [ 758.380570] [] kvm_put_kvm+0x210/0x358 [ 758.385860] [] kvm_vm_release+0x2c/0x40 [ 758.391239] [] __fput+0x114/0x2e8 [ 758.396096] [] ____fput+0xc/0x18 [ 758.400869] [] task_work_run+0x108/0x138 [ 758.406332] [] do_exit+0x48c/0x10e8 [ 758.411363] [] do_group_exit+0x6c/0x130 [ 758.416739] [] get_signal+0x284/0xa18 [ 758.421943] [] do_signal+0x158/0x860 [ 758.427060] [] do_notify_resume+0x6c/0x88 [ 758.432608] [] work_pending+0x10/0x14 [ 758.437812] Code: 9ac32042 8b010001 d1000443 8a230000 (d50b7e20) This patch fixes the issue by moving the kvm_free_stage2_pgd() to kvm_arch_flush_shadow_all(). Cc: # 3.9+ Tested-by: Itaru Kitayama Reported-by: Itaru Kitayama Reported-by: James Morse Cc: Marc Zyngier Cc: Catalin Marinas Cc: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 -- arch/arm/kvm/mmu.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 75f130ef6504..c94b90d43772 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -158,8 +158,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; - kvm_free_stage2_pgd(kvm); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_arch_vcpu_free(kvm->vcpus[i]); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index a3faafe70081..e9a5c0e0c115 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1894,6 +1894,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) void kvm_arch_flush_shadow_all(struct kvm *kvm) { + kvm_free_stage2_pgd(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, From a179b69359feb26ddb148bb6a2c0c53a8d1dc5be Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2016 05:36:53 -0300 Subject: [PATCH 0095/1050] [media] cec: don't Feature Abort broadcast msgs when unregistered If the adapter is configured as 'Unregistered', then cec_receive_notify incorrectly thinks that broadcast messages are directed messages. 
The destination for broadcast messages is 0xf, the logical address assigned to Unregistered devices is also 0xf, and the logic didn't handle that correctly. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index e980ac9c9279..946986f3ac0d 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1409,7 +1409,6 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, u8 init_laddr = cec_msg_initiator(msg); u8 devtype = cec_log_addr2dev(adap, dest_laddr); int la_idx = cec_log_addr2idx(adap, dest_laddr); - bool is_directed = la_idx >= 0; bool from_unregistered = init_laddr == 0xf; struct cec_msg tx_cec_msg = { }; @@ -1571,7 +1570,7 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, * Unprocessed messages are aborted if userspace isn't doing * any processing either. */ - if (is_directed && !is_reply && !adap->follower_cnt && + if (!is_broadcast && !is_reply && !adap->follower_cnt && !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT) return cec_feature_abort(adap, msg); break; From 60815d4a78204915f5cdf79a536bc96d5d23ae5f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2016 07:17:22 -0300 Subject: [PATCH 0096/1050] [media] cec: fix ioctl return code when not registered Don't return the confusing -EIO error code when the device is not registered; instead return -ENODEV, which is the proper thing to do in this situation. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 6f58ee85eea4..e274e2f22398 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -435,7 +435,7 @@ static long cec_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) void __user *parg = (void __user *)arg; if (!devnode->registered) - return -EIO; + return -ENODEV; switch (cmd) { case CEC_ADAP_G_CAPS: From fe01111d23810c0cf6830ce5af1c14c6d3df6dc5 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:33:37 +0800 Subject: [PATCH 0097/1050] netfilter: nft_queue: check the validation of queues_total and queuenum Although queues_total and queuenum are validated by the nft utility, users can add nft rules directly via nfnetlink, so it is necessary to validate them in the nft_queue expr init routine too.
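In outline, the checks added below amount to the following (sketch only; the names match the diff that follows):

	/* Queues are numbered queuenum .. queuenum + queues_total - 1,
	 * and a queue number must fit into 16 bits on the wire, so the
	 * highest id in the range must still be a valid u16.
	 */
	if (priv->queues_total == 0)
		return -EINVAL;

	maxid = priv->queues_total - 1 + priv->queuenum;
	if (maxid > U16_MAX)
		return -ERANGE;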
Tested by running ./nft-test.py any/queue.t: any/queue.t: 6 unit tests, 0 error, 0 warning Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_queue.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 61d216eb7917..d16d59959ff6 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -65,6 +65,7 @@ static int nft_queue_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_queue *priv = nft_expr_priv(expr); + u32 maxid; if (tb[NFTA_QUEUE_NUM] == NULL) return -EINVAL; @@ -74,6 +75,16 @@ static int nft_queue_init(const struct nft_ctx *ctx, if (tb[NFTA_QUEUE_TOTAL] != NULL) priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); + else + priv->queues_total = 1; + + if (priv->queues_total == 0) + return -EINVAL; + + maxid = priv->queues_total - 1 + priv->queuenum; + if (maxid > U16_MAX) + return -ERANGE; + if (tb[NFTA_QUEUE_FLAGS] != NULL) { priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); if (priv->flags & ~NFT_QUEUE_FLAG_MASK) From 3c15b8e112d7351b2586c649a759318548523364 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:35:47 +0800 Subject: [PATCH 0098/1050] netfilter: nf_conntrack: remove unused ctl_table_path member in nf_conntrack_l3proto After commit adf0516845bc ("netfilter: remove ip_conntrack* sysctl compat code"), the ctl_table_path member in struct nf_conntrack_l3proto{} is not used anymore; remove it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index cdc920b4c4c2..8992e4229da9 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -63,10 +63,6 @@ struct nf_conntrack_l3proto { size_t nla_size; -#ifdef CONFIG_SYSCTL - const char *ctl_table_path; -#endif /* CONFIG_SYSCTL */ - /* Init l3proto pernet data */ int (*init_net)(struct net *net); From 6b7b554d34fdbc5dc9fae9d4ca9dd37f6346be3d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Aug 2016 15:12:44 +0100 Subject: [PATCH 0099/1050] ARM: dts: Remove use of skeleton.dtsi from bcm283x.dtsi This file is included from DTS files under arch/arm64 too (via broadcom/bcm2837-rpi-3-b.dts and broadcom/bcm2837.dtsi). There is a desire not to have skeleton.dtsi for ARM64. See commit 3ebee5a2e141 ("arm64: dts: kill skeleton.dtsi") for rationale for its removal. As well as the addition of the #*-cells properties, this also requires adding the device_type to the rpi memory node explicitly. Note that this change results in the removal of an empty /aliases node from bcm2835-rpi-a.dtb and bcm2835-rpi-a-plus.dtb. I have no hardware to check if this is a problem or not. It also results in some reordering of the nodes in the DTBs (the /aliases and /memory nodes come later). This isn't supposed to matter but, again, I've no hardware to check if it is true in this particular case.
Signed-off-by: Ian Campbell Acked-by: Mark Rutland Tested-by: Stefan Wahren Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Rob Herring Cc: Frank Rowand Cc: Eric Anholt Cc: Stephen Warren Cc: Lee Jones Cc: Gerd Hoffmann Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rpi-kernel@lists.infradead.org Cc: arm@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/bcm2835-rpi.dtsi | 1 + arch/arm/boot/dts/bcm283x.dtsi | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi index caf2707680c1..e9b47b2bbc33 100644 --- a/arch/arm/boot/dts/bcm2835-rpi.dtsi +++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi @@ -2,6 +2,7 @@ / { memory { + device_type = "memory"; reg = <0 0x10000000>; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index b98252232d20..445624a1a1de 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -2,7 +2,6 @@ #include #include #include -#include "skeleton.dtsi" /* This include file covers the common peripherals and configuration between * bcm2835 and bcm2836 implementations, leaving the CPU configuration to @@ -13,6 +12,8 @@ compatible = "brcm,bcm2835"; model = "BCM2835"; interrupt-parent = <&intc>; + #address-cells = <1>; + #size-cells = <1>; chosen { bootargs = "earlyprintk console=ttyAMA0"; From 76aa7591688001839ec9ca838041f2d55d49ab92 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Aug 2016 15:12:45 +0100 Subject: [PATCH 0100/1050] ARM64: dts: bcm: Use a symlink to R-Pi dtsi files from arch=arm The ../../../arm... style cross-references added by commit 9d56c22a7861 ("ARM: bcm2835: Add devicetree for the Raspberry Pi 3.") do not work in the context of the split device-tree repository[0] (where the directory structure differs). As with commit 8ee57b8182c4 ("ARM64: dts: vexpress: Use a symlink to vexpress-v2m-rs1.dtsi from arch=arm") use symlinks instead. 
[0] https://git.kernel.org/cgit/linux/kernel/git/devicetree/devicetree-rebasing.git/ Signed-off-by: Ian Campbell Acked-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Rob Herring Cc: Frank Rowand Cc: Eric Anholt Cc: Stephen Warren Cc: Lee Jones Cc: Gerd Hoffmann Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rpi-kernel@lists.infradead.org Cc: arm@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi | 1 + arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts | 4 ++-- arch/arm64/boot/dts/broadcom/bcm2837.dtsi | 2 +- arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi | 1 + arch/arm64/boot/dts/broadcom/bcm283x.dtsi | 1 + 5 files changed, 6 insertions(+), 3 deletions(-) create mode 120000 arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi create mode 120000 arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi create mode 120000 arch/arm64/boot/dts/broadcom/bcm283x.dtsi diff --git a/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi new file mode 120000 index 000000000000..3937b77cb310 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm2835-rpi.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts index 6f47dd2bb1db..7841b724e340 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts @@ -1,7 +1,7 @@ /dts-v1/; #include "bcm2837.dtsi" -#include "../../../../arm/boot/dts/bcm2835-rpi.dtsi" -#include "../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi" +#include "bcm2835-rpi.dtsi" +#include "bcm283x-rpi-smsc9514.dtsi" / { compatible = "raspberrypi,3-model-b", "brcm,bcm2837"; diff --git a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi index f2a31d06845d..8216bbb29fe0 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi @@ -1,4 +1,4 @@ -#include "../../../../arm/boot/dts/bcm283x.dtsi" +#include "bcm283x.dtsi" / { compatible = "brcm,bcm2836"; diff --git a/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi new file mode 120000 index 000000000000..dca7c057d5a5 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm283x.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi new file mode 120000 index 000000000000..5f54e4cab99b --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x.dtsi \ No newline at end of file From c94455558337eece474eebb6a16b905f98930418 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 8 Sep 2016 10:43:58 -0700 Subject: [PATCH 0101/1050] raid5: fix a small race condition commit 5f9d1fde7d54a5 (raid5: fix memory leak of bio integrity data) moves bio_reset to bio_endio. But it introduces a small race condition. It does bio_reset after raid5_release_stripe, which could make the stripe reusable, and hence the bio reused, just before bio_reset. Moving bio_reset before raid5_release_stripe is called should fix the race.
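In outline the fix is a reordering (sketch only; the real changes are in the diff below):

	/* before: the stripe, and with it the bio, may be reused by
	 * another context between these two calls
	 */
	raid5_release_stripe(sh);
	bio_reset(bi);

	/* after: the bio is reset while the stripe is still held */
	bio_reset(bi);
	raid5_release_stripe(sh);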
Reported-and-tested-by: Stefan Priebe - Profihost AG Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b95c54c0e596..ee7fc3701700 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); + bio_reset(bi); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); - bio_reset(bi); } static void raid5_end_write_request(struct bio *bi) @@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); + bio_reset(bi); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && sh != sh->batch_head) raid5_release_stripe(sh->batch_head); - bio_reset(bi); } static void raid5_build_block(struct stripe_head *sh, int i, int previous) From ea90383837334bcebe842e719ad4d8c966f4ef51 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 8 Sep 2016 19:03:20 +0800 Subject: [PATCH 0102/1050] drm/vc4: mark vc4_bo_cache_purge() static We get one warning when building the kernel with W=1: drivers/gpu/drm/vc4/vc4_bo.c:147:6: warning: no previous prototype for 'vc4_bo_cache_purge' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and doesn't need a declaration, but can be made static. So this patch marks it 'static'. Signed-off-by: Baoyou Xie Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 59adcf8532dd..3f6704cf6608 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -144,7 +144,7 @@ static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev, return &vc4->bo_cache.size_list[page_index]; } -void vc4_bo_cache_purge(struct drm_device *dev) +static void vc4_bo_cache_purge(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); From 7d762e49c2117d3829eb3355f2617aea080ed3a7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 9 Sep 2016 18:08:23 +0200 Subject: [PATCH 0103/1050] perf/x86/amd/uncore: Prevent use after free The recent conversion of the cpu hotplug support in the uncore driver introduced a regression due to the way the callbacks are invoked at initialization time. The old code called the prepare/starting/online function on each online cpu as a block. The new code registers the hotplug callbacks in the core for each state. The core invokes the callbacks at each registration on all online cpus. The code implicitly relied on the prepare/starting/online callbacks being called as a combo on a particular cpu, which was not obvious and completely undocumented. The resulting subtle wreckage happens due to the way the uncore code manages shared data structures for cpus which share an uncore resource in hardware. The sharing is determined in the cpu starting callback, but the prepare callback allocates per cpu data for the upcoming cpu because potential sharing is unknown at this point.
If the starting callback finds an online cpu which shares the hardware resource, it takes a refcount on the percpu data of that cpu and puts its own data structure into a 'free_at_online' pointer of that shared data structure. The online callback frees that. With the old model this worked because in a starting callback only one unused structure (the one of the starting cpu) was available. The new code allocates the data structures for all cpus when the prepare callback is registered. Now the starting function iterates through all online cpus and looks for a data structure (skipping its own) which has a matching hardware id. The id member of the data structure is initialized to 0, but the hardware id can be 0 as well. The resulting wreckage is: CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts its own data structure into CPU1's data structure to be freed. CPU1 skips CPU0 because the data structure is its allegedly unused own. It finds a matching id on CPU2, takes a refcount on CPU2 data and puts its own data structure into CPU2's data structure to be freed. .... Now the online callbacks are invoked. CPU0 has a pointer to CPU1's data and frees the original CPU0 data. So far so good. CPU1 has a pointer to CPU2's data and frees the original CPU1 data, which is still referenced by CPU0 ---> Booom So there are two issues to be solved here: 1) The id field must be initialized at allocation time to a value which cannot be a valid hardware id, i.e. -1. This prevents the above scenario, but now CPU1 and CPU2 both stick their own data structure into the free_at_online pointer of CPU0. So we leak CPU1's data structure. 2) Fix the memory leak described in #1. Instead of having a single pointer, use a hlist to enqueue the superfluous data structures which are then freed by the first cpu invoking the online callback. Ideally we should know the sharing _before_ invoking the prepare callback, but that's way beyond the scope of this bug fix.
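In outline, the resulting scheme looks like this (condensed sketch; the full version is in the diff below):

	static HLIST_HEAD(uncore_unused_list);

	/* starting callback: park the losing structure on a shared list
	 * instead of overwriting a single pointer that may already be
	 * in use
	 */
	hlist_add_head(&this->node, &uncore_unused_list);

	/* online callback: whoever gets here first drains everything
	 * parked so far
	 */
	hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
		hlist_del(&uncore->node);
		kfree(uncore);
	}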
[ tglx: Rewrote changelog ] Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine") Reported-and-tested-by: Eric Sandeen Signed-off-by: Sebastian Andrzej Siewior Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/events/amd/uncore.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index e6131d4454e6..65577f081d07 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -29,6 +29,8 @@ #define COUNTER_SHIFT 16 +static HLIST_HEAD(uncore_unused_list); + struct amd_uncore { int id; int refcnt; @@ -39,7 +41,7 @@ struct amd_uncore { cpumask_t *active_mask; struct pmu *pmu; struct perf_event *events[MAX_COUNTERS]; - struct amd_uncore *free_when_cpu_online; + struct hlist_node node; }; static struct amd_uncore * __percpu *amd_uncore_nb; @@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; uncore_nb->pmu = &amd_nb_pmu; + uncore_nb->id = -1; *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } @@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; uncore_l2->active_mask = &amd_l2_active_mask; uncore_l2->pmu = &amd_l2_pmu; + uncore_l2->id = -1; *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } @@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, continue; if (this->id == that->id) { - that->free_when_cpu_online = this; + hlist_add_head(&this->node, &uncore_unused_list); this = that; break; } @@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu) return 0; } +static void uncore_clean_online(void) +{ + struct amd_uncore *uncore; + struct hlist_node *n; + + hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { + hlist_del(&uncore->node); + kfree(uncore); + } +} + static void uncore_online(unsigned int cpu, struct amd_uncore * __percpu *uncores) { struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); - kfree(uncore->free_when_cpu_online); - uncore->free_when_cpu_online = NULL; + uncore_clean_online(); if (cpu == uncore->cpu) cpumask_set_cpu(cpu, uncore->active_mask); From 2561d309dfd1555e781484af757ed0115035ddb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 16:02:32 -0400 Subject: [PATCH 0104/1050] alpha: fix copy_from_user() it should clear the destination even when access_ok() fails. 
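The convention being restored, in outline (simplified sketch only; unlike the asm-generic patch that follows, it does not show the zeroing of a partially copied tail):

	static inline long copy_from_user(void *to, const void __user *from, long n)
	{
		if (likely(access_ok(VERIFY_READ, from, n)))
			n = __copy_from_user(to, from, n);
		else
			memset(to, 0, n);	/* never leak stale kernel memory */
		return n;
	}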
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/alpha/include/asm/uaccess.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index c419b43c461d..466e42e96bfa 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -371,14 +371,6 @@ __copy_tofrom_user_nocheck(void *to, const void *from, long len) return __cu_len; } -extern inline long -__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate) -{ - if (__access_ok((unsigned long)validate, len, get_fs())) - len = __copy_tofrom_user_nocheck(to, from, len); - return len; -} - #define __copy_to_user(to, from, n) \ ({ \ __chk_user_ptr(to); \ @@ -393,17 +385,22 @@ __copy_tofrom_user(void *to, const void *from, long len, const void __user *vali #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user - extern inline long copy_to_user(void __user *to, const void *from, long n) { - return __copy_tofrom_user((__force void *)to, from, n, to); + if (likely(__access_ok((unsigned long)to, n, get_fs()))) + n = __copy_tofrom_user_nocheck((__force void *)to, from, n); + return n; } extern inline long copy_from_user(void *to, const void __user *from, long n) { - return __copy_tofrom_user(to, (__force void *)from, n, from); + if (likely(__access_ok((unsigned long)from, n, get_fs()))) + n = __copy_tofrom_user_nocheck(to, (__force void *)from, n); + else + memset(to, 0, n); + return n; } extern void __do_clear_user(void); From 2545e5da080b4839dd859e3b09343a884f6ab0e3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 16:36:37 -0400 Subject: [PATCH 0105/1050] asm-generic: make copy_from_user() zero the destination properly ... in all cases, including the failing access_ok() Note that some architectures using asm-generic/uaccess.h have __copy_from_user() not zeroing the tail on failure halfway through. This variant works either way. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/asm-generic/uaccess.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 1bfa602958f2..04e21a41796a 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -257,11 +257,13 @@ extern int __get_user_bad(void) __attribute__((noreturn)); static inline long copy_from_user(void *to, const void __user * from, unsigned long n) { + unsigned long res = n; might_fault(); - if (access_ok(VERIFY_READ, from, n)) - return __copy_from_user(to, from, n); - else - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline long copy_to_user(void __user *to, From f6d7c1b5598b6407c3f1da795dd54acf99c1990c Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 29 Aug 2016 07:45:49 +0000 Subject: [PATCH 0106/1050] mtd: nand: davinci: Reinitialize the HW ECC engine in 4bit hwctl This fixes subpage writes when using 4-bit HW ECC. There have been numerous reports about ECC errors with devices using this driver for a while. Also the 4-bit ECC has been reported as broken with subpages in [1] and with 16-bit NANDs in the driver and in mach* board files both in mainline and in the vendor BSPs.
Here is what I saw with 4-bit ECC on a 16-bit NAND (on an LCDK), which got me to try reinitializing the ECC engine: - R/W on whole pages properly generates/checks RS code - try writing the 1st subpage only of a blank page, the subpage is well written and the RS code properly generated, re-reading the same page the HW detects some ECC error, reading the same page again no ECC error is detected Note that the ECC engine is already reinitialized in the 1-bit case. Tested on my LCDK with UBI+UBIFS using subpages. This could potentially get rid of the issue worked around in [1]. [1] 28c015a9daab ("mtd: davinci-nand: disable subpage write for keystone-nand") Fixes: 6a4123e581b3 ("mtd: nand: davinci_nand, 4-bit ECC for smallpage") Cc: Signed-off-by: Karl Beldan Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/davinci_nand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c index cc07ba0f044d..27fa8b87cd5f 100644 --- a/drivers/mtd/nand/davinci_nand.c +++ b/drivers/mtd/nand/davinci_nand.c @@ -240,6 +240,9 @@ static void nand_davinci_hwctl_4bit(struct mtd_info *mtd, int mode) unsigned long flags; u32 val; + /* Reset ECC hardware */ + davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET); + spin_lock_irqsave(&davinci_nand_lock, flags); /* Start 4-bit ECC calculation for read/write */ From 4214ebf4654798309364d0c678b799e402f38288 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:19 +0100 Subject: [PATCH 0107/1050] Fix memory leaks in cifs_do_mount() Fix memory leaks introduced by the patch fs/cifs: make share unaccessible at root level mountable Also move allocation of cifs_sb->prepath to cifs_setup_cifs_sb(). Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 20 ++++++++------------ fs/cifs/cifsproto.h | 2 +- fs/cifs/connect.c | 10 +++++++++- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6bbec5e784cd..cc9cdab52dab 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -686,26 +686,22 @@ cifs_do_mount(struct file_system_type *fs_type, cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); if (cifs_sb->mountdata == NULL) { root = ERR_PTR(-ENOMEM); - goto out_cifs_sb; + goto out_free; } - if (volume_info->prepath) { - cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL); - if (cifs_sb->prepath == NULL) { - root = ERR_PTR(-ENOMEM); - goto out_cifs_sb; - } + rc = cifs_setup_cifs_sb(volume_info, cifs_sb); + if (rc) { + root = ERR_PTR(rc); + goto out_free; } - cifs_setup_cifs_sb(volume_info, cifs_sb); - rc = cifs_mount(cifs_sb, volume_info); if (rc) { if (!(flags & MS_SILENT)) cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", rc); root = ERR_PTR(rc); - goto out_mountdata; + goto out_free; } mnt_data.vol = volume_info; @@ -752,9 +748,9 @@ out: cifs_cleanup_volume_info(volume_info); return root; -out_mountdata: +out_free: + kfree(cifs_sb->prepath); kfree(cifs_sb->mountdata); -out_cifs_sb: kfree(cifs_sb); out_nls: unload_nls(volume_info->local_nls); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1243bd326591..95dab43646f0 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -184,7 +184,7 @@ extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, unsigned int to_read); extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, unsigned int to_read); -extern void cifs_setup_cifs_sb(struct smb_vol
*pvolume_info, +extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7ae03283bd61..4546926680d4 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3222,7 +3222,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, } } -void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, +int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb) { INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); @@ -3316,6 +3316,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n"); + + if (pvolume_info->prepath) { + cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL); + if (cifs_sb->prepath == NULL) + return -ENOMEM; + } + + return 0; } static void From c1d8b24d18192764fe82067ec6aa8d4c3bf094e0 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:20 +0100 Subject: [PATCH 0108/1050] Compare prepaths when comparing superblocks The patch "fs/cifs: make share unaccessible at root level mountable" makes use of prepaths when any component of the underlying path is inaccessible. When mounting two separate shares that have different prepaths but are otherwise similar, we end up sharing superblocks when we shouldn't be doing so. Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/connect.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4546926680d4..2e4f4bad8b1e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2781,6 +2781,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) return 1; } +static int +match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) +{ + struct cifs_sb_info *old = CIFS_SB(sb); + struct cifs_sb_info *new = mnt_data->cifs_sb; + + if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) { + if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)) + return 0; + /* The prepath should be null terminated strings */ + if (strcmp(new->prepath, old->prepath)) + return 0; + + return 1; + } + return 0; +} + int cifs_match_super(struct super_block *sb, void *data) { @@ -2808,7 +2826,8 @@ cifs_match_super(struct super_block *sb, void *data) if (!match_server(tcp_srv, volume_info) || !match_session(ses, volume_info) || - !match_tcon(tcon, volume_info->UNC)) { + !match_tcon(tcon, volume_info->UNC) || + !match_prepath(sb, mnt_data)) { rc = 0; goto out; } From 348c1bfa84dfc47da1f1234b7f2bf09fa798edea Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:21 +0100 Subject: [PATCH 0109/1050] Move check for prefix path to within cifs_get_root() Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index cc9cdab52dab..14ae4b8e1a3c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -609,6 +609,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) char *s, *p; char sep; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + return dget(sb->s_root); + full_path = cifs_build_path_to_root(vol,
cifs_sb, cifs_sb_master_tcon(cifs_sb)); if (full_path == NULL) @@ -731,11 +734,7 @@ cifs_do_mount(struct file_system_type *fs_type, sb->s_flags |= MS_ACTIVE; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) - root = dget(sb->s_root); - else - root = cifs_get_root(volume_info, sb); - + root = cifs_get_root(volume_info, sb); if (IS_ERR(root)) goto out_super; From 767ae08678c2c796bcd7f582ee457aee20a28a1e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:49 +0300 Subject: [PATCH 0110/1050] perf/core: Fix a race between mmap_close() and set_output() of AUX events In the mmap_close() path we need to stop all the AUX events that are writing data to the AUX area that we are unmapping, before we can safely free the pages. To determine if an event needs to be stopped, we're comparing its ->rb against the one that's getting unmapped. However, a SET_OUTPUT ioctl may turn up inside an AUX transaction and swizzle event::rb to some other ring buffer, but the transaction will keep writing data to the old ring buffer until the event gets scheduled out. At this point, mmap_close() will skip over such an event and will proceed to free the AUX area, while it's still being used by this event, which will set off a warning in the mmap_close() path and cause a memory corruption. To avoid this, always stop an AUX event before its ->rb is updated; this will release the (potentially) last reference on the AUX area of the buffer. If the event gets restarted, its new ring buffer will be used. If another SET_OUTPUT comes and switches it back to the old ring buffer that's getting unmapped, it's also fine: this ring buffer's aux_mmap_count will be zero and AUX transactions won't start any more. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 07ac8596a728..a54f2c2cdb20 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info) return 0; } -static int perf_event_restart(struct perf_event *event) +static int perf_event_stop(struct perf_event *event, int restart) { struct stop_event_data sd = { .event = event, - .restart = 1, + .restart = restart, }; int ret = 0; @@ -4845,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event, spin_unlock_irqrestore(&rb->event_lock, flags); } + /* + * Avoid racing with perf_mmap_close(AUX): stop the event + * before swizzling the event::rb pointer; if it's getting + * unmapped, its aux_mmap_count will be 0 and it won't + * restart. See the comment in __perf_pmu_output_stop(). + * + * Data will inevitably be lost when set_output is done in + * mid-air, but then again, whoever does it like this is + * not in for the data anyway. 
+ */ + if (has_aux(event)) + perf_event_stop(event, 0); + rcu_assign_pointer(event->rb, rb); if (old_rb) { @@ -6120,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } void perf_event_exec(void) @@ -6164,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data) /* * In case of inheritance, it will be the parent that links to the - * ring-buffer, but it will be the child that's actually using it: + * ring-buffer, but it will be the child that's actually using it. + * + * We are using event::rb to determine if the event should be stopped, + * however this may race with ring_buffer_attach() (through set_output), + * which will make us skip the event that actually needs to be stopped. + * So ring_buffer_attach() has to stop an aux event before re-assigning + * its rb pointer. */ if (rcu_dereference(parent->rb) == rb) ro->err = __perf_event_stop(&sd); @@ -6678,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } /* @@ -7867,7 +7886,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) mmput(mm); restart: - perf_event_restart(event); + perf_event_stop(event, 1); } /* From b79ccadd6bb10e72cf784a298ca6dc1398eb9a24 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:50 +0300 Subject: [PATCH 0111/1050] perf/core: Fix aux_mmap_count vs aux_refcount order The order of accesses to ring buffer's aux_mmap_count and aux_refcount has to be preserved across the users, namely perf_mmap_close() and perf_aux_output_begin(), otherwise the inversion can result in the latter holding the last reference to the aux buffer and subsequently free'ing it in atomic context, triggering a warning. > ------------[ cut here ]------------ > WARNING: CPU: 0 PID: 257 at kernel/events/ring_buffer.c:541 __rb_free_aux+0x11a/0x130 > CPU: 0 PID: 257 Comm: stopbug Not tainted 4.8.0-rc1+ #2596 > Call Trace: > [] __warn+0xcb/0xf0 > [] warn_slowpath_null+0x1d/0x20 > [] __rb_free_aux+0x11a/0x130 > [] rb_free_aux+0x18/0x20 > [] perf_aux_output_begin+0x163/0x1e0 > [] bts_event_start+0x3a/0xd0 > [] bts_event_add+0x5d/0x80 > [] event_sched_in.isra.104+0xf6/0x2f0 > [] group_sched_in+0x6e/0x190 > [] ctx_sched_in+0x2fe/0x5f0 > [] perf_event_sched_in+0x60/0x80 > [] ctx_resched+0x5b/0x90 > [] __perf_event_enable+0x1e1/0x240 > [] event_function+0xa9/0x180 > [] ? perf_cgroup_attach+0x70/0x70 > [] remote_function+0x3f/0x50 > [] flush_smp_call_function_queue+0x83/0x150 > [] generic_smp_call_function_single_interrupt+0x13/0x60 > [] smp_call_function_single_interrupt+0x27/0x40 > [] call_function_single_interrupt+0x89/0x90 > [] finish_task_switch+0xa6/0x210 > [] ? finish_task_switch+0x67/0x210 > [] __schedule+0x3dd/0xb50 > [] schedule+0x35/0x80 > [] sys_sched_yield+0x61/0x70 > [] entry_SYSCALL_64_fastpath+0x18/0xa8 > ---[ end trace 6235f556f5ea83a9 ]--- This patch puts the checks in perf_aux_output_begin() in the same order as that of perf_mmap_close(). 
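For illustration, the ordering rule can be compressed into a standalone C11 sketch; the type and helper names below are stand-ins, not the kernel's, and the real code's memory-ordering details are elided:

#include <stdatomic.h>
#include <stdbool.h>

struct aux_buf {
	atomic_int mmap_count;	/* live mappings of the AUX area */
	atomic_int refcount;	/* active users of the AUX area */
};

/* Reader side (the perf_aux_output_begin() analogue): check
 * mmap_count *before* taking a reference, in the same order the
 * teardown path releases them. */
static bool aux_buf_get(struct aux_buf *b)
{
	if (atomic_load(&b->mmap_count) == 0)
		return false;		/* teardown already in progress */

	/* inc-not-zero: never resurrect a buffer whose last
	 * reference is already gone */
	int old = atomic_load(&b->refcount);
	do {
		if (old == 0)
			return false;
	} while (!atomic_compare_exchange_weak(&b->refcount, &old, old + 1));

	return true;
}

/* Teardown side (the perf_mmap_close() analogue): drop the mapping
 * first, then the reference, so the reader above can never end up
 * holding the final reference from atomic context. */
static void aux_buf_unmap(struct aux_buf *b)
{
	atomic_fetch_sub(&b->mmap_count, 1);
	if (atomic_fetch_sub(&b->refcount, 1) == 1) {
		/* last user gone: free the pages here, in a context
		 * that is allowed to sleep */
	}
}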
Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ae9b90dc9a5a..257fa460b846 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (!rb) return NULL; - if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount)) + if (!rb_has_aux(rb)) goto err; /* - * If rb::aux_mmap_count is zero (and rb_has_aux() above went through), - * the aux buffer is in perf_mmap_close(), about to get freed. + * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(), + * about to get freed, so we leave immediately. + * + * Checking rb::aux_mmap_count and rb::refcount has to be done in + * the same order, see perf_mmap_close. Otherwise we end up freeing + * aux pages in this path, which is a bug, because in_atomic(). */ if (!atomic_read(&rb->aux_mmap_count)) - goto err_put; + goto err; + + if (!atomic_inc_not_zero(&rb->aux_refcount)) + goto err; /* * Nesting is not supported for AUX area, make sure nested From a9a94401c2b5805c71e39427b1af1bf1b9f67cd0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:51 +0300 Subject: [PATCH 0112/1050] perf/x86/intel/bts: Fix confused ordering of PMU callbacks The intel_bts driver is using a CPU-local 'started' variable to order callbacks and PMIs and make sure that AUX transactions don't get messed up. However, the ordering rules regarding this variable are a complete mess, which recently resulted in perf_fuzzer-triggered warnings and panics. The general ordering rule that this patch is enforcing is that this cpu-local variable be set only when the cpu-local AUX transaction is active; consequently, this variable is to be checked before the AUX-related bits can be touched.
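The state discipline this describes can be sketched outside the kernel as follows (illustrative names; the real driver orders accesses with READ_ONCE/WRITE_ONCE and a compiler barrier rather than C11 atomics):

#include <stdatomic.h>

enum bts_state { BTS_STOPPED, BTS_INACTIVE, BTS_ACTIVE };

struct bts_like {
	_Atomic enum bts_state state;
	void *handle_event;	/* only meaningful while a transaction is on */
};

static void bts_start(struct bts_like *b)
{
	/* hardware/ds configuration would happen here, before the
	 * state is published as ACTIVE */
	atomic_store(&b->state, BTS_ACTIVE);
}

static void bts_pmi(struct bts_like *b)
{
	/* the PMI looks at the state before touching anything
	 * AUX-related; STOPPED means handle_event is off limits */
	if (atomic_load(&b->state) == BTS_STOPPED)
		return;
	/* safe to consume b->handle_event here */
}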
Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 104 +++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 24 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 0a6e393a2e62..61e1d713b114 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -31,7 +31,17 @@ struct bts_ctx { struct perf_output_handle handle; struct debug_store ds_back; - int started; + int state; +}; + +/* BTS context states: */ +enum { + /* no ongoing AUX transactions */ + BTS_STATE_STOPPED = 0, + /* AUX transaction is on, BTS tracing is disabled */ + BTS_STATE_INACTIVE, + /* AUX transaction is on, BTS tracing is running */ + BTS_STATE_ACTIVE, }; static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); @@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts) static int bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +/* + * Ordering PMU callbacks wrt themselves and the PMI is done by means + * of bts::state, which: + * - is set when bts::handle::event is valid, that is, between + * perf_aux_output_begin() and perf_aux_output_end(); + * - is zero otherwise; + * - is ordered against bts::handle::event with a compiler barrier. + */ + static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); @@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event) /* * local barrier to make sure that ds configuration made it - * before we enable BTS + * before we enable BTS and bts::state goes ACTIVE */ wmb(); + /* INACTIVE/STOPPED -> ACTIVE */ + WRITE_ONCE(bts->state, BTS_STATE_ACTIVE); + intel_pmu_enable_bts(config); } @@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags) __bts_event_start(event); - /* PMI handler: this counter is running and likely generating PMIs */ - ACCESS_ONCE(bts->started) = 1; - return; fail_end_stop: @@ -263,30 +282,34 @@ fail_stop: event->hw.state = PERF_HES_STOPPED; } -static void __bts_event_stop(struct perf_event *event) +static void __bts_event_stop(struct perf_event *event, int state) { + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ + WRITE_ONCE(bts->state, state); + /* * No extra synchronization is mandated by the documentation to have * BTS data stores globally visible. 
*/ intel_pmu_disable_bts(); - - if (event->hw.state & PERF_HES_STOPPED) - return; - - ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED; } static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *buf = NULL; + int state = READ_ONCE(bts->state); - /* PMI handler: don't restart this counter */ - ACCESS_ONCE(bts->started) = 0; + if (state == BTS_STATE_ACTIVE) + __bts_event_stop(event, BTS_STATE_STOPPED); - __bts_event_stop(event); + if (state != BTS_STATE_STOPPED) + buf = perf_get_aux(&bts->handle); + + event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); @@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); } @@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + int state = READ_ONCE(bts->state); - if (bts->handle.event && bts->started) + /* + * Here we transition from INACTIVE to ACTIVE; + * if we instead are STOPPED from the interrupt handler, + * stay that way. Can't be ACTIVE here though. + */ + if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE)) + return; + + if (state == BTS_STATE_STOPPED) + return; + + if (bts->handle.event) __bts_event_start(bts->handle.event); } @@ -319,8 +355,15 @@ void intel_bts_disable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + /* + * Here we transition from ACTIVE to INACTIVE; + * do nothing for STOPPED or INACTIVE. + */ + if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE) + return; + if (bts->handle.event) - __bts_event_stop(bts->handle.event); + __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE); } static int @@ -407,9 +450,13 @@ int intel_bts_interrupt(void) struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err; + int err = -ENOSPC; - if (!event || !bts->started) + /* + * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, + * so we can only be INACTIVE or STOPPED + */ + if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) return 0; buf = perf_get_aux(&bts->handle); @@ -432,12 +479,21 @@ int intel_bts_interrupt(void) !!local_xchg(&buf->lost, 0)); buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) - return 1; + if (buf) + err = bts_buffer_reset(buf, &bts->handle); - err = bts_buffer_reset(buf, &bts->handle); - if (err) - perf_aux_output_end(&bts->handle, 0, false); + if (err) { + WRITE_ONCE(bts->state, BTS_STATE_STOPPED); + + if (buf) { + /* + * BTS_STATE_STOPPED should be visible before + * cleared handle::event + */ + barrier(); + perf_aux_output_end(&bts->handle, 0, false); + } + } return 1; } From 4d4c474124649198d9b0a065c06f9362cf18e14e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:52 +0300 Subject: [PATCH 0113/1050] perf/x86/intel/bts: Fix BTS PMI detection Since BTS doesn't have a dedicated PMI status bit, the driver needs to take extra care to check for the condition that triggers it to avoid spurious NMI warnings. Regardless of the local BTS context state, the only way of knowing that the NMI is ours is to compare the write pointer against the interrupt threshold. 
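A minimal sketch of that ownership test, assuming a stand-in for the debug store layout rather than the kernel's real structure:

struct ds_like {
	unsigned long bts_index;		/* current write pointer */
	unsigned long bts_interrupt_threshold;	/* PMI trigger point */
};

/* Returns nonzero when the NMI was raised by BTS, independent of the
 * driver's local state; this mirrors the check the patch adds. */
static int bts_nmi_is_ours(const struct ds_like *ds)
{
	return ds->bts_index >= ds->bts_interrupt_threshold;
}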
Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-5-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 61e1d713b114..9233edf993e1 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -446,26 +446,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { + struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err = -ENOSPC; + int err = -ENOSPC, handled = 0; + + /* + * The only surefire way of knowing if this NMI is ours is by checking + * the write ptr against the PMI threshold. + */ + if (ds->bts_index >= ds->bts_interrupt_threshold) + handled = 1; /* * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, * so we can only be INACTIVE or STOPPED */ if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) - return 0; + return handled; buf = perf_get_aux(&bts->handle); + if (!buf) + return handled; + /* * Skip snapshot counters: they don't use the interrupt, but * there's no other way of telling, because the pointer will * keep moving */ - if (!buf || buf->snapshot) + if (buf->snapshot) return 0; old_head = local_read(&buf->head); @@ -473,7 +484,7 @@ int intel_bts_interrupt(void) /* no new data */ if (old_head == local_read(&buf->head)) - return 0; + return handled; perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); From ef9ef3befa0d76008e988a9ed9fe439e803351b9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:53 +0300 Subject: [PATCH 0114/1050] perf/x86/intel/bts: Kill a silly warning At the moment, intel_bts will WARN() out if there is more than one event writing to the same ring buffer, via SET_OUTPUT, and will only send data from one event to a buffer. There is no reason to have this warning in, so kill it. 
Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-6-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 9233edf993e1..bdcd6510992c 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -378,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) return 0; head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); - if (WARN_ON_ONCE(head != local_read(&buf->head))) - return -EINVAL; phys = &buf->buf[buf->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; From 8ef9b8455a2a3049efa9e46e8a6402b972a3eb41 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Sep 2016 14:42:55 +0200 Subject: [PATCH 0115/1050] perf/x86/intel: Fix PEBSv3 record drain Alexander hit the WARN_ON_ONCE(!event) on his Skylake while running the perf fuzzer. This means the PEBSv3 record included a status bit for an inactive event, something that _should_ not happen. Move the code that filters the status bits against our known PEBS events up a spot to guarantee we only deal with events we know about. Further add "continue" statements to the WARN_ON_ONCE()s such that we'll not die nor generate silly events in case we ever do hit them again. Reported-by: Alexander Shishkin Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org Fixes: a3d86542de88 ("perf/x86/intel/pebs: Add PEBSv3 decoding") Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7ce9f3f669e6..9b983a474253 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1274,18 +1274,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct pebs_record_nhm *p = at; u64 pebs_status; - /* PEBS v3 has accurate status bits */ + pebs_status = p->status & cpuc->pebs_enabled; + pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; + + /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { - for_each_set_bit(bit, (unsigned long *)&p->status, - MAX_PEBS_EVENTS) + for_each_set_bit(bit, (unsigned long *)&pebs_status, + x86_pmu.max_pebs_events) counts[bit]++; continue; } - pebs_status = p->status & cpuc->pebs_enabled; - pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; - /* * On some CPUs the PEBS status can be zero when PEBS is * racing with clearing of GLOBAL_STATUS. 
@@ -1333,8 +1333,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; event = cpuc->events[bit]; - WARN_ON_ONCE(!event); - WARN_ON_ONCE(!event->attr.precise_ip); + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; /* log dropped samples number */ if (error[bit]) From 43ba588346455dcc984dc98a49af1c2eb1e9aa75 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Sep 2016 19:25:37 -0700 Subject: [PATCH 0116/1050] Input: silead_gsl1680 - document firmware-name, fix implementation The driver has supported touchscreen-fw-name to specify the firmware to load since it was merged, but this was omitted from the dt-binding documentation. During review of adding touchscreen-fw-name to the binding documentation it was brought up that there is a standard property name called "firmware-name" for this, which should be used. Since there are no users of touchscreen-fw-name yet, this commit adds documentation of "firmware-name" to the dt-binding documentation and switches the driver over to use this. This commit also makes the driver add a "silead/" prefix to the firmware name from DT before calling request_firmware. That the firmware files are stored under /lib/firmware/silead under Linux is an implementation detail and does not belong in devicetree. Signed-off-by: Hans de Goede Acked-by: Rob Herring Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/touchscreen/silead_gsl1680.txt | 1 + drivers/input/touchscreen/silead.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt index 1112e0d794e1..820fee4b77b6 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt @@ -13,6 +13,7 @@ Required properties: - touchscreen-size-y : See touchscreen.txt Optional properties: +- firmware-name : File basename (string) for board specific firmware - touchscreen-inverted-x : See touchscreen.txt - touchscreen-inverted-y : See touchscreen.txt - touchscreen-swapped-x-y : See touchscreen.txt diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index b2744a64e933..c7ab116a16b3 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -390,9 +390,10 @@ static void silead_ts_read_props(struct i2c_client *client) data->max_fingers = 5; /* Most devices handle up-to 5 fingers */ } - error = device_property_read_string(dev, "touchscreen-fw-name", &str); + error = device_property_read_string(dev, "firmware-name", &str); if (!error) - snprintf(data->fw_name, sizeof(data->fw_name), "%s", str); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s", str); else dev_dbg(dev, "Firmware file name read error. Using default."); } From 4af2ff91ec3f42b538a65cf12df5f9faf6aaa914 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Sep 2016 19:32:14 -0700 Subject: [PATCH 0117/1050] Input: silead_gsl1680 - use "silead/" prefix for firmware loading The silead touch-controller ICs use a different firmware per digitizer / tablet model. So there are going to be quite a few of them and they really should be under a separate subdir. This commit prefixes the default firmware names with "silead/", just like we are already doing for devicetree-specified firmware names.
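As a rough standalone sketch of the resulting naming scheme (the helper name and buffer handling here are illustrative; the driver lower-cases the name only in its ACPI branch):

#include <ctype.h>
#include <stddef.h>
#include <stdio.h>

/* Build "silead/<id>.fw" and fold it to lower case. */
static void silead_fw_name(char *buf, size_t len, const char *id)
{
	size_t i;

	snprintf(buf, len, "silead/%s.fw", id);
	for (i = 0; buf[i]; i++)
		buf[i] = (char)tolower((unsigned char)buf[i]);
}

/* e.g. silead_fw_name(name, sizeof(name), "GSL1680")
 *	yields "silead/gsl1680.fw" */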
Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/silead.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index c7ab116a16b3..f502c8488be8 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -411,14 +411,14 @@ static int silead_ts_set_default_fw_name(struct silead_ts_data *data, if (!acpi_id) return -ENODEV; - snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", - acpi_id->id); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s.fw", acpi_id->id); for (i = 0; i < strlen(data->fw_name); i++) data->fw_name[i] = tolower(data->fw_name[i]); } else { - snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", - id->name); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s.fw", id->name); } return 0; @@ -427,7 +427,8 @@ static int silead_ts_set_default_fw_name(struct silead_ts_data *data, static int silead_ts_set_default_fw_name(struct silead_ts_data *data, const struct i2c_device_id *id) { - snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", id->name); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s.fw", id->name); return 0; } #endif From b519d408ea32040b1c7e10b155a3ee9a36660947 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 11 Sep 2016 14:50:01 -0400 Subject: [PATCH 0118/1050] NFSv4.1: Fix the CREATE_SESSION slot number accounting Ensure that we conform to the algorithm described in RFC5661, section 18.36.4 for when to bump the sequence id. In essence we do it for all cases except when the RPC call timed out, or in case of the server returning NFS4ERR_DELAY or NFS4ERR_STALE_CLIENTID. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c380d2ee4137..a9dec32ba9ba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7570,12 +7570,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); trace_nfs4_create_session(clp, status); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EACCES: + case -EAGAIN: + goto out; + }; + + clp->cl_seqid++; if (!status) { /* Verify the session's negotiated channel_attrs values */ status = nfs4_verify_channel_attrs(&args, &res); /* Increment the clientid slot sequence id */ - if (clp->cl_seqid == res.seqid) - clp->cl_seqid++; if (status) goto out; nfs4_update_session(session, &res); From bd00a240dc52e28706fbbe3aceda63e6c291b433 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 7 Sep 2016 10:46:30 +0530 Subject: [PATCH 0119/1050] powerpc/powernv: Fix restore of SPRs upon wake up from hypervisor state loss pnv_wakeup_tb_loss() currently expects cr4 to be "eq" if the CPU is waking up from a complete hypervisor state loss. Hence, it currently restores the SPR contents only if cr4 is "eq". However, after commit bcef83a00dc4 ("powerpc/powernv: Add platform support for stop instruction"), on ISA v3.0 CPUs, the function pnv_restore_hyp_resource() sets cr4 to contain the result of the comparison between the state the CPU has woken up from and the first deep stop state before calling pnv_wakeup_tb_loss(). 
Thus if the CPU woke up from a state that is deeper than the first deep stop state, cr4 will have "gt" set and hence, pnv_wakeup_tb_loss() will fail to restore the SPRs on waking up from such a state. Fix the code in pnv_wakeup_tb_loss() to restore the SPR states when cr4 is "eq" or "gt". Fixes: bcef83a00dc4 ("powerpc/powernv: Add platform support for stop instruction") Signed-off-by: Gautham R. Shenoy Reviewed-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 2265c6398a17..bd739fed26e3 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -411,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * r13 - PACA * cr3 - gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) @@ -453,7 +453,7 @@ lwarx_loop2: * At this stage * cr2 - eq if first thread to wakeup in core * cr3- gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -481,7 +481,7 @@ first_thread_in_subcore: * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore */ - bne cr4,subcore_state_restored + blt cr4,subcore_state_restored /* Restore per-subcore state */ ld r4,_SDR1(r1) @@ -526,7 +526,7 @@ timebase_resync: * If waking up from sleep, per core state is not lost, skip to * clear_lock. */ - bne cr4,clear_lock + blt cr4,clear_lock /* * First thread in the core to wake up and its waking up with @@ -557,7 +557,7 @@ common_exit: * If waking up from sleep, hypervisor state is not lost. Hence * skip hypervisor state restore. */ - bne cr4,hypervisor_state_restored + blt cr4,hypervisor_state_restored /* Waking up from winkle */ From ffed15d3ce3f710b94e6f402e1ca2318f7d7c0e2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Sep 2016 12:48:28 +1000 Subject: [PATCH 0120/1050] powerpc/kernel: Fix size of NUM_CPU_FTR_KEYS on 32-bit The number of CPU feature keys is meant to map 1:1 to the number of CPU feature flags defined in cputable.h, and the latter must fit in an unsigned long. In commit 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()"), I incorrectly defined NUM_CPU_FTR_KEYS to 64. There should be no real adverse consequences of this bug, other than us allocating too many keys. Fix it by using BITS_PER_LONG. 
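The invariant is straightforward to restate in plain C: one key per bit of an unsigned long, so the array size should follow the word size rather than being hard-coded. A sketch with stand-in types, not the kernel's jump-label machinery:

#include <limits.h>

#define BITS_PER_LONG_LIKE (sizeof(unsigned long) * CHAR_BIT)

struct key_like { int enabled; };	/* stand-in for a static key */

/* 32 keys on 32-bit, 64 on 64-bit -- exactly as many as the
 * feature mask can address, never more */
static struct key_like cpu_feature_keys_like[BITS_PER_LONG_LIKE];

static inline int has_feature_like(unsigned int bit)
{
	return cpu_feature_keys_like[bit].enabled;
}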
Fixes: 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()") Tested-by: Meelis Roos Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpu_has_feature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 2ef55f8968a2..b312b152461b 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -15,7 +15,7 @@ static inline bool early_cpu_has_feature(unsigned long feature) #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS #include -#define NUM_CPU_FTR_KEYS 64 +#define NUM_CPU_FTR_KEYS BITS_PER_LONG extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS]; From f190fd92458da3e869b4e2c6289e2c617490ae53 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 2 Sep 2016 10:37:56 +0200 Subject: [PATCH 0121/1050] USB: serial: simple: add support for another Infineon flashloader This patch adds support for Infineon flashloader 0x8087/0x0801. The flashloader is used in the Telit LE940B modem family with the Telit flashing application. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/usb-serial-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index a204782ae530..e98b6e57b703 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -54,7 +54,8 @@ DEVICE(funsoft, FUNSOFT_IDS); /* Infineon Flashloader driver */ #define FLASHLOADER_IDS() \ { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \ - { USB_DEVICE(0x8087, 0x0716) } + { USB_DEVICE(0x8087, 0x0716) }, \ + { USB_DEVICE(0x8087, 0x0801) } DEVICE(flashloader, FLASHLOADER_IDS); /* Google Serial USB SubClass */ From 981b178964d03f6f8e6cca01568c17d1dbafdf0e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:35 +0200 Subject: [PATCH 0122/1050] dt-bindings: mmc: sdhci-st: Mention the discretionary "icn" clock The interconnect (ICN) clock is required for MMC to work correctly on some ST platforms. When it is not supplied, MMC breaks with the following output: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress.
Signed-off-by: Lee Jones Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/sdhci-st.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt index 88faa91125bf..3cd4c43a3260 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt +++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt @@ -10,7 +10,7 @@ Required properties: subsystem (mmcss) inside the FlashSS (available in STiH407 SoC family). -- clock-names: Should be "mmc". +- clock-names: Should be "mmc" and "icn". (NB: The latter is not compulsory) See: Documentation/devicetree/bindings/resource-names.txt - clocks: Phandle to the clock. See: Documentation/devicetree/bindings/clock/clock-bindings.txt From 3ae50f4512ce831e8b63eb54ad969417ff30ada7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:36 +0200 Subject: [PATCH 0123/1050] mmc: sdhci-st: Handle interconnect clock Some ST platforms contain interconnect (ICN) clocks which must be handled correctly in order to obtain full functionality of a given IP. In this case, if the ICN clocks are not handled properly by the ST SDHCI driver, MMC will break and the following output can be observed: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. A decent point was raised about minimising the use of a local variable that we 'could' do without. I've chosen consistency over the possibility of reducing the local variable count by one, thinking that it's more important for the code to be grouped and authored in a similar manner/style for greater maintainability/readability.
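Condensed, the optional-clock pattern the patch uses looks like the sketch below (function name illustrative, error handling of the enables trimmed); it works because the kernel clk API treats a NULL clk as a no-op:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int sdhci_st_get_clks_sketch(struct platform_device *pdev,
				    struct clk **mmc, struct clk **icn)
{
	*mmc = devm_clk_get(&pdev->dev, "mmc");
	if (IS_ERR(*mmc))
		return PTR_ERR(*mmc);	/* "mmc" is compulsory */

	*icn = devm_clk_get(&pdev->dev, "icn");
	if (IS_ERR(*icn))
		*icn = NULL;		/* "icn" is discretionary */

	clk_prepare_enable(*mmc);
	clk_prepare_enable(*icn);	/* no-op when *icn == NULL */
	return 0;
}

The same NULL-ing trick keeps the suspend/resume and remove paths free of special cases, since clk_disable_unprepare() also accepts a NULL clk.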
Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-st.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c index c95ba83366a0..ed92ce729dde 100644 --- a/drivers/mmc/host/sdhci-st.c +++ b/drivers/mmc/host/sdhci-st.c @@ -28,6 +28,7 @@ struct st_mmc_platform_data { struct reset_control *rstc; + struct clk *icnclk; void __iomem *top_ioaddr; }; @@ -353,7 +354,7 @@ static int sdhci_st_probe(struct platform_device *pdev) struct sdhci_host *host; struct st_mmc_platform_data *pdata; struct sdhci_pltfm_host *pltfm_host; - struct clk *clk; + struct clk *clk, *icnclk; int ret = 0; u16 host_version; struct resource *res; @@ -365,6 +366,11 @@ static int sdhci_st_probe(struct platform_device *pdev) return PTR_ERR(clk); } + /* ICN clock isn't compulsory, but use it if it's provided. */ + icnclk = devm_clk_get(&pdev->dev, "icn"); + if (IS_ERR(icnclk)) + icnclk = NULL; + rstc = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(rstc)) rstc = NULL; @@ -389,6 +395,7 @@ static int sdhci_st_probe(struct platform_device *pdev) } clk_prepare_enable(clk); + clk_prepare_enable(icnclk); /* Configure the FlashSS Top registers for setting eMMC TX/RX delay */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, @@ -400,6 +407,7 @@ static int sdhci_st_probe(struct platform_device *pdev) } pltfm_host->clk = clk; + pdata->icnclk = icnclk; /* Configure the Arasan HC inside the flashSS */ st_mmcss_cconfig(np, host); @@ -422,6 +430,7 @@ static int sdhci_st_probe(struct platform_device *pdev) return 0; err_out: + clk_disable_unprepare(icnclk); clk_disable_unprepare(clk); err_of: sdhci_pltfm_free(pdev); @@ -442,6 +451,8 @@ static int sdhci_st_remove(struct platform_device *pdev) ret = sdhci_pltfm_unregister(pdev); + clk_disable_unprepare(pdata->icnclk); + if (rstc) reset_control_assert(rstc); @@ -462,6 +473,7 @@ static int sdhci_st_suspend(struct device *dev) if (pdata->rstc) reset_control_assert(pdata->rstc); + clk_disable_unprepare(pdata->icnclk); clk_disable_unprepare(pltfm_host->clk); out: return ret; @@ -475,6 +487,7 @@ static int sdhci_st_resume(struct device *dev) struct device_node *np = dev->of_node; clk_prepare_enable(pltfm_host->clk); + clk_prepare_enable(pdata->icnclk); if (pdata->rstc) reset_control_deassert(pdata->rstc); From 83843c80dcf11a78995d167255b03072a1e49c2c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 28 Aug 2016 13:10:37 +0200 Subject: [PATCH 0124/1050] mac80211: fix tim recalculation after PS response Handle the case where the mac80211 intermediate queues are empty and the driver has buffered frames Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 76b737dcc36f..aa58df80ede0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1616,7 +1616,6 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, sta_info_recalc_tim(sta); } else { - unsigned long tids = sta->txq_buffered_tids & driver_release_tids; int tid; /* @@ -1648,7 +1647,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!(tids & BIT(tid)) || txqi->tin.backlog_packets) + if 
(!(driver_release_tids & BIT(tid)) || + txqi->tin.backlog_packets) continue; sta_info_recalc_tim(sta); From df6ef5d8a87ace995d5c10a7bd684be05911a321 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 4 Sep 2016 18:00:59 +0200 Subject: [PATCH 0125/1050] mac80211: fix sequence number assignment for PS response frames When using intermediate queues, sequence number allocation is deferred until dequeue. This doesn't work for PS response frames, which bypass those queues. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 65 ++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 502396694f47..cc8e95554b48 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -796,6 +796,36 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) return ret; } +static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_txq *txq = NULL; + + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) + return NULL; + + if (!ieee80211_is_data(hdr->frame_control)) + return NULL; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + return NULL; + + return to_txq_info(txq); +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { @@ -853,7 +883,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!tx->sta->sta.txq[0]) + if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta, + tx->skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; @@ -1243,36 +1274,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } -static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, - struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_txq *txq = NULL; - - if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || - (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) - return NULL; - - if (!ieee80211_is_data(hdr->frame_control)) - return NULL; - - if (pubsta) { - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - - txq = pubsta->txq[tid]; - } else if (vif) { - txq = vif->txq; - } - - if (!txq) - return NULL; - - return to_txq_info(txq); -} - static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) { IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); @@ -3264,7 +3265,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { *ieee80211_get_qos_ctl(hdr) = tid; - if (!sta->sta.txq[0]) + if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; From ad8d52b897a14711e026889053befbbee7fd51ba Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: [PATCH 0126/1050] pcmcia: ds: fix 
suspend/resume PCMCIA suspend/resume no longer works since the commit mentioned below, as the callbacks are no longer made. Convert the driver to the new dev_pm_ops, which restores the suspend/resume functionality. Tested on the arm arch Assabet platform. Fixes: aa8e54b559479 ("PM / sleep: Go direct_complete if driver has no callbacks") Signed-off-by: Russell King --- drivers/pcmcia/ds.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 489ea1098c96..69b5e811ea2b 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -977,7 +977,7 @@ static int pcmcia_bus_uevent(struct device *dev, struct kobj_uevent_env *env) /************************ runtime PM support ***************************/ -static int pcmcia_dev_suspend(struct device *dev, pm_message_t state); +static int pcmcia_dev_suspend(struct device *dev); static int pcmcia_dev_resume(struct device *dev); static int runtime_suspend(struct device *dev) @@ -985,7 +985,7 @@ static int runtime_suspend(struct device *dev) int rc; device_lock(dev); - rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); + rc = pcmcia_dev_suspend(dev); device_unlock(dev); return rc; } @@ -1135,7 +1135,7 @@ ATTRIBUTE_GROUPS(pcmcia_dev); /* PM support, also needed for reset */ -static int pcmcia_dev_suspend(struct device *dev, pm_message_t state) +static int pcmcia_dev_suspend(struct device *dev) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); struct pcmcia_driver *p_drv = NULL; @@ -1410,6 +1410,9 @@ static struct class_interface pcmcia_bus_interface __refdata = { .remove_dev = &pcmcia_bus_remove_socket, }; +static const struct dev_pm_ops pcmcia_bus_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pcmcia_dev_suspend, pcmcia_dev_resume) +}; struct bus_type pcmcia_bus_type = { .name = "pcmcia", @@ -1418,8 +1421,7 @@ struct bus_type pcmcia_bus_type = { .dev_groups = pcmcia_dev_groups, .probe = pcmcia_device_probe, .remove = pcmcia_device_remove, - .suspend = pcmcia_dev_suspend, - .resume = pcmcia_dev_resume, + .pm = &pcmcia_bus_pm_ops, }; From 6dec04e8f30a8cf1e782500244d8601c1f8505ad Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: [PATCH 0127/1050] pcmcia: sa11xx_base: fix reporting of timing information Fix the reporting of the currently programmed timing information. These entries have been showing zero due to the clock rate being a factor of 1000 too big. With this change, we go from: I/O : 165 (0) attribute: 300 (0) common : 300 (0) to: I/O : 165 (172) attribute: 300 (316) common : 300 (316) Signed-off-by: Russell King --- drivers/pcmcia/sa11xx_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index 9f6ec87b9f9e..af37b7ec1c73 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -144,7 +144,7 @@ static int sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf) { struct soc_pcmcia_timing timing; - unsigned int clock = clk_get_rate(skt->clk); + unsigned int clock = clk_get_rate(skt->clk) / 1000; unsigned long mecr = MECR; char *p = buf; From cbd5a16820e576d26bf985ad62b8c4cdf792fb45 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: [PATCH 0128/1050] pcmcia: sa11xx_base: add units to the timing information Add units to the timing information, so we know that the numbers are nanoseconds. 
The output changes from: I/O : 165 (172) attribute: 300 (316) common : 300 (316) to: I/O : 165ns (172ns) attribute: 300ns (316ns) common : 300ns (316ns) Signed-off-by: Russell King --- drivers/pcmcia/sa11xx_base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index af37b7ec1c73..48140ac73ed6 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -150,13 +150,13 @@ sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf) soc_common_pcmcia_get_timing(skt, &timing); - p+=sprintf(p, "I/O : %u (%u)\n", timing.io, + p+=sprintf(p, "I/O : %uns (%uns)\n", timing.io, sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr))); - p+=sprintf(p, "attribute: %u (%u)\n", timing.attr, + p+=sprintf(p, "attribute: %uns (%uns)\n", timing.attr, sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr))); - p+=sprintf(p, "common : %u (%u)\n", timing.mem, + p+=sprintf(p, "common : %uns (%uns)\n", timing.mem, sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr))); return p - buf; From a466ebd2fc6a793e55f028a008b9f094d7d30fe3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: [PATCH 0129/1050] pcmcia: soc_common: fix SS_STSCHG polarity SS_STSCHG should be set for an IO card when the BVD1 signal is asserted low, not high. Signed-off-by: Russell King --- drivers/pcmcia/soc_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index eed5e9c05353..d5ca760c4eb2 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -235,7 +235,7 @@ static unsigned int soc_common_pcmcia_skt_state(struct soc_pcmcia_socket *skt) stat |= skt->cs_state.Vcc ? SS_POWERON : 0; if (skt->cs_state.flags & SS_IOCARD) - stat |= state.bvd1 ? SS_STSCHG : 0; + stat |= state.bvd1 ? 0 : SS_STSCHG; else { if (state.bvd1 == 0) stat |= SS_BATDEAD; From 3f8df892b2312011f2ba73aedc0a192d70b8844e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Sep 2016 10:14:20 +0100 Subject: [PATCH 0130/1050] pcmcia: sa1111: fix propagation of lowlevel board init return code When testing Lubbock, it was noticed that the sa1111 pcmcia driver bound but was not functional due to no sockets being registered. This is because the return code from the lowlevel board initialisation was not being propagated out of the probe function. Fix this. 
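The shape of that fix, reduced to a single board check for brevity (the identifiers and the 512-byte region size are the ones the patch itself uses; the full probe handles all four boards the same way):

static int pcmcia_probe_sketch(struct sa1111_dev *dev)
{
	int ret = -ENODEV;	/* no board matched */

	if (machine_is_lubbock())
		ret = pcmcia_lubbock_init(dev);

	/* propagate the lowlevel init result and undo what the
	 * probe path acquired earlier on failure */
	if (ret) {
		release_mem_region(dev->res.start, 512);
		sa1111_disable_device(dev);
	}

	return ret;
}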
Tested-by: Robert Jarzmik Signed-off-by: Russell King --- drivers/pcmcia/sa1111_badge4.c | 20 +++++++------------ drivers/pcmcia/sa1111_generic.c | 22 +++++++++++++++----- drivers/pcmcia/sa1111_jornada720.c | 25 ++++++++++------------- drivers/pcmcia/sa1111_lubbock.c | 32 ++++++++++++------------------ drivers/pcmcia/sa1111_neponset.c | 26 ++++++++++-------------- 5 files changed, 57 insertions(+), 68 deletions(-) diff --git a/drivers/pcmcia/sa1111_badge4.c b/drivers/pcmcia/sa1111_badge4.c index 12f0dd091477..2f490930430d 100644 --- a/drivers/pcmcia/sa1111_badge4.c +++ b/drivers/pcmcia/sa1111_badge4.c @@ -134,20 +134,14 @@ static struct pcmcia_low_level badge4_pcmcia_ops = { int pcmcia_badge4_init(struct sa1111_dev *dev) { - int ret = -ENODEV; + printk(KERN_INFO + "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n", + __func__, + badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); - if (machine_is_badge4()) { - printk(KERN_INFO - "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n", - __func__, - badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); - - sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); - ret = sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); + return sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } static int __init pcmv_setup(char *s) diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index a1531feb8460..3d95dffcff7a 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "sa1111_generic.h" @@ -203,19 +204,30 @@ static int pcmcia_probe(struct sa1111_dev *dev) sa1111_writel(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR); sa1111_writel(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR); + ret = -ENODEV; #ifdef CONFIG_SA1100_BADGE4 - pcmcia_badge4_init(dev); + if (machine_is_badge4()) + ret = pcmcia_badge4_init(dev); #endif #ifdef CONFIG_SA1100_JORNADA720 - pcmcia_jornada720_init(dev); + if (machine_is_jornada720()) + ret = pcmcia_jornada720_init(dev); #endif #ifdef CONFIG_ARCH_LUBBOCK - pcmcia_lubbock_init(dev); + if (machine_is_lubbock()) + ret = pcmcia_lubbock_init(dev); #endif #ifdef CONFIG_ASSABET_NEPONSET - pcmcia_neponset_init(dev); + if (machine_is_assabet()) + ret = pcmcia_neponset_init(dev); #endif - return 0; + + if (ret) { + release_mem_region(dev->res.start, 512); + sa1111_disable_device(dev); + } + + return ret; } static int pcmcia_remove(struct sa1111_dev *dev) diff --git a/drivers/pcmcia/sa1111_jornada720.c b/drivers/pcmcia/sa1111_jornada720.c index c2c30580c83f..480a3ede27c8 100644 --- a/drivers/pcmcia/sa1111_jornada720.c +++ b/drivers/pcmcia/sa1111_jornada720.c @@ -94,22 +94,17 @@ static struct pcmcia_low_level jornada720_pcmcia_ops = { int pcmcia_jornada720_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; + unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; - if (machine_is_jornada720()) { - unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; + /* Fixme: why messing around with SA11x0's GPIO1? 
*/ + GRER |= 0x00000002; - GRER |= 0x00000002; + /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ + sa1111_set_io_dir(sadev, pin, 0, 0); + sa1111_set_io(sadev, pin, 0); + sa1111_set_sleep_io(sadev, pin, 0); - /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ - sa1111_set_io_dir(sadev, pin, 0, 0); - sa1111_set_io(sadev, pin, 0); - sa1111_set_sleep_io(sadev, pin, 0); - - sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); - ret = sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); + return sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c index c5caf5790451..df2b6b28b1d1 100644 --- a/drivers/pcmcia/sa1111_lubbock.c +++ b/drivers/pcmcia/sa1111_lubbock.c @@ -210,27 +210,21 @@ static struct pcmcia_low_level lubbock_pcmcia_ops = { int pcmcia_lubbock_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; + /* + * Set GPIO_A<3:0> to be outputs for the MAX1600, + * and switch to standby mode. + */ + sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); + sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - if (machine_is_lubbock()) { - /* - * Set GPIO_A<3:0> to be outputs for the MAX1600, - * and switch to standby mode. - */ - sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); - sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + /* Set CF Socket 1 power to standby mode. */ + lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); - /* Set CF Socket 1 power to standby mode. */ - lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); - - pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); - pxa2xx_configure_sockets(&sadev->dev); - ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, - pxa2xx_drv_pcmcia_add_one); - } - - return ret; + pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); + pxa2xx_configure_sockets(&sadev->dev); + return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, + pxa2xx_drv_pcmcia_add_one); } MODULE_LICENSE("GPL"); diff --git a/drivers/pcmcia/sa1111_neponset.c b/drivers/pcmcia/sa1111_neponset.c index 1d78739c4c07..019c395eb4bf 100644 --- a/drivers/pcmcia/sa1111_neponset.c +++ b/drivers/pcmcia/sa1111_neponset.c @@ -110,20 +110,14 @@ static struct pcmcia_low_level neponset_pcmcia_ops = { int pcmcia_neponset_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; - - if (machine_is_assabet()) { - /* - * Set GPIO_A<3:0> to be outputs for the MAX1600, - * and switch to standby mode. - */ - sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); - sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); - ret = sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + /* + * Set GPIO_A<3:0> to be outputs for the MAX1600, + * and switch to standby mode. 
+ */ + sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); + sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); + return sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } From 817ed5748e40bbc5b5f2aa0c3094c4a7adfb8881 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Thu, 1 Sep 2016 08:31:08 +0200 Subject: [PATCH 0131/1050] pcmcia: lubbock: fix sockets configuration On lubbock board, the probe of the driver crashes by dereferencing very early a platform_data structure which is not set, in pxa2xx_configure_sockets(). The stack fixed is : [ 0.244353] SA1111 Microprocessor Companion Chip: silicon revision 1, metal revision 1 [ 0.256321] sa1111 sa1111: Providing IRQ336-390 [ 0.340899] clocksource: Switched to clocksource oscr0 [ 0.472263] Unable to handle kernel NULL pointer dereference at virtual address 00000004 [ 0.480469] pgd = c0004000 [ 0.483432] [00000004] *pgd=00000000 [ 0.487105] Internal error: Oops: f5 [#1] ARM [ 0.491497] Modules linked in: [ 0.494650] CPU: 0 PID: 1 Comm: swapper Not tainted 4.8.0-rc3-00080-g1aaa68426f0c-dirty #2068 [ 0.503229] Hardware name: Intel DBPXA250 Development Platform (aka Lubbock) [ 0.510344] task: c3e42000 task.stack: c3e44000 [ 0.514984] PC is at pxa2xx_configure_sockets+0x4/0x24 (drivers/pcmcia/pxa2xx_base.c:227) [ 0.520193] LR is at pcmcia_lubbock_init+0x1c/0x38 [ 0.525079] pc : [] lr : [] psr: a0000053 [ 0.525079] sp : c3e45e70 ip : 100019ff fp : 00000000 [ 0.536651] r10: c0828900 r9 : c0434838 r8 : 00000000 [ 0.541953] r7 : c0820700 r6 : c0857b30 r5 : c3ec1400 r4 : c0820758 [ 0.548549] r3 : 00000000 r2 : 0000000c r1 : c3c09c40 r0 : c3ec1400 [ 0.555154] Flags: NzCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none [ 0.562450] Control: 0000397f Table: a0004000 DAC: 00000053 [ 0.568257] Process swapper (pid: 1, stack limit = 0xc3e44190) [ 0.574154] Stack: (0xc3e45e70 to 0xc3e46000) [ 0.578610] 5e60: c4849800 00000000 c3ec1400 c024769c [ 0.586928] 5e80: 00000000 c3ec140c c3c0ee0c c3ec1400 c3ec1434 c020c410 c3ec1400 c3ec1434 [ 0.595244] 5ea0: c0820700 c080b408 c0828900 c020c5f8 00000000 c0820700 c020c578 c020ac5c [ 0.603560] 5ec0: c3e687cc c3e71e10 c0820700 00000000 c3c02de0 c020bae4 c03c62f7 c03c62f7 [ 0.611872] 5ee0: c3e68780 c0820700 c042e034 00000000 c043c440 c020cdec c080b408 00000005 [ 0.620188] 5f00: c042e034 c00096c0 c0034440 c01c730c 20000053 ffffffff 00000000 00000000 [ 0.628502] 5f20: 00000000 c3ffcb87 c3ffcb90 c00346ac c3e66ba0 c03f7914 00000092 00000005 [ 0.636811] 5f40: 00000005 c03f847c 00000091 c03f847c 00000000 00000005 c0434828 00000005 [ 0.645125] 5f60: c043482c 00000092 c043c440 c0828900 c0434838 c0418d2c 00000005 00000005 [ 0.653430] 5f80: 00000000 c041858c 00000000 c032e9f0 00000000 00000000 00000000 00000000 [ 0.661729] 5fa0: 00000000 c032e9f8 00000000 c000f0f0 00000000 00000000 00000000 00000000 [ 0.670020] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 0.678311] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 0.686673] (pxa2xx_configure_sockets) from pcmcia_lubbock_init (/drivers/pcmcia/sa1111_lubbock.c:161) [ 0.696026] (pcmcia_lubbock_init) from pcmcia_probe (/drivers/pcmcia/sa1111_generic.c:213) [ 0.704358] (pcmcia_probe) from driver_probe_device (/drivers/base/dd.c:378 /drivers/base/dd.c:499) [ 0.712848] (driver_probe_device) from __driver_attach (/./include/linux/device.h:983 /drivers/base/dd.c:733) [ 
0.721414] (__driver_attach) from bus_for_each_dev (/drivers/base/bus.c:313) [ 0.729723] (bus_for_each_dev) from bus_add_driver (/drivers/base/bus.c:708) [ 0.738036] (bus_add_driver) from driver_register (/drivers/base/driver.c:169) [ 0.746185] (driver_register) from do_one_initcall (/init/main.c:778) [ 0.754561] (do_one_initcall) from kernel_init_freeable (/init/main.c:843 /init/main.c:851 /init/main.c:869 /init/main.c:1016) [ 0.763409] (kernel_init_freeable) from kernel_init (/init/main.c:944) [ 0.771660] (kernel_init) from ret_from_fork (/arch/arm/kernel/entry-common.S:119) [ 0.779347] Code: c03c6305 c03c631e c03c632e e5903048 (e993000c) All code ======== 0: c03c6305 eorsgt r6, ip, r5, lsl #6 4: c03c631e eorsgt r6, ip, lr, lsl r3 8: c03c632e eorsgt r6, ip, lr, lsr #6 c: e5903048 ldr r3, [r0, #72] ; 0x48 10:* e993000c ldmib r3, {r2, r3} <-- trapping instruction Signed-off-by: Robert Jarzmik Signed-off-by: Russell King --- drivers/pcmcia/pxa2xx_base.c | 9 +++++---- drivers/pcmcia/pxa2xx_base.h | 2 +- drivers/pcmcia/sa1111_lubbock.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 483f919e0d2e..91b5f5724cba 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -214,9 +214,8 @@ pxa2xx_pcmcia_frequency_change(struct soc_pcmcia_socket *skt, } #endif -void pxa2xx_configure_sockets(struct device *dev) +void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops) { - struct pcmcia_low_level *ops = dev->platform_data; /* * We have at least one socket, so set MECR:CIT * (Card Is There) @@ -322,7 +321,7 @@ static int pxa2xx_drv_pcmcia_probe(struct platform_device *dev) goto err1; } - pxa2xx_configure_sockets(&dev->dev); + pxa2xx_configure_sockets(&dev->dev, ops); dev_set_drvdata(&dev->dev, sinfo); return 0; @@ -348,7 +347,9 @@ static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev) static int pxa2xx_drv_pcmcia_resume(struct device *dev) { - pxa2xx_configure_sockets(dev); + struct pcmcia_low_level *ops = (struct pcmcia_low_level *)dev->platform_data; + + pxa2xx_configure_sockets(dev, ops); return 0; } diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h index b609b45469ed..e58c7a415418 100644 --- a/drivers/pcmcia/pxa2xx_base.h +++ b/drivers/pcmcia/pxa2xx_base.h @@ -1,4 +1,4 @@ int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); -void pxa2xx_configure_sockets(struct device *dev); +void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops); diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c index df2b6b28b1d1..e741f499c875 100644 --- a/drivers/pcmcia/sa1111_lubbock.c +++ b/drivers/pcmcia/sa1111_lubbock.c @@ -222,7 +222,7 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev) lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); - pxa2xx_configure_sockets(&sadev->dev); + pxa2xx_configure_sockets(&sadev->dev, &lubbock_pcmcia_ops); return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, pxa2xx_drv_pcmcia_add_one); } From cb034407ec3f816540f359300cda1122faabdbbd Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 3 Sep 2016 10:21:51 +0100 Subject: [PATCH 0132/1050] ARM: sa1111: fix error code propagation in sa1111_probe() Ensure that we propagate the platform_get_irq() error code out of the probe function. 
This allows probe deferrals to work correctly should platform_get_irq() not be able to resolve the interrupt in a DT environment at probe time. Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index fb0a0a4dfea4..332b92317fd8 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1017,7 +1017,7 @@ static int sa1111_probe(struct platform_device *pdev) return -EINVAL; irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENXIO; + return irq; return __sa1111_probe(&pdev->dev, mem, irq); } From 7c0091eceab231b59e51b80bbcf5a2205a0fa905 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 14:21:46 +0100 Subject: [PATCH 0133/1050] ARM: sa1111: fix pcmcia interrupt mask polarity The polarity of the high IRQs was being calculated using SA1111_IRQMASK_HI(), but this assumes a Linux interrupt number, not a hardware interrupt number. Hence, the resulting mask was incorrect. Fix this. Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 332b92317fd8..cfa61b857cad 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -472,8 +472,8 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) * specifies that S0ReadyInt and S1ReadyInt should be '1'. */ sa1111_writel(0, irqbase + SA1111_INTPOL0); - sa1111_writel(SA1111_IRQMASK_HI(IRQ_S0_READY_NINT) | - SA1111_IRQMASK_HI(IRQ_S1_READY_NINT), + sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) | + BIT(IRQ_S1_READY_NINT & 31), irqbase + SA1111_INTPOL1); /* clear all IRQs */ From 06dfe5cc0cc684e735cb0232fdb756d30780b05d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 14:34:05 +0100 Subject: [PATCH 0134/1050] ARM: sa1111: fix pcmcia suspend/resume SA1111 PCMCIA was broken when PCMCIA switched to using dev_pm_ops for the PCMCIA socket class. PCMCIA used to handle suspend/resume via the socket hosting device, which happened at normal device suspend/resume time. However, the referenced commit changed this: much of the resume now happens much earlier, in the noirq resume handler of dev_pm_ops. However, on SA1111, the PCMCIA device is not accessible as the SA1111 has not been resumed at _noirq time. It's slightly worse than that, because the SA1111 has already been put to sleep at _noirq time, so suspend doesn't work properly. Fix this by converting the core SA1111 code to use dev_pm_ops as well, and performing its own suspend/resume at noirq time. This fixes these errors in the kernel log: pcmcia_socket pcmcia_socket0: time out after reset pcmcia_socket pcmcia_socket1: time out after reset and the resulting lack of PCMCIA cards after a S2RAM cycle. 
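For readers unfamiliar with the two callback sets, here is a minimal sketch of the dev_pm_ops noirq pattern this patch converts to. The "foo" driver, its state structure and the suspended placeholder field are invented for illustration and deliberately simpler than the real sa1111 code:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm.h>

/* Hypothetical driver state; a real driver saves chip registers here. */
struct foo_state {
    int suspended;
};

static struct foo_state foo;

static int foo_probe(struct platform_device *pdev)
{
    platform_set_drvdata(pdev, &foo);
    return 0;
}

static int foo_suspend_noirq(struct device *dev)
{
    struct foo_state *s = dev_get_drvdata(dev); /* dev_*, not platform_* */

    s->suspended = 1;   /* stand-in for saving chip state */
    return 0;
}

static int foo_resume_noirq(struct device *dev)
{
    struct foo_state *s = dev_get_drvdata(dev);

    s->suspended = 0;   /* stand-in for restoring chip state */
    return 0;
}

static const struct dev_pm_ops foo_pm_ops = {
    .suspend_noirq = foo_suspend_noirq,
    .resume_noirq  = foo_resume_noirq,
};

static struct platform_driver foo_driver = {
    .probe  = foo_probe,
    .driver = {
        .name = "foo",
        .pm   = &foo_pm_ops,    /* replaces legacy .suspend/.resume */
    },
};
module_platform_driver(foo_driver);
MODULE_LICENSE("GPL");

The point of the noirq callbacks is ordering: they run after all regular suspend callbacks and before all regular resume callbacks, which is exactly the window in which the PCMCIA socket class now performs its reset.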
Fixes: d7646f7632549 ("pcmcia: use dev_pm_ops for class pcmcia_socket_class") Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index cfa61b857cad..7838659b870a 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -869,9 +869,9 @@ struct sa1111_save_data { #ifdef CONFIG_PM -static int sa1111_suspend(struct platform_device *dev, pm_message_t state) +static int sa1111_suspend_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags; unsigned int val; @@ -934,9 +934,9 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state) * restored by their respective drivers, and must be called * via LDM after this function. */ -static int sa1111_resume(struct platform_device *dev) +static int sa1111_resume_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags, id; void __iomem *base; @@ -952,7 +952,7 @@ static int sa1111_resume(struct platform_device *dev) id = sa1111_readl(sachip->base + SA1111_SKID); if ((id & SKID_ID_MASK) != SKID_SA1111_ID) { __sa1111_remove(sachip); - platform_set_drvdata(dev, NULL); + dev_set_drvdata(dev, NULL); kfree(save); return 0; } @@ -1003,8 +1003,8 @@ static int sa1111_resume(struct platform_device *dev) } #else -#define sa1111_suspend NULL -#define sa1111_resume NULL +#define sa1111_suspend_noirq NULL +#define sa1111_resume_noirq NULL #endif static int sa1111_probe(struct platform_device *pdev) @@ -1038,6 +1038,11 @@ static int sa1111_remove(struct platform_device *pdev) return 0; } +static struct dev_pm_ops sa1111_pm_ops = { + .suspend_noirq = sa1111_suspend_noirq, + .resume_noirq = sa1111_resume_noirq, +}; + /* * Not sure if this should be on the system bus or not yet. * We really want some way to register a system device at @@ -1050,10 +1055,9 @@ static int sa1111_remove(struct platform_device *pdev) static struct platform_driver sa1111_device_driver = { .probe = sa1111_probe, .remove = sa1111_remove, - .suspend = sa1111_suspend, - .resume = sa1111_resume, .driver = { .name = "sa1111", + .pm = &sa1111_pm_ops, }, }; From 87d5dd62c07f90ed2b0d6718f5c666f69e7d39b0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 16:09:17 +0100 Subject: [PATCH 0135/1050] ARM: sa1111: fix missing clk_disable() SA1111 forgets to call clk_disable() in the probe error cleanup path. Add the necessary call. 
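For reference, the unwind idiom at work here, as a minimal sketch: foo_chip, foo_setup_irq() and the probe shape are invented for illustration, but the label ordering mirrors the err_clk/err_unmap/err_clk_unprep chain in the patch below. Each error label undoes exactly what was acquired before the failing step, in reverse order, so a clock enabled partway through needs its own label:

#include <linux/clk.h>
#include <linux/io.h>
#include <linux/sizes.h>

struct foo_chip {
    struct clk *clk;
    void __iomem *base;
    phys_addr_t phys;
};

static int foo_setup_irq(struct foo_chip *chip) { return 0; } /* stand-in */

static int foo_probe(struct foo_chip *chip)
{
    int ret;

    ret = clk_prepare(chip->clk);
    if (ret)
        return ret;

    chip->base = ioremap(chip->phys, SZ_4K);
    if (!chip->base) {
        ret = -ENOMEM;
        goto err_clk_unprep;
    }

    ret = clk_enable(chip->clk);
    if (ret)
        goto err_unmap;

    ret = foo_setup_irq(chip);
    if (ret)
        goto err_clk;   /* clock already enabled: disable it first */

    return 0;

 err_clk:
    clk_disable(chip->clk);
 err_unmap:
    iounmap(chip->base);
 err_clk_unprep:
    clk_unprepare(chip->clk);
    return ret;
}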
Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 7838659b870a..2e076c492005 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -754,7 +754,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) if (sachip->irq != NO_IRQ) { ret = sa1111_setup_irq(sachip, pd->irq_base); if (ret) - goto err_unmap; + goto err_clk; } #ifdef CONFIG_ARCH_SA1100 @@ -799,6 +799,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) return 0; + err_clk: + clk_disable(sachip->clk); err_unmap: iounmap(sachip->base); err_clk_unprep: From 5df20f2141eadb5430caaad20eceac61cfe0f139 Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Tue, 6 Sep 2016 11:59:00 -0700 Subject: [PATCH 0136/1050] mac80211: make mpath path fixing more robust A fixed mpath was not quite being treated as such: 1) if a PERR frame was received, a fixed mpath was deactivated. 2) queued path discovery for fixed mpath was potentially being considered, changing mpath state. 3) other mpath flags were potentially being inherited when fixing the mpath. Just assign PATH_FIXED and SN_VALID. This solves several issues when fixing a mesh path in one direction. The reverse direction mpath should probably also be fixed, or root announcements at least be enabled. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 3 ++- net/mac80211/mesh_pathtbl.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 8f9c3bde835f..faccef977670 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -746,6 +746,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, sta = next_hop_deref_protected(mpath); if (mpath->flags & MESH_PATH_ACTIVE && ether_addr_equal(ta, sta->sta.addr) && + !(mpath->flags & MESH_PATH_FIXED) && (!(mpath->flags & MESH_PATH_SN_VALID) || SN_GT(target_sn, mpath->sn) || target_sn == 0)) { mpath->flags &= ~MESH_PATH_ACTIVE; @@ -1012,7 +1013,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) goto enddiscovery; spin_lock_bh(&mpath->state_lock); - if (mpath->flags & MESH_PATH_DELETED) { + if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) { spin_unlock_bh(&mpath->state_lock); goto enddiscovery; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 6db2ddfa0695..f0e6175a9821 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -826,7 +826,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; - mpath->flags |= MESH_PATH_FIXED; + mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); mesh_path_tx_pending(mpath); From 6b3142b2b852cd5e3216d1aa800a0a49377e6e1c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 7 Sep 2016 21:56:09 +0100 Subject: [PATCH 0137/1050] ARM: 8612/1: LPAE: initialize cache policy correctly The cachepolicy variable gets initialized using a masked pmd value. So far, the pmd has been masked with flags valid for the 2-page table format, but the 3-page table format requires a different mask. On LPAE, this led to a wrong assumption about which initial cache policy had been used.
Later a check forces the cache policy to writealloc and prints the following warning: Forcing write-allocate cache policy for SMP This patch introduces a new definition PMD_SECT_CACHE_MASK for both page table formats which masks in all cache flags in both cases. Signed-off-by: Stefan Agner Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level-hwdef.h | 1 + arch/arm/include/asm/pgtable-3level-hwdef.h | 1 + arch/arm/mm/mmu.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index d0131ee6f6af..3f82e9da7cec 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -47,6 +47,7 @@ #define PMD_SECT_WB (PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_MINICACHE (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE) #define PMD_SECT_WBWA (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) +#define PMD_SECT_CACHE_MASK (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_NONSHARED_DEV (PMD_SECT_TEX(2)) /* diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index f8f1cff62065..4cd664abfcd3 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -62,6 +62,7 @@ #define PMD_SECT_WT (_AT(pmdval_t, 2) << 2) /* normal inner write-through */ #define PMD_SECT_WB (_AT(pmdval_t, 3) << 2) /* normal inner write-back */ #define PMD_SECT_WBWA (_AT(pmdval_t, 7) << 2) /* normal inner write-alloc */ +#define PMD_SECT_CACHE_MASK (_AT(pmdval_t, 7) << 2) /* * + Level 3 descriptor (PTE) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6344913f0804..30fe03f95c85 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -137,7 +137,7 @@ void __init init_default_cache_policy(unsigned long pmd) initial_pmd_value = pmd; - pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + pmd &= PMD_SECT_CACHE_MASK; for (i = 0; i < ARRAY_SIZE(cache_policies); i++) if (cache_policies[i].pmd == pmd) { From 07f56e6646228da27122e81d5a5a232fdf3d3a50 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Sep 2016 22:08:45 +0100 Subject: [PATCH 0138/1050] ARM: locomo: fix locomo irq handling Accidentally booting Collie on Assabet reveals that the locomo driver incorrectly overwrites gpio-sa1100's chip data for its parent interrupt, leading to oops in sa1100_gpio_unmask() and sa1100_update_edge_regs() when "gpio: sa1100: convert to use IO accessors" is applied. Fix locomo to use the handler data rather than chip data for its parent interrupt. Signed-off-by: Russell King --- arch/arm/common/locomo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 0e97b4b871f9..6c7b06854fce 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -140,7 +140,7 @@ static struct locomo_dev_info locomo_devices[] = { static void locomo_handler(struct irq_desc *desc) { - struct locomo *lchip = irq_desc_get_chip_data(desc); + struct locomo *lchip = irq_desc_get_handler_data(desc); int req, i; /* Acknowledge the parent IRQ */ @@ -200,8 +200,7 @@ static void locomo_setup_irq(struct locomo *lchip) * Install handler for IRQ_LOCOMO_HW. 
*/ irq_set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING); - irq_set_chip_data(lchip->irq, lchip); - irq_set_chained_handler(lchip->irq, locomo_handler); + irq_set_chained_handler_and_data(lchip->irq, locomo_handler, lchip); /* Install handlers for IRQ_LOCOMO_* */ for ( ; irq <= lchip->irq_base + 3; irq++) { From 1a57c286d8ced1e4144c6201a19bbb70827edee6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 4 Sep 2016 21:45:56 +0100 Subject: [PATCH 0139/1050] ARM: pxa/lubbock: add pcmcia clock Add the required PCMCIA clock for the SA1111 "1800" device. This clock is used to compute timing information for the PCMCIA interface in the SoC device, rather than the SA1111. Hence, the provision of this clock is a convenience for the driver and does not reflect the hardware, so this must not be copied into DT. Acked-by: Robert Jarzmik Tested-by: Robert Jarzmik Signed-off-by: Russell King --- arch/arm/mach-pxa/lubbock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 7245f3359564..d6159f8ef0c2 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -137,6 +137,18 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = { // no D+ pullup; lubbock can't connect/disconnect in software }; +static void lubbock_init_pcmcia(void) +{ + struct clk *clk; + + /* Add an alias for the SA1111 PCMCIA clock */ + clk = clk_get_sys("pxa2xx-pcmcia", NULL); + if (!IS_ERR(clk)) { + clkdev_create(clk, NULL, "1800"); + clk_put(clk); + } +} + static struct resource sa1111_resources[] = { [0] = { .start = 0x10000000, @@ -467,6 +479,8 @@ static void __init lubbock_init(void) pxa_set_btuart_info(NULL); pxa_set_stuart_info(NULL); + lubbock_init_pcmcia(); + clk_add_alias("SA1111_CLK", NULL, "GPIO11_CLK", NULL); pxa_set_udc_info(&udc_info); pxa_set_fb_info(NULL, &sharp_lm8v31); From 70ca767ea1b2748f45e96192400e515dddbe517c Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 6 Sep 2016 08:44:19 +0200 Subject: [PATCH 0140/1050] netfilter: nft_hash: Add hash offset value Add support to pass through an offset to the hash value. With this feature, the sysadmin is able to generate a hash with a given offset value. Example: meta mark set jhash ip saddr mod 2 seed 0xabcd offset 100 This option generates marks according to the source address from 100 to 101. 
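To see why the example yields exactly 100 and 101, here is a user-space sketch of the arithmetic; reciprocal_scale() is reimplemented from its kernel definition, and the sample hash values are arbitrary stand-ins for jhash() output:

#include <stdint.h>
#include <stdio.h>

/* Same math as the kernel's reciprocal_scale(): maps a 32-bit value
 * into [0, ep_ro) with a multiply and shift instead of a division.
 */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
    return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

int main(void)
{
    const uint32_t modulus = 2, offset = 100;
    const uint32_t sample_hashes[] = { 0x12345678, 0x9abcdef0, 0xdeadbeef };

    for (unsigned i = 0; i < sizeof(sample_hashes) / sizeof(sample_hashes[0]); i++) {
        /* scaled value is 0 or 1; adding the offset gives 100 or 101 */
        printf("mark = %u\n", reciprocal_scale(sample_hashes[i], modulus) + offset);
    }
    return 0;
}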
Signed-off-by: Laura Garcia Liebana --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_hash.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 24161e25576d..8c653bbd1ead 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -731,6 +731,7 @@ enum nft_meta_keys { * @NFTA_HASH_LEN: source data length (NLA_U32) * @NFTA_HASH_MODULUS: modulus value (NLA_U32) * @NFTA_HASH_SEED: seed value (NLA_U32) + * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) */ enum nft_hash_attributes { NFTA_HASH_UNSPEC, @@ -739,6 +740,7 @@ enum nft_hash_attributes { NFTA_HASH_LEN, NFTA_HASH_MODULUS, NFTA_HASH_SEED, + NFTA_HASH_OFFSET, __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 764251d31e46..bd12f7a801c2 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -23,6 +23,7 @@ struct nft_hash { u8 len; u32 modulus; u32 seed; + u32 offset; }; static void nft_hash_eval(const struct nft_expr *expr, @@ -31,10 +32,10 @@ { struct nft_hash *priv = nft_expr_priv(expr); const void *data = &regs->data[priv->sreg]; + u32 h; - regs->data[priv->dreg] = - reciprocal_scale(jhash(data, priv->len, priv->seed), - priv->modulus); + h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus); + regs->data[priv->dreg] = h + priv->offset; } static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { @@ -59,6 +60,9 @@ static int nft_hash_init(const struct nft_ctx *ctx, !tb[NFTA_HASH_MODULUS]) return -EINVAL; + if (tb[NFTA_HASH_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); + priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); @@ -72,6 +76,9 @@ if (priv->modulus <= 1) return -ERANGE; + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); return nft_validate_register_load(priv->sreg, len) && @@ -94,7 +101,9 @@ static int nft_hash_dump(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) goto nla_put_failure; - + if (priv->offset != 0) + if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) + goto nla_put_failure; return 0; nla_put_failure: From dbd2be0646e3239022630c426cbceefa15714bca Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Sep 2016 12:22:18 +0200 Subject: [PATCH 0141/1050] netfilter: nft_dynset: allow to invert match criteria The dynset expression matches if we can fit a new entry into the set. If there is no room for it, then it breaks the rule evaluation. This patch introduces the inversion flag so you can add rules to explicitly drop packets that don't fit into the set. For example: # nft filter input flow table xyz size 4 { ip saddr timeout 120s counter } overflow drop This is useful to provide a replacement for connlimit. For the rule above, every new entry uses the IPv4 address as key in the set; this entry gets a timeout of 120 seconds that gets refreshed on every packet seen. If we get a new flow and our set already contains 4 entries, then this packet is dropped.
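Put differently, with "fits" meaning the element was added or refreshed, the verdict logic reduces to a single comparison. This is a sketch of the semantics, not the literal patch code:

/* fits  invert  outcome
 * ----  ------  -------
 * yes   no      match: continue evaluating this rule
 * yes   yes     NFT_BREAK: stop evaluating this rule
 * no    no      NFT_BREAK
 * no    yes     match: continue evaluating this rule
 */
if (fits == priv->invert)
    regs->verdict.code = NFT_BREAK;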
You can already express this in positive logic, assuming default policy to drop: # nft filter input flow table xyz size 4 { ip saddr timeout 10s counter } accept Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nft_dynset.c | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 8c653bbd1ead..bc0eb6a1066d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -575,6 +575,10 @@ enum nft_dynset_ops { NFT_DYNSET_OP_UPDATE, }; +enum nft_dynset_flags { + NFT_DYNSET_F_INV = (1 << 0), +}; + /** * enum nft_dynset_attributes - dynset expression attributes * @@ -585,6 +589,7 @@ enum nft_dynset_ops { * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_DYNSET_FLAGS: flags (NLA_U32) */ enum nft_dynset_attributes { NFTA_DYNSET_UNSPEC, @@ -596,6 +601,7 @@ enum nft_dynset_attributes { NFTA_DYNSET_TIMEOUT, NFTA_DYNSET_EXPR, NFTA_DYNSET_PAD, + NFTA_DYNSET_FLAGS, __NFTA_DYNSET_MAX, }; #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 0af26699bf04..e3b83c31da2e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -22,6 +22,7 @@ struct nft_dynset { enum nft_dynset_ops op:8; enum nft_registers sreg_key:8; enum nft_registers sreg_data:8; + bool invert; u64 timeout; struct nft_expr *expr; struct nft_set_binding binding; @@ -82,10 +83,14 @@ static void nft_dynset_eval(const struct nft_expr *expr, if (sexpr != NULL) sexpr->ops->eval(sexpr, regs, pkt); + + if (priv->invert) + regs->verdict.code = NFT_BREAK; return; } out: - regs->verdict.code = NFT_BREAK; + if (!priv->invert) + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { @@ -96,6 +101,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, + [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 }, }; static int nft_dynset_init(const struct nft_ctx *ctx, @@ -113,6 +119,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; + if (tb[NFTA_DYNSET_FLAGS]) { + u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); + + if (flags & ~NFT_DYNSET_F_INV) + return -EINVAL; + if (flags & NFT_DYNSET_F_INV) + priv->invert = true; + } + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], genmask); if (IS_ERR(set)) { @@ -220,6 +235,7 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx, static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_dynset *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? 
NFT_DYNSET_F_INV : 0; if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key)) goto nla_put_failure; @@ -235,6 +251,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags))) + goto nla_put_failure; return 0; nla_put_failure: From beac5afa2d78605b70f40cf5ab5601ab10659c7f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:49 +0200 Subject: [PATCH 0142/1050] netfilter: nf_tables: ensure proper initialization of nft_pktinfo fields This patch introduces nft_set_pktinfo_unspec() that ensures proper initialization of all pktinfo fields for non-IP traffic. This is used by the bridge, netdev and arp families. This new function relies on nft_set_pktinfo_proto_unspec() to set a new tprot_set field that indicates if transport protocol information is available. Remaining fields are zeroed. The meta expression has also been updated to check tprot_set first, given that zero is a valid tprot value. Even a handcrafted packet may come with the IPPROTO_RAW (255) protocol number so we can't rely on this value as tprot unset. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 18 ++++++++++++++++++ include/net/netfilter/nf_tables_ipv4.h | 1 + include/net/netfilter/nf_tables_ipv6.h | 1 + net/bridge/netfilter/nf_tables_bridge.c | 6 +++--- net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/netfilter/nf_tables_netdev.c | 4 +++- net/netfilter/nft_meta.c | 2 ++ 7 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8972468bc94b..a7a7cebc8d07 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -19,6 +19,7 @@ struct nft_pktinfo { const struct net_device *out; u8 pf; u8 hook; + bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; @@ -36,6 +37,23 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->pf = pkt->xt.family = state->pf; } +static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb) +{ + pkt->tprot_set = false; + pkt->tprot = 0; + pkt->xt.thoff = 0; + pkt->xt.fragoff = 0; +} + +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_proto_unspec(pkt, skb); +} + /** * struct nft_verdict - nf_tables verdict * diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index ca6ef6bf775e..af952f7843ee 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -14,6 +14,7 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); ip = ip_hdr(pkt->skb); + pkt->tprot_set = true; pkt->tprot = ip->protocol; pkt->xt.thoff = ip_hdrlen(pkt->skb); pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 8ad39a6a5fe1..6aeee47b1b5e 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -19,6 +19,7 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, if (protohdr < 0) return -1; + pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; diff --git 
a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index a78c4e2826e5..29899887163e 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -71,7 +71,7 @@ static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, if (nft_bridge_iphdr_validate(skb)) nft_set_pktinfo_ipv4(pkt, skb, state); else - nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_unspec(pkt, skb, state); } static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, @@ -83,7 +83,7 @@ static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, nft_set_pktinfo_ipv6(pkt, skb, state) == 0) return; #endif - nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_unspec(pkt, skb, state); } static unsigned int @@ -101,7 +101,7 @@ nft_do_chain_bridge(void *priv, nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index cd84d4295a20..058c034be376 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,7 +21,7 @@ nft_do_chain_arp(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); return nft_do_chain(&pkt, priv); } diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 5eefe4a355c6..8de502b0c37b 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -41,6 +41,7 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, else if (len < thoff) return; + pkt->tprot_set = true; pkt->tprot = iph->protocol; pkt->xt.thoff = thoff; pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; @@ -74,6 +75,7 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, if (protohdr < 0) return; + pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; @@ -102,7 +104,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 2863f3493038..14264edf2d77 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -52,6 +52,8 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = pkt->pf; break; case NFT_META_L4PROTO: + if (!pkt->tprot_set) + goto err; *dest = pkt->tprot; break; case NFT_META_PRIORITY: From 8df9e32e7ed2978f90cce780ce6a27513044158a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:50 +0200 Subject: [PATCH 0143/1050] netfilter: nf_tables_ipv6: setup pktinfo transport field on failure to parse Make sure the pktinfo protocol fields are initialized if this fails to parse the transport header. 
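The invariant, condensed into one hypothetical helper (the real code spreads it across the per-family nft_set_pktinfo_* functions, and the struct fields are the ones introduced earlier in this series): every exit path leaves the transport fields in a defined state.

/* Assumes <net/netfilter/nf_tables.h> from this series for nft_pktinfo. */
static void set_pktinfo_l4(struct nft_pktinfo *pkt, int protohdr,
                           unsigned int thoff, unsigned short frag_off)
{
    if (protohdr < 0) {
        /* parse failed: defined "no transport info" state */
        pkt->tprot_set = false;
        pkt->tprot = 0;
        pkt->xt.thoff = 0;
        pkt->xt.fragoff = 0;
        return;
    }
    pkt->tprot_set = true;
    pkt->tprot = protohdr;
    pkt->xt.thoff = thoff;
    pkt->xt.fragoff = frag_off;
}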
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 6aeee47b1b5e..1e0ffd5aea47 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -15,9 +15,10 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - /* If malformed, drop it */ - if (protohdr < 0) + if (protohdr < 0) { + nft_set_pktinfo_proto_unspec(pkt, skb); return -1; + } pkt->tprot_set = true; pkt->tprot = protohdr; From ddc8b6027ad08d145a6d7a6a6abc00e43f315bd1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:51 +0200 Subject: [PATCH 0144/1050] netfilter: introduce nft_set_pktinfo_{ipv4, ipv6}_validate() These functions are extracted from the netdev family, they initialize the pktinfo structure and validate that the IPv4 and IPv6 headers are well-formed given that these functions are called from a path where layer 3 sanitization did not happen yet. These functions are placed in include/net/netfilter/nf_tables_ipv{4,6}.h so they can be reused by a follow up patch to use them from the bridge family too. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 42 ++++++++++++++ include/net/netfilter/nf_tables_ipv6.h | 49 ++++++++++++++++ net/netfilter/nf_tables_netdev.c | 79 +------------------------- 3 files changed, 93 insertions(+), 77 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index af952f7843ee..968f00b82fb5 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -20,6 +20,48 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } +static inline int +__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct iphdr *iph, _iph; + u32 len, thoff; + + iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), + &_iph); + if (!iph) + return -1; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return -1; + + len = ntohs(iph->tot_len); + thoff = iph->ihl * 4; + if (skb->len < len) + return -1; + else if (len < thoff) + return -1; + + pkt->tprot_set = true; + pkt->tprot = iph->protocol; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; + + return 0; +} + +static inline void +nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); +} + extern struct nft_af_info nft_af_ipv4; #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 1e0ffd5aea47..39b7b717b540 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -28,6 +28,55 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, return 0; } +static inline int +__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ip6h, _ip6h; + unsigned int thoff = 0; + unsigned short frag_off; + int protohdr; + u32 pkt_len; + + ip6h = skb_header_pointer(skb, 
skb_network_offset(skb), sizeof(*ip6h), + &_ip6h); + if (!ip6h) + return -1; + + if (ip6h->version != 6) + return -1; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > skb->len) + return -1; + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + if (protohdr < 0) + return -1; + + pkt->tprot_set = true; + pkt->tprot = protohdr; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; + + return 0; +#else + return -1; +#endif +} + +static inline void +nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); +} + extern struct nft_af_info nft_af_ipv6; #endif diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 8de502b0c37b..3e5475a833a5 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -15,81 +15,6 @@ #include #include -static inline void -nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct iphdr *iph, _iph; - u32 len, thoff; - - nft_set_pktinfo(pkt, skb, state); - - iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), - &_iph); - if (!iph) - return; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return; - - len = ntohs(iph->tot_len); - thoff = iph->ihl * 4; - if (skb->len < len) - return; - else if (len < thoff) - return; - - pkt->tprot_set = true; - pkt->tprot = iph->protocol; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; -} - -static inline void -__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - struct ipv6hdr *ip6h, _ip6h; - unsigned int thoff = 0; - unsigned short frag_off; - int protohdr; - u32 pkt_len; - - ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), - &_ip6h); - if (!ip6h) - return; - - if (ip6h->version != 6) - return; - - pkt_len = ntohs(ip6h->payload_len); - if (pkt_len + sizeof(*ip6h) > skb->len) - return; - - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - if (protohdr < 0) - return; - - pkt->tprot_set = true; - pkt->tprot = protohdr; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = frag_off; -#endif -} - -static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - nft_set_pktinfo(pkt, skb, state); - __nft_netdev_set_pktinfo_ipv6(pkt, skb, state); -} - static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -98,10 +23,10 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_IP): - nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: nft_set_pktinfo_unspec(&pkt, skb, state); From 10151d7b03e23afce76a59f717f2616a10ddef86 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:52 +0200 Subject: [PATCH 0145/1050] netfilter: nf_tables_bridge: use nft_set_pktinfo_ipv{4, 6}_validate Consolidate pktinfo setup and validation by using the new generic functions so we converge to the netdev family codebase. 
We only need a linear IPv4 and IPv6 header from the reject expression, so move nft_bridge_iphdr_validate() and nft_bridge_ip6hdr_validate() to net/bridge/netfilter/nft_reject_bridge.c. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_bridge.h | 7 --- net/bridge/netfilter/nf_tables_bridge.c | 72 +----------------------- net/bridge/netfilter/nft_reject_bridge.c | 44 ++++++++++++++- 3 files changed, 45 insertions(+), 78 deletions(-) delete mode 100644 include/net/netfilter/nf_tables_bridge.h diff --git a/include/net/netfilter/nf_tables_bridge.h b/include/net/netfilter/nf_tables_bridge.h deleted file mode 100644 index 511fb79f6dad..000000000000 --- a/include/net/netfilter/nf_tables_bridge.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _NET_NF_TABLES_BRIDGE_H -#define _NET_NF_TABLES_BRIDGE_H - -int nft_bridge_iphdr_validate(struct sk_buff *skb); -int nft_bridge_ip6hdr_validate(struct sk_buff *skb); - -#endif /* _NET_NF_TABLES_BRIDGE_H */ diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 29899887163e..06f0f81456a0 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -13,79 +13,11 @@ #include #include #include -#include #include #include #include #include -int nft_bridge_iphdr_validate(struct sk_buff *skb) -{ - struct iphdr *iph; - u32 len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return 0; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return 0; - - len = ntohs(iph->tot_len); - if (skb->len < len) - return 0; - else if (len < (iph->ihl*4)) - return 0; - - if (!pskb_may_pull(skb, iph->ihl*4)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_iphdr_validate); - -int nft_bridge_ip6hdr_validate(struct sk_buff *skb) -{ - struct ipv6hdr *hdr; - u32 pkt_len; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return 0; - - hdr = ipv6_hdr(skb); - if (hdr->version != 6) - return 0; - - pkt_len = ntohs(hdr->payload_len); - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); - -static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, skb, state); - else - nft_set_pktinfo_unspec(pkt, skb, state); -} - -static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, skb, state) == 0) - return; -#endif - nft_set_pktinfo_unspec(pkt, skb, state); -} - static unsigned int nft_do_chain_bridge(void *priv, struct sk_buff *skb, @@ -95,10 +27,10 @@ nft_do_chain_bridge(void *priv, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: nft_set_pktinfo_unspec(&pkt, skb, state); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 0b77ffbc27d6..4b3df6b0e3b9 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,30 @@ static void 
nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } +static int nft_bridge_iphdr_validate(struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(skb, iph->ihl*4)) + return 0; + + return 1; +} + /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ @@ -143,6 +166,25 @@ static void nft_reject_br_send_v4_unreach(struct net *net, br_forward(br_port_get_rcu(dev), nskb, false, true); } +static int nft_bridge_ip6hdr_validate(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + return 0; + + return 1; +} + static void nft_reject_br_send_v6_tcp_reset(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, From 71212c9b04eba76faa4dca26ccd1552d6bb300c1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:53 +0200 Subject: [PATCH 0146/1050] netfilter: nf_tables: don't drop IPv6 packets that cannot parse transport This is overly conservative and not flexible at all, so better let them go through and let the filtering policy decide what to do with them. We use skb_header_pointer() all over the place so we would just fail to match when trying to access fields from malformed traffic. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 6 ++---- net/ipv6/netfilter/nf_tables_ipv6.c | 4 +--- net/ipv6/netfilter/nft_chain_route_ipv6.c | 4 +--- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 39b7b717b540..d150b5066201 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -4,7 +4,7 @@ #include #include -static inline int +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) @@ -17,15 +17,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); - return -1; + return; } pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; - - return 0; } static inline int diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 30b22f4dff55..05d05926962a 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -22,9 +22,7 @@ static unsigned int nft_do_chain_ipv6(void *priv, { struct nft_pktinfo pkt; - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 71d995ff3108..01eb0f658366 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -32,9 +32,7 @@ static unsigned int nf_route_table_hook(void *priv, u_int8_t hop_limit; u32 mark, flowlabel; - /* malformed packet, 
drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); From cf71c03edf10076f05a0b678fc9c8f8e6c6e24e4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Fri, 9 Sep 2016 14:01:26 +0200 Subject: [PATCH 0147/1050] netfilter: nf_conntrack: simplify __nf_ct_try_assign_helper() return logic Instead of several goto's just to return the result, simply return it. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index b989b81ac156..4ffe388a9a1e 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -189,7 +189,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; struct net *net = nf_ct_net(ct); - int ret = 0; /* We already got a helper explicitly attached. The function * nf_conntrack_alter_reply - in case NAT is in use - asks for looking @@ -223,15 +222,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); - goto out; + return 0; } if (help == NULL) { help = nf_ct_helper_ext_add(ct, helper, flags); - if (help == NULL) { - ret = -ENOMEM; - goto out; - } + if (help == NULL) + return -ENOMEM; } else { /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ @@ -240,13 +237,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); - goto out; + return 0; } } rcu_assign_pointer(help->helper, helper); - -out: - return ret; + + return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); From 4e6577de71803142d01e374cf15664af0388799a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 9 Sep 2016 23:25:09 +0800 Subject: [PATCH 0148/1050] netfilter: Add the missed return value check of register_netdevice_notifier Some netfilter code did not check the return value of register_netdevice_notifier(). Add the missing checks now.
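The shape all three call sites below converge on, as a sketch; register_a()/register_b()/foo_netdev_event() are invented stand-ins for whatever the module registers before the notifier:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>

/* Stand-ins for the module's other registrations. */
static int register_a(void) { return 0; }
static void unregister_a(void) { }
static int register_b(void) { return 0; }
static void unregister_b(void) { }

static int foo_netdev_event(struct notifier_block *nb,
                            unsigned long event, void *ptr)
{
    return NOTIFY_DONE;
}

static struct notifier_block foo_notifier = {
    .notifier_call = foo_netdev_event,
};

static int __init foo_init(void)
{
    int ret;

    ret = register_a();
    if (ret)
        return ret;

    ret = register_b();
    if (ret)
        goto err_a;

    ret = register_netdevice_notifier(&foo_notifier);
    if (ret)        /* the check this patch adds */
        goto err_b;

    return 0;

err_b:
    unregister_b();
err_a:
    unregister_a();
    return ret;
}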
Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_netdev.c | 18 +++++++++++++----- net/netfilter/nfnetlink_queue.c | 9 ++++++++- net/netfilter/xt_TEE.c | 8 +++++++- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 3e5475a833a5..38a3e8385042 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -151,12 +151,20 @@ static int __init nf_tables_netdev_init(void) nft_register_chain_type(&nft_filter_chain_netdev); ret = register_pernet_subsys(&nf_tables_netdev_net_ops); - if (ret < 0) { - nft_unregister_chain_type(&nft_filter_chain_netdev); - return ret; - } - register_netdevice_notifier(&nf_tables_netdev_notifier); + if (ret) + goto err1; + + ret = register_netdevice_notifier(&nf_tables_netdev_notifier); + if (ret) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&nf_tables_netdev_net_ops); +err1: + nft_unregister_chain_type(&nft_filter_chain_netdev); + return ret; } static void __exit nf_tables_netdev_exit(void) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f49f45081acb..808da34f94cd 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1522,9 +1522,16 @@ static int __init nfnetlink_queue_init(void) goto cleanup_netlink_notifier; } - register_netdevice_notifier(&nfqnl_dev_notifier); + status = register_netdevice_notifier(&nfqnl_dev_notifier); + if (status < 0) { + pr_err("nf_queue: failed to register netdevice notifier\n"); + goto cleanup_netlink_subsys; + } + return status; +cleanup_netlink_subsys: + nfnetlink_subsys_unregister(&nfqnl_subsys); cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_queue_net_ops); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 6e57a3966dc5..0471db4032c5 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -89,6 +89,8 @@ static int tee_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->oif[0]) { + int ret; + if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; @@ -101,7 +103,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par) priv->notifier.notifier_call = tee_netdev_event; info->priv = priv; - register_netdevice_notifier(&priv->notifier); + ret = register_netdevice_notifier(&priv->notifier); + if (ret) { + kfree(priv); + return ret; + } } else info->priv = NULL; From 23d07508d25cea9984ee068538b4e86932b015c2 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 10 Sep 2016 10:04:30 +0800 Subject: [PATCH 0149/1050] netfilter: Add the missed return value check of nft_register_chain_type Some netfilter code did not check the return value of nft_register_chain_type(). Add the missing checks now.
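Where only a chain type and a pernet subsystem are involved, as in the per-family tables below, the ladder collapses to a two-step form. A sketch, with "foo" standing for the family:

ret = nft_register_chain_type(&filter_foo);
if (ret < 0)
    return ret;

ret = register_pernet_subsys(&foo_net_ops);
if (ret < 0)
    nft_unregister_chain_type(&filter_foo);    /* undo step one */
return ret;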
Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_tables_bridge.c | 18 +++++++++++++----- net/ipv4/netfilter/nf_tables_arp.c | 5 ++++- net/ipv4/netfilter/nf_tables_ipv4.c | 5 ++++- net/ipv6/netfilter/nf_tables_ipv6.c | 5 ++++- net/netfilter/nf_tables_inet.c | 5 ++++- net/netfilter/nf_tables_netdev.c | 5 ++++- 6 files changed, 33 insertions(+), 10 deletions(-) diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 06f0f81456a0..97afdc0744e6 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -139,12 +139,20 @@ static int __init nf_tables_bridge_init(void) int ret; nf_register_afinfo(&nf_br_afinfo); - nft_register_chain_type(&filter_bridge); + ret = nft_register_chain_type(&filter_bridge); + if (ret < 0) + goto err1; + ret = register_pernet_subsys(&nf_tables_bridge_net_ops); - if (ret < 0) { - nft_unregister_chain_type(&filter_bridge); - nf_unregister_afinfo(&nf_br_afinfo); - } + if (ret < 0) + goto err2; + + return ret; + +err2: + nft_unregister_chain_type(&filter_bridge); +err1: + nf_unregister_afinfo(&nf_br_afinfo); return ret; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 058c034be376..805c8ddfe860 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -80,7 +80,10 @@ static int __init nf_tables_arp_init(void) { int ret; - nft_register_chain_type(&filter_arp); + ret = nft_register_chain_type(&filter_arp); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_arp_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_arp); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index e44ba3b12fbb..2840a29b2e04 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -103,7 +103,10 @@ static int __init nf_tables_ipv4_init(void) { int ret; - nft_register_chain_type(&filter_ipv4); + ret = nft_register_chain_type(&filter_ipv4); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_ipv4); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 05d05926962a..d6e4ba5de916 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -100,7 +100,10 @@ static int __init nf_tables_ipv6_init(void) { int ret; - nft_register_chain_type(&filter_ipv6); + ret = nft_register_chain_type(&filter_ipv6); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_ipv6); diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index 6b5f76295d3d..f713cc205669 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -82,7 +82,10 @@ static int __init nf_tables_inet_init(void) { int ret; - nft_register_chain_type(&filter_inet); + ret = nft_register_chain_type(&filter_inet); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_inet_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_inet); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 38a3e8385042..9e2ae424b640 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -149,7 +149,10 @@ static int __init nf_tables_netdev_init(void) { int ret; - nft_register_chain_type(&nft_filter_chain_netdev); + ret = 
nft_register_chain_type(&nft_filter_chain_netdev); + if (ret) + return ret; + ret = register_pernet_subsys(&nf_tables_netdev_net_ops); if (ret) goto err1; From 6bd14303a908833e10ac731a3308eba938305269 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 11 Sep 2016 22:05:27 +0800 Subject: [PATCH 0150/1050] netfilter: nf_queue: get rid of dependency on IP6_NF_IPTABLES hash_v6 is used by both nftables and ip6tables, so depending on IP6_NF_IPTABLES is not appropriate. It only parses the ipv6hdr and computes a hash value, so even if IPV6 is disabled there is no side effect; remove the dependency. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 0dbce55437f2..cc8a11f7e306 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -54,7 +54,6 @@ static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) (__force u32)iph->saddr, iph->protocol, jhash_initval); } -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -77,7 +76,6 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) return jhash_3words(a, b, c, jhash_initval); } -#endif static inline u32 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, @@ -85,10 +83,8 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, { if (family == NFPROTO_IPV4) queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (family == NFPROTO_IPV6) queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; -#endif return queue; } From 8e8118f893138d4cc3d4dbf4163d7497fca54a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 11 Sep 2016 22:55:53 +0200 Subject: [PATCH 0151/1050] netfilter: conntrack: remove packet hotpath stats These counters sit in the hot path and do show up in perf; this is especially true for 'found' and 'searched', which get incremented for every packet processed. Information like searched=212030105 new=623431 found=333613 delete=623327 does not seem too helpful nowadays: - on busy systems, found and searched will overflow every few hours (these are 32-bit integers); less busy ones overflow every few days. - for debugging there are better methods, such as iptables' trace target and the conntrack log sysctls. Nowadays we also have the perf tool. This removes packet path stat counters except those that are expected to be 0 (or close to 0) on a normal system, e.g. 'insert_failed' (race happened) or 'invalid' (proto tracker rejects). The insert stat is retained for the ctnetlink case. The found stat is retained for the tuple-is-taken check when NAT has to determine if it needs to pick a different source address.
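To make the overflow claim concrete, a rough back-of-the-envelope estimate (assuming on the order of one million packets per second on a busy box): a 32-bit counter incremented once per packet wraps after 2^32 / 10^6 ≈ 4295 seconds, i.e. a little over an hour; at ten thousand packets per second it still wraps in about five days.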
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 4 ---- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 8 ++++---- net/netfilter/nf_conntrack_core.c | 14 ++------------ net/netfilter/nf_conntrack_netlink.c | 6 +----- net/netfilter/nf_conntrack_standalone.c | 8 ++++---- 5 files changed, 11 insertions(+), 29 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 275505792664..1d1ef4e20512 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -4,13 +4,9 @@ #include struct ip_conntrack_stat { - unsigned int searched; unsigned int found; - unsigned int new; unsigned int invalid; unsigned int ignore; - unsigned int delete; - unsigned int delete_list; unsigned int insert; unsigned int insert_failed; unsigned int drop; diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 9df789709abe..6deb8867c5fc 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -231,13 +231,13 @@ enum ctattr_secctx { enum ctattr_stats_cpu { CTA_STATS_UNSPEC, - CTA_STATS_SEARCHED, + CTA_STATS_SEARCHED, /* no longer used */ CTA_STATS_FOUND, - CTA_STATS_NEW, + CTA_STATS_NEW, /* no longer used */ CTA_STATS_INVALID, CTA_STATS_IGNORE, - CTA_STATS_DELETE, - CTA_STATS_DELETE_LIST, + CTA_STATS_DELETE, /* no longer used */ + CTA_STATS_DELETE_LIST, /* no longer used */ CTA_STATS_INSERT, CTA_STATS_INSERT_FAILED, CTA_STATS_DROP, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ac1db4019d5c..8d1ddb9b63ed 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -379,7 +379,6 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); @@ -406,7 +405,6 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_del_from_dying_or_unconfirmed_list(ct); - NF_CT_STAT_INC(net, delete); local_bh_enable(); if (ct->master) @@ -438,7 +436,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) nf_ct_add_to_dying_list(ct); - NF_CT_STAT_INC(net, delete_list); local_bh_enable(); } @@ -529,11 +526,8 @@ begin: if (nf_ct_is_dying(ct)) continue; - if (nf_ct_key_equal(h, tuple, zone, net)) { - NF_CT_STAT_INC_ATOMIC(net, found); + if (nf_ct_key_equal(h, tuple, zone, net)) return h; - } - NF_CT_STAT_INC_ATOMIC(net, searched); } /* * if the nulls value we got at the end of this lookup is @@ -798,7 +792,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert); local_bh_enable(); help = nfct_help(ct); @@ -857,7 +850,6 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, rcu_read_unlock(); return 1; } - NF_CT_STAT_INC_ATOMIC(net, searched); } if (get_nulls_value(n) != hash) { @@ -1177,10 +1169,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } spin_unlock(&nf_conntrack_expect_lock); } - if (!exp) { + if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - NF_CT_STAT_INC(net, new); - } /* Now it is inserted into the unconfirmed list, bump refcount */ nf_conntrack_get(&ct->ct_general); diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c index c052b712c49f..27540455dc62 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1984,13 +1984,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) || - nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || - nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) || + if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || - nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) || - nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3d9a316a3c77..7d52f8401afd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -352,13 +352,13 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, - st->searched, + 0, st->found, - st->new, + 0, st->invalid, st->ignore, - st->delete, - st->delete_list, + 0, + 0, st->insert, st->insert_failed, st->drop, From 2e917d602acd9e3e8c6e4c43b213c8929d986503 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 12 Sep 2016 22:21:36 +0800 Subject: [PATCH 0152/1050] netfilter: nft_numgen: fix race between num generate and store it After we generate a new number, we still read priv->counter and store that to the dreg. This is not correct: another cpu may already have changed it to a new number. So we must use the generated number, not priv->counter itself. Fixes: 91dbc6be0a62 ("netfilter: nf_tables: add number generator expression") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_numgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index f51a3ede3932..f173ebec30a7 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -37,7 +37,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); - memcpy(&regs->data[priv->dreg], &priv->counter, sizeof(u32)); + regs->data[priv->dreg] = nval; } static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { From ecfcdfec7e0cc64215a194044305f02a5a836e6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 15:38:12 +0200 Subject: [PATCH 0153/1050] netfilter: nf_nat: handle NF_DROP from nfnetlink_parse_nat_setup() nf_nat_setup_info() returns NF_* verdicts, so convert them to the error codes that ctnetlink expects. This has gone unnoticed without any impact since nf_nat_setup_info() has always returned NF_ACCEPT so far. Since 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable"), this is a problem.
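For context on why the conversion matters (values from include/uapi/linux/netfilter.h): NF_DROP is numerically 0, so returning the raw verdict would read as success to an errno-expecting ctnetlink caller:

	/* for reference; not part of this patch */
	#define NF_DROP   0
	#define NF_ACCEPT 1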
Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index de31818417b8..19c081e1b328 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -807,7 +807,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, if (err < 0) return err; - return nf_nat_setup_info(ct, &range, manip); + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; } #else static int From e89ca58f9c901c8c4cfb09f96d879b186bb01492 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Sep 2016 09:01:54 -0700 Subject: [PATCH 0154/1050] nvme-rdma: add DELETING queue flag When we get a surprise disconnect from the target we queue a periodic reconnect (which is the sane thing to do...). We only move the queues out of CONNECTED when we retry the reconnect (after 10 seconds in the default case), but we stop the blk queues immediately so we are not bothered with traffic from now on. If delete() kicks in during this period the queues are still in the CONNECTED state. Part of the delete sequence is trying to issue a ctrl shutdown if the admin queue is CONNECTED (which it is!). This request is issued but stuck in blk-mq waiting for the queues to start again. This might be what is preventing forward progress... The patch separates the queue flags into CONNECTED and DELETING. Now we will move out of CONNECTED as soon as error recovery kicks in (before stopping the queues), and DELETING is set when we start the queue deletion. Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a9d43f09963f..eeb08b658640 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -83,6 +83,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), + NVME_RDMA_Q_DELETING = (1 << 2), }; struct nvme_rdma_queue { @@ -559,6 +560,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; + queue->flags = 0; init_completion(&queue->cm_done); if (idx > 0) @@ -616,7 +618,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; nvme_rdma_stop_queue(queue); nvme_rdma_free_queue(queue); @@ -769,8 +771,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); + int i; nvme_stop_keep_alive(&ctrl->ctrl); + + for (i = 0; i < ctrl->queue_count; i++) + clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); @@ -1350,7 +1357,7 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) cancel_delayed_work_sync(&ctrl->reconnect_work); /* Disable the queue so ctrl delete won't free it */ - if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { + if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { /* Free this queue ourselves */ nvme_rdma_stop_queue(queue); nvme_rdma_destroy_queue_ib(queue); From e87a911fed07e368c6f97e75152e6297a7dfba48 Mon Sep 17 00:00:00 2001 From: Steve Wise
Date: Fri, 2 Sep 2016 09:01:54 -0700 Subject: [PATCH 0155/1050] nvme-rdma: use ib_client API to detect device removal Change nvme-rdma to use the IB Client API to detect device removal. This has the wonderful benefit of being able to blow away all the ib/rdma_cm resources for the device being removed. No craziness about not destroying the cm_id handling the event. No deadlocks due to broken iw_cm/rdma_cm/iwarp dependencies. And no need to have a bound cm_id around during controller recovery/reconnect to catch device removal events. We don't use the device_add aspect of the ib_client service since we only want to create resources for an IB device if we have a target utilizing that device. Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 108 +++++++++++++++------------------------ 1 file changed, 40 insertions(+), 68 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index eeb08b658640..d6bdf55a969e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1320,64 +1320,6 @@ out_destroy_queue_ib: return ret; } -/** - * nvme_rdma_device_unplug() - Handle RDMA device unplug - * @queue: Queue that owns the cm_id that caught the event - * - * DEVICE_REMOVAL event notifies us that the RDMA device is about - * to unplug so we should take care of destroying our RDMA resources. - * This event will be generated for each allocated cm_id. - * - * In our case, the RDMA resources are managed per controller and not - * only per queue. So the way we handle this is we trigger an implicit - * controller deletion upon the first DEVICE_REMOVAL event we see, and - * hold the event inflight until the controller deletion is completed. - * - * One exception that we need to handle is the destruction of the cm_id - * that caught the event. Since we hold the callout until the controller - * deletion is completed, we'll deadlock if the controller deletion will - * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources, - * then queue the controller deletion which won't destroy this queue and - * we destroy the cm_id implicitely by returning a non-zero rc to the callout. - */ -static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) -{ - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret = 0; - - /* Own the controller deletion */ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return 0; - - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - - /* Get rid of reconnect work if its running */ - cancel_delayed_work_sync(&ctrl->reconnect_work); - - /* Disable the queue so ctrl delete won't free it */ - if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { - /* Free this queue ourselves */ - nvme_rdma_stop_queue(queue); - nvme_rdma_destroy_queue_ib(queue); - - /* Return non-zero so the cm_id will destroy implicitly */ - ret = 1; - } - - /* - * Queue controller deletion. 
Keep a reference until all - * work is flushed since delete_work will free the ctrl mem - */ - kref_get(&ctrl->ctrl.kref); - queue_work(nvme_rdma_wq, &ctrl->delete_work); - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - - return ret; -} - static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { @@ -1419,8 +1361,8 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, nvme_rdma_error_recovery(queue->ctrl); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - /* return 1 means impliciy CM ID destroy */ - return nvme_rdma_device_unplug(queue); + /* device removal is handled via the ib_client API */ + break; default: dev_err(queue->ctrl->ctrl.device, "Unexpected RDMA CM event (%d)\n", ev->event); @@ -2030,27 +1972,57 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .create_ctrl = nvme_rdma_create_ctrl, }; +static void nvme_rdma_add_one(struct ib_device *ib_device) +{ +} + +static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) +{ + struct nvme_rdma_ctrl *ctrl; + + /* Delete all controllers using this device */ + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { + if (ctrl->device->dev != ib_device) + continue; + dev_info(ctrl->ctrl.device, + "Removing ctrl: NQN \"%s\", addr %pISp\n", + ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + __nvme_rdma_del_ctrl(ctrl); + } + mutex_unlock(&nvme_rdma_ctrl_mutex); + + flush_workqueue(nvme_rdma_wq); +} + +static struct ib_client nvme_rdma_ib_client = { + .name = "nvme_rdma", + .add = nvme_rdma_add_one, + .remove = nvme_rdma_remove_one +}; + static int __init nvme_rdma_init_module(void) { + int ret; + nvme_rdma_wq = create_workqueue("nvme_rdma_wq"); if (!nvme_rdma_wq) return -ENOMEM; + ret = ib_register_client(&nvme_rdma_ib_client); + if (ret) { + destroy_workqueue(nvme_rdma_wq); + return ret; + } + nvmf_register_transport(&nvme_rdma_transport); return 0; } static void __exit nvme_rdma_cleanup_module(void) { - struct nvme_rdma_ctrl *ctrl; - nvmf_unregister_transport(&nvme_rdma_transport); - - mutex_lock(&nvme_rdma_ctrl_mutex); - list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) - __nvme_rdma_del_ctrl(ctrl); - mutex_unlock(&nvme_rdma_ctrl_mutex); - + ib_unregister_client(&nvme_rdma_ib_client); destroy_workqueue(nvme_rdma_wq); } From 1bda18de8f15a611a61d1a935b679db2e153338a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Sep 2016 16:24:38 +0100 Subject: [PATCH 0156/1050] nvme-rdma: fix null pointer dereference on req->mr If there is an error on req->mr, req->mr is set to null, however the following statement sets req->mr->need_inval causing a null pointer dereference. Fix this by bailing out to label 'out' to immediately return and hence skip over the offending null pointer dereference. 
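A condensed sketch of the faulty sequence (allocation call abbreviated; the one-line hunk below is the actual fix):

	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, count);
	if (IS_ERR(req->mr)) {
		ret = PTR_ERR(req->mr);
		req->mr = NULL;
		goto out;                    /* without this we fall through... */
	}
	req->mr->need_inval = false;         /* ...and dereference NULL here */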
Fixes: f5b7b559e1488 ("nvme-rdma: Get rid of duplicate variable") Signed-off-by: Colin Ian King Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d6bdf55a969e..c2c2c28e6eb5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -293,6 +293,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) if (IS_ERR(req->mr)) { ret = PTR_ERR(req->mr); req->mr = NULL; + goto out; } req->mr->need_inval = false; From 2cfe199ca5a8816ee80fe15bcf202dd1020aaea0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Sep 2016 14:58:06 +0200 Subject: [PATCH 0157/1050] nvme-rdma: add back dependency on CONFIG_BLOCK A recent change removed the dependency on BLK_DEV_NVME, which implies the dependency on PCI and BLOCK. We don't need CONFIG_PCI, but without CONFIG_BLOCK we get tons of build errors, e.g. In file included from drivers/nvme/host/core.c:16:0: linux/blk-mq.h:182:33: error: 'struct gendisk' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] drivers/nvme/host/core.c: In function 'nvme_setup_rw': drivers/nvme/host/core.c:295:21: error: implicit declaration of function 'rq_data_dir' [-Werror=implicit-function-declaration] drivers/nvme/host/nvme.h: In function 'nvme_map_len': drivers/nvme/host/nvme.h:217:6: error: implicit declaration of function 'req_op' [-Werror=implicit-function-declaration] drivers/nvme/host/scsi.c: In function 'nvme_trans_bdev_limits_page': drivers/nvme/host/scsi.c:768:85: error: implicit declaration of function 'queue_max_hw_sectors' [-Werror=implicit-function-declaration] This adds back the specific CONFIG_BLOCK dependency to avoid broken configurations. Signed-off-by: Arnd Bergmann Fixes: aa71987472a9 ("nvme: fabrics drivers don't need the nvme-pci driver") Signed-off-by: Sagi Grimberg --- drivers/nvme/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 0c644f7bdf80..4b6cfff4b5be 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -31,6 +31,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" depends on INFINIBAND + depends on BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL From bf2c4b6f9b74c2ee1dd3c050b181e9b9c86fbcdb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Sep 2016 10:50:38 -0400 Subject: [PATCH 0158/1050] svcauth_gss: Revert 64c59a3726f2 ("Remove unnecessary allocation") rsc_lookup steals the passed-in memory to avoid doing an allocation of its own, so we can't just pass in a pointer to memory that someone else is using. If we really want to avoid allocation there then maybe we should preallocate somewhere, or reference count these handles. For now we should revert. On occasion I see this on my server: kernel: kernel BUG at /home/cel/src/linux/linux-2.6/mm/slub.c:3851!
kernel: invalid opcode: 0000 [#1] SMP kernel: Modules linked in: cts rpcsec_gss_krb5 sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd btrfs xor iTCO_wdt iTCO_vendor_support raid6_pq pcspkr i2c_i801 i2c_smbus lpc_ich mfd_core mei_me sg mei shpchp wmi ioatdma ipmi_si ipmi_msghandler acpi_pad acpi_power_meter rpcrdma ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables xfs libcrc32c mlx4_ib mlx4_en ib_core sr_mod cdrom sd_mod ast drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel igb mlx4_core ahci libahci libata ptp pps_core dca i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod kernel: CPU: 7 PID: 145 Comm: kworker/7:2 Not tainted 4.8.0-rc4-00006-g9d06b0b #15 kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015 kernel: Workqueue: events do_cache_clean [sunrpc] kernel: task: ffff8808541d8000 task.stack: ffff880854344000 kernel: RIP: 0010:[] [] kfree+0x155/0x180 kernel: RSP: 0018:ffff880854347d70 EFLAGS: 00010246 kernel: RAX: ffffea0020fe7660 RBX: ffff88083f9db064 RCX: 146ff0f9d5ec5600 kernel: RDX: 000077ff80000000 RSI: ffff880853f01500 RDI: ffff88083f9db064 kernel: RBP: ffff880854347d88 R08: ffff8808594ee000 R09: ffff88087fdd8780 kernel: R10: 0000000000000000 R11: ffffea0020fe76c0 R12: ffff880853f01500 kernel: R13: ffffffffa013cf76 R14: ffffffffa013cff0 R15: ffffffffa04253a0 kernel: FS: 0000000000000000(0000) GS:ffff88087fdc0000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007fed60b020c3 CR3: 0000000001c06000 CR4: 00000000001406e0 kernel: Stack: kernel: ffff8808589f2f00 ffff880853f01500 0000000000000001 ffff880854347da0 kernel: ffffffffa013cf76 ffff8808589f2f00 ffff880854347db8 ffffffffa013d006 kernel: ffff8808589f2f20 ffff880854347e00 ffffffffa0406f60 0000000057c7044f kernel: Call Trace: kernel: [] rsc_free+0x16/0x90 [auth_rpcgss] kernel: [] rsc_put+0x16/0x30 [auth_rpcgss] kernel: [] cache_clean+0x2e0/0x300 [sunrpc] kernel: [] do_cache_clean+0xe/0x70 [sunrpc] kernel: [] process_one_work+0x1ff/0x3b0 kernel: [] worker_thread+0x2bc/0x4a0 kernel: [] ? rescuer_thread+0x3a0/0x3a0 kernel: [] kthread+0xe4/0xf0 kernel: [] ret_from_fork+0x1f/0x40 kernel: [] ? kthread_stop+0x110/0x110 kernel: Code: f7 ff ff eb 3b 65 8b 05 da 30 e2 7e 89 c0 48 0f a3 05 a0 38 b8 00 0f 92 c0 84 c0 0f 85 d1 fe ff ff 0f 1f 44 00 00 e9 f5 fe ff ff <0f> 0b 49 8b 03 31 f6 f6 c4 40 0f 85 62 ff ff ff e9 61 ff ff ff kernel: RIP [] kfree+0x155/0x180 kernel: RSP kernel: ---[ end trace 3fdec044969def26 ]--- It seems to be most common after a server reboot where a client has been using a Kerberos mount, and reconnects to continue its workload. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1d281816f2bf..d8582028b346 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -569,9 +569,10 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) struct rsc *found; memset(&rsci, 0, sizeof(rsci)); - rsci.handle.data = handle->data; - rsci.handle.len = handle->len; + if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) + return NULL; found = rsc_lookup(cd, &rsci); + rsc_free(&rsci); if (!found) return NULL; if (cache_check(cd, &found->h, NULL)) From 14e2dee0996f51e0ff0d868497c7e1b90f012665 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 13 Sep 2016 10:21:46 +0200 Subject: [PATCH 0159/1050] netfilter: nft_hash: fix hash overflow validation The overflow validation in the init() function establishes that the maximum value that the hash could reach is less than U32_MAX, which is likely to be true. The fix detects the overflow when the maximum hash value is less than the offset itself. Fixes: 70ca767ea1b2 ("netfilter: nft_hash: Add hash offset value") Reported-by: Liping Zhang Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index bd12f7a801c2..09473b415b95 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -76,7 +76,7 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->modulus <= 1) return -ERANGE; - if (priv->offset + priv->modulus - 1 < U32_MAX) + if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); From 4440a2ab3b9f40dddbe006331ef0659c76859296 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 13 Sep 2016 08:49:18 +0800 Subject: [PATCH 0160/1050] netfilter: synproxy: Check oom when adding synproxy and seqadj ct extensions When memory is exhausted, nfct_seqadj_ext_add may fail to add the synproxy and seqadj extensions. The function nf_ct_seqadj_init() doesn't check whether it got a valid seqadj pointer from nfct_seqadj(). Now drop the packet directly when adding the seqadj extension fails, to avoid dereferencing a NULL pointer in nf_ct_seqadj_init() from init_conntrack().
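A minimal sketch of the crash mode being avoided (field access abbreviated, assuming the extension add failed under memory pressure):

	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);  /* NULL: ext add failed */

	seqadj->seq[dir].correction_pos = 0;              /* NULL dereference */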
Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_synproxy.h | 14 ++++++++++++++ net/netfilter/nf_conntrack_core.c | 6 +++--- net/netfilter/nf_nat_core.c | 3 ++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 6793614e6502..e6937318546c 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -27,6 +27,20 @@ static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) #endif } +static inline bool nf_ct_add_synproxy(struct nf_conn *ct, + const struct nf_conn *tmpl) +{ + if (tmpl && nfct_synproxy(tmpl)) { + if (!nfct_seqadj_ext_add(ct)) + return false; + + if (!nfct_synproxy_ext_add(ct)) + return false; + } + + return true; +} + struct synproxy_stats { unsigned int syn_received; unsigned int cookie_invalid; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dd2c43abf9e2..9934b0c93c1e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1035,9 +1035,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; - if (tmpl && nfct_synproxy(tmpl)) { - nfct_seqadj_ext_add(ct); - nfct_synproxy_ext_add(ct); + if (!nf_ct_add_synproxy(ct, tmpl)) { + nf_conntrack_free(ct); + return ERR_PTR(-ENOMEM); } timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 19c081e1b328..ecee105bbada 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -441,7 +441,8 @@ nf_nat_setup_info(struct nf_conn *ct, ct->status |= IPS_DST_NAT; if (nfct_help(ct)) - nfct_seqadj_ext_add(ct); + if (!nfct_seqadj_ext_add(ct)) + return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { From 53a5d5ddccf849dbc27a8c1bba0b43c3a45fb792 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Sep 2016 18:42:08 +0800 Subject: [PATCH 0161/1050] crypto: echainiv - Replace chaining with multiplication The current implementation uses a global per-cpu array to store data which are used to derive the next IV. This is insecure as the attacker may change the stored data. This patch removes all traces of chaining and replaces it with multiplication of the salt and the sequence number. Fixes: a10f554fa7e0 ("crypto: echainiv - Add encrypted chain IV...") Cc: stable@vger.kernel.org Reported-by: Mathias Krause Signed-off-by: Herbert Xu --- crypto/echainiv.c | 117 ++++++++++------------------------------------ 1 file changed, 25 insertions(+), 92 deletions(-) diff --git a/crypto/echainiv.c b/crypto/echainiv.c index 1b01fe98e91f..e3d889b122e0 100644 --- a/crypto/echainiv.c +++ b/crypto/echainiv.c @@ -1,8 +1,8 @@ /* * echainiv: Encrypted Chain IV Generator * - * This generator generates an IV based on a sequence number by xoring it - * with a salt and then encrypting it with the same key as used to encrypt + * This generator generates an IV based on a sequence number by multiplying + * it with a salt and then encrypting it with the same key as used to encrypt * the plain text. This algorithm requires that the block size be equal * to the IV size. It is mainly useful for CBC. 
* @@ -24,81 +24,17 @@ #include #include #include -#include #include -#include -#include +#include #include -#define MAX_IV_SIZE 16 - -static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv); - -/* We don't care if we get preempted and read/write IVs from the next CPU. */ -static void echainiv_read_iv(u8 *dst, unsigned size) -{ - u32 *a = (u32 *)dst; - u32 __percpu *b = echainiv_iv; - - for (; size >= 4; size -= 4) { - *a++ = this_cpu_read(*b); - b++; - } -} - -static void echainiv_write_iv(const u8 *src, unsigned size) -{ - const u32 *a = (const u32 *)src; - u32 __percpu *b = echainiv_iv; - - for (; size >= 4; size -= 4) { - this_cpu_write(*b, *a); - a++; - b++; - } -} - -static void echainiv_encrypt_complete2(struct aead_request *req, int err) -{ - struct aead_request *subreq = aead_request_ctx(req); - struct crypto_aead *geniv; - unsigned int ivsize; - - if (err == -EINPROGRESS) - return; - - if (err) - goto out; - - geniv = crypto_aead_reqtfm(req); - ivsize = crypto_aead_ivsize(geniv); - - echainiv_write_iv(subreq->iv, ivsize); - - if (req->iv != subreq->iv) - memcpy(req->iv, subreq->iv, ivsize); - -out: - if (req->iv != subreq->iv) - kzfree(subreq->iv); -} - -static void echainiv_encrypt_complete(struct crypto_async_request *base, - int err) -{ - struct aead_request *req = base->data; - - echainiv_encrypt_complete2(req, err); - aead_request_complete(req, err); -} - static int echainiv_encrypt(struct aead_request *req) { struct crypto_aead *geniv = crypto_aead_reqtfm(req); struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv); struct aead_request *subreq = aead_request_ctx(req); - crypto_completion_t compl; - void *data; + __be64 nseqno; + u64 seqno; u8 *info; unsigned int ivsize = crypto_aead_ivsize(geniv); int err; @@ -108,8 +44,6 @@ static int echainiv_encrypt(struct aead_request *req) aead_request_set_tfm(subreq, ctx->child); - compl = echainiv_encrypt_complete; - data = req; info = req->iv; if (req->src != req->dst) { @@ -127,29 +61,30 @@ static int echainiv_encrypt(struct aead_request *req) return err; } - if (unlikely(!IS_ALIGNED((unsigned long)info, - crypto_aead_alignmask(geniv) + 1))) { - info = kmalloc(ivsize, req->base.flags & - CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL: - GFP_ATOMIC); - if (!info) - return -ENOMEM; - - memcpy(info, req->iv, ivsize); - } - - aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_callback(subreq, req->base.flags, + req->base.complete, req->base.data); aead_request_set_crypt(subreq, req->dst, req->dst, req->cryptlen, info); aead_request_set_ad(subreq, req->assoclen); - crypto_xor(info, ctx->salt, ivsize); - scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1); - echainiv_read_iv(info, ivsize); + memcpy(&nseqno, info + ivsize - 8, 8); + seqno = be64_to_cpu(nseqno); + memset(info, 0, ivsize); - err = crypto_aead_encrypt(subreq); - echainiv_encrypt_complete2(req, err); - return err; + scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1); + + do { + u64 a; + + memcpy(&a, ctx->salt + ivsize - 8, 8); + + a |= 1; + a *= seqno; + + memcpy(info + ivsize - 8, &a, 8); + } while ((ivsize -= 8)); + + return crypto_aead_encrypt(subreq); } static int echainiv_decrypt(struct aead_request *req) @@ -196,8 +131,7 @@ static int echainiv_aead_create(struct crypto_template *tmpl, alg = crypto_spawn_aead_alg(spawn); err = -EINVAL; - if (inst->alg.ivsize & (sizeof(u32) - 1) || - inst->alg.ivsize > MAX_IV_SIZE) + if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize) goto free_inst; inst->alg.encrypt = echainiv_encrypt; @@ -206,7 +140,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl, inst->alg.init = aead_init_geniv; inst->alg.exit = aead_exit_geniv; - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx); inst->alg.base.cra_ctxsize += inst->alg.ivsize; From acdb04d0b36769b3e05990c488dc74d8b7ac8060 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Sep 2016 14:43:29 +0800 Subject: [PATCH 0162/1050] crypto: skcipher - Fix blkcipher walk OOM crash When the temporary buffer allocation in blkcipher_walk_next fails, the code is supposed to take the slow path of processing the data block by block. However, due to an unrelated change we instead end up dereferencing a NULL pointer. This patch fixes it by moving the unrelated bsize assignment out of the way so that we enter the slow path as intended.
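A condensed sketch of the failure path (control flow simplified, details elided; the hunks below are the actual change):

	bsize = min(walk->walk_blocksize, n);   /* now computed up front */

	if (!walk->page) {
		walk->page = (void *)__get_free_page(GFP_ATOMIC);
		if (!walk->page)
			n = 0;                  /* allocation failed */
	}
	...
	if (unlikely(n < bsize))                /* with bsize set early, the
						   OOM case lands here */
		return blkcipher_next_slow(desc, walk, bsize, walk->alignmask);

Previously bsize was derived from n after the allocation failure had already zeroed it, so the n < bsize test (0 < 0) never fired and the fast path went on to dereference the missing page.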
Fixes: 7607bd8ff03b ("[CRYPTO] blkcipher: Added blkcipher_walk_virt_block") Cc: stable@vger.kernel.org Reported-by: xiakaixu Reported-by: Ard Biesheuvel Signed-off-by: Herbert Xu Tested-by: Ard Biesheuvel --- crypto/blkcipher.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 369999530108..a832426820e8 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -233,6 +233,8 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, return blkcipher_walk_done(desc, walk, -EINVAL); } + bsize = min(walk->walk_blocksize, n); + walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | BLKCIPHER_WALK_DIFF); if (!scatterwalk_aligned(&walk->in, walk->alignmask) || @@ -245,7 +247,6 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, } } - bsize = min(walk->walk_blocksize, n); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); From f82e90b28654804ab72881d577d87c3d5c65e2bc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Sep 2016 09:48:52 +0100 Subject: [PATCH 0163/1050] crypto: arm/aes-ctr - fix NULL dereference in tail processing The AES-CTR glue code avoids calling into the blkcipher API for the tail portion of the walk, by comparing the remainder of walk.nbytes modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight into the tail processing block if they are equal. This tail processing block checks whether nbytes != 0, and does nothing otherwise. However, in case of an allocation failure in the blkcipher layer, we may enter this code with walk.nbytes == 0, while nbytes > 0. In this case, we should not dereference the source and destination pointers, since they may be NULL. So instead of checking for nbytes != 0, check for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in non-error conditions. Fixes: 86464859cc77 ("crypto: arm - AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions") Cc: stable@vger.kernel.org Reported-by: xiakaixu Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index da3c0428507b..aef022a87c53 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -284,7 +284,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; From 2db34e78f126c6001d79d3b66ab1abb482dc7caa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Sep 2016 09:48:53 +0100 Subject: [PATCH 0164/1050] crypto: arm64/aes-ctr - fix NULL dereference in tail processing The AES-CTR glue code avoids calling into the blkcipher API for the tail portion of the walk, by comparing the remainder of walk.nbytes modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight into the tail processing block if they are equal. This tail processing block checks whether nbytes != 0, and does nothing otherwise. However, in case of an allocation failure in the blkcipher layer, we may enter this code with walk.nbytes == 0, while nbytes > 0. In this case, we should not dereference the source and destination pointers, since they may be NULL. 
So instead of checking for nbytes != 0, check for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in non-error conditions. Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions") Cc: stable@vger.kernel.org Reported-by: xiakaixu Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5c888049d061..6b2aa0fd6cd0 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -216,7 +216,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; From e3b23148fd8af277fd55068183ed3ce4c8f51aa5 Mon Sep 17 00:00:00 2001 From: Amitoj Kaur Chawla Date: Fri, 12 Aug 2016 08:36:54 +0530 Subject: [PATCH 0165/1050] MIPS: ath79: Fix test for error return of clk_register_fixed_factor(). clk_register_fixed_factor returns an ERR_PTR in case of an error and should have an IS_ERR check instead of a null check. The Coccinelle semantic patch used to find this issue is as follows: @@ expression e; statement S; @@ *e = clk_register_fixed_factor(...); if (!e) S Signed-off-by: Amitoj Kaur Chawla Cc: julia.lawall@lip6.fr Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13894/ Signed-off-by: Ralf Baechle --- arch/mips/ath79/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c index 2e7378467c5c..cc3a1e33a600 100644 --- a/arch/mips/ath79/clock.c +++ b/arch/mips/ath79/clock.c @@ -96,7 +96,7 @@ static struct clk * __init ath79_reg_ffclk(const char *name, struct clk *clk; clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div); - if (!clk) + if (IS_ERR(clk)) panic("failed to allocate %s clock structure", name); return clk; From 2809328f6ef2139115665dda1b83ee9303c52431 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 11 Aug 2016 09:02:30 +0200 Subject: [PATCH 0166/1050] MIPS: uprobes: fix incorrect uprobe brk handling When a uprobe-replacement breakpoint instruction is handled, a notifier is called with DIE_UPROBE argument, but a corresponding exception notify handler for MIPS attempts to handle DIE_BREAK instead. As a result the breakpoint instruction isn't handled by the uprobe code and the probed application terminates with SIGTRAP. Fix this by changing arch_uprobe_exception_notify code to handle DIE_UPROBE as a pre-singlestep condition instead of DIE_BREAK. 
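For context, a minimal sketch of the mismatch (emit site abbreviated): the MIPS trap path reports uprobe breakpoints via something like

	notify_die(DIE_UPROBE, "uprobe", regs, 0, current->thread.trap_nr, SIGTRAP);

while arch_uprobe_exception_notify only matched DIE_BREAK, so the notification was never acted upon; the hunk below is the actual fix.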
Signed-off-by: Marcin Nowakowski Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13884/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 8452d933a645..1149b30c9aeb 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -222,7 +222,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, return NOTIFY_DONE; switch (val) { - case DIE_BREAK: + case DIE_UPROBE: if (uprobe_pre_sstep_notifier(regs)) return NOTIFY_STOP; break; From 58cae9b0f0c1d9cc55de018d927e65549b24cf5b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Aug 2016 13:21:48 +0100 Subject: [PATCH 0167/1050] MIPS: Fix memory regions reaching top of physical Memory regions added with add_memory_region() at the top of the physical address space will have their end address overflow to 0. This causes them to be rejected as invalid, and would cause various other issues later on. This causes issues on Malta and Boston platforms when wanting to use all 2GB of RAM on a 32-bit kernel, either via highmem (using physical addresses 0x90000000..0xFFFFFFFF), or with the Malta Enhanced Virtual Addressing (EVA) layout which exposes the whole 0x80000000..0xFFFFFFFF physical address range to kernel mode at 0x00000000..0x7FFFFFFF. Due to the abundance of these non-overflow assumptions and the fact that memblock already avoids the arithmetic overflow by limiting the size of new memory regions without the arch code knowing it (in particular mem_init_free_highmem() will trigger a page dump due to nonzero mapcount on the last page), it is simpler and safer to just limit the size of the region in a similar way to memblock but at the arch level to allow most of the RAM to be used without arithmetic overflows. Therefore we detect this case specifically and reduce the size of the region slightly to avoid the arithmetic overflows and cause the last page to be ignored. Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13857/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 36cf8d65c47d..3be0e6ba2797 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -87,6 +87,13 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type) int x = boot_mem_map.nr_map; int i; + /* + * If the region reaches the top of the physical address space, adjust + * the size slightly so that (start + size) doesn't overflow + */ + if (start + size - 1 == (phys_addr_t)ULLONG_MAX) + --size; + /* Sanity check */ if (start + size < start) { pr_warn("Trying to add an invalid memory region, skipped\n"); From ac7e385f2bf1c39615cf58f7e58246fdd9da5bb9 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Aug 2016 13:21:49 +0100 Subject: [PATCH 0168/1050] MIPS: MAAR: Fix address alignment The alignment of MIPS MAAR region addresses isn't quite right. - It rounds an already 64 KiB aligned start address up to the next 64 KiB boundary, e.g. 0x80000000 is rounded up to 0x80010000. - It assumes the end address is already on a 64 KiB boundary and doesn't round it down. Should that not be the case it will hit the second BUG_ON() in write_maar_pair(). 
Both cases are addressed by rounding up and down to 64 KiB boundaries in the more traditional way of adding 0xffff (for rounding up) and masking off the low 16 bits. Signed-off-by: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13858/ Signed-off-by: Ralf Baechle --- arch/mips/mm/init.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index a5509e7dcad2..2c3749d98f04 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -261,7 +261,6 @@ unsigned __weak platform_maar_init(unsigned num_pairs) { struct maar_config cfg[BOOT_MEM_MAP_MAX]; unsigned i, num_configured, num_cfg = 0; - phys_addr_t skip; for (i = 0; i < boot_mem_map.nr_map; i++) { switch (boot_mem_map.map[i].type) { @@ -272,14 +271,14 @@ unsigned __weak platform_maar_init(unsigned num_pairs) continue; } - skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); - + /* Round lower up */ cfg[num_cfg].lower = boot_mem_map.map[i].addr; - cfg[num_cfg].lower += skip; + cfg[num_cfg].lower = (cfg[num_cfg].lower + 0xffff) & ~0xffff; - cfg[num_cfg].upper = cfg[num_cfg].lower; - cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; - cfg[num_cfg].upper -= skip; + /* Round upper down */ + cfg[num_cfg].upper = boot_mem_map.map[i].addr + + boot_mem_map.map[i].size; + cfg[num_cfg].upper = (cfg[num_cfg].upper & ~0xffff) - 1; cfg[num_cfg].attrs = MIPS_MAAR_S; num_cfg++; From b03c1e3b8eed9026733c473071d1f528358a0e50 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 12 Sep 2016 10:58:06 +0100 Subject: [PATCH 0169/1050] MIPS: Remove compact branch policy Kconfig entries Commit c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed") added Kconfig entries allowing for the compact branch policy used by the compiler for MIPSr6 kernels to be specified. This can be useful for debugging, particularly in systems where compact branches have recently been introduced. Unfortunately mainline gcc 5.x supports MIPSr6 but not the -mcompact-branches compiler flag, leading to MIPSr6 kernels failing to build with gcc 5.x with errors such as: mipsel-linux-gnu-gcc: error: unrecognized command line option '-mcompact-branches=optimal' make[2]: *** [kernel/bounds.s] Error 1 Fixing this by hiding the Kconfig entry behind another seems to be more hassle than it's worth, as MIPSr6 & compact branches have been around for a while now and if policy does need to be set for debug it can be done easily enough with KCFLAGS. Therefore remove the compact branch policy Kconfig entries & their handling in the Makefile. This reverts commit c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed"). Signed-off-by: Paul Burton Reported-by: kbuild test robot Fixes: c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed") Cc: stable # v4.4+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14241/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig.debug | 36 ------------------------------------ arch/mips/Makefile | 4 ---- 2 files changed, 40 deletions(-) diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index f0e314ceb8ba..7f975b20b20c 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -113,42 +113,6 @@ config SPINLOCK_TEST help Add several files to the debugfs to test spinlock speed. 
-if CPU_MIPSR6 - -choice - prompt "Compact branch policy" - default MIPS_COMPACT_BRANCHES_OPTIMAL - -config MIPS_COMPACT_BRANCHES_NEVER - bool "Never (force delay slot branches)" - help - Pass the -mcompact-branches=never flag to the compiler in order to - force it to always emit branches with delay slots, and make no use - of the compact branch instructions introduced by MIPSr6. This is - useful if you suspect there may be an issue with compact branches in - either the compiler or the CPU. - -config MIPS_COMPACT_BRANCHES_OPTIMAL - bool "Optimal (use where beneficial)" - help - Pass the -mcompact-branches=optimal flag to the compiler in order for - it to make use of compact branch instructions where it deems them - beneficial, and use branches with delay slots elsewhere. This is the - default compiler behaviour, and should be used unless you have a - reason to choose otherwise. - -config MIPS_COMPACT_BRANCHES_ALWAYS - bool "Always (force compact branches)" - help - Pass the -mcompact-branches=always flag to the compiler in order to - force it to always emit compact branches, making no use of branch - instructions with delay slots. This can result in more compact code - which may be beneficial in some scenarios. - -endchoice - -endif # CPU_MIPSR6 - config SCACHE_DEBUGFS bool "L2 cache debugfs entries" depends on DEBUG_FS diff --git a/arch/mips/Makefile b/arch/mips/Makefile index efd7a9dc93c4..598ab2930fce 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -203,10 +203,6 @@ endif toolchain-virt := $(call cc-option-yn,$(mips-cflags) -mvirt) cflags-$(toolchain-virt) += -DTOOLCHAIN_SUPPORTS_VIRT -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER) += -mcompact-branches=never -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL) += -mcompact-branches=optimal -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_ALWAYS) += -mcompact-branches=always - # # Firmware support # From 951c39cd3bc0aedf67fbd8fb4b9380287e6205d1 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 5 Sep 2016 15:43:40 +0100 Subject: [PATCH 0170/1050] MIPS: paravirt: Fix undefined reference to smp_bootstrap If the paravirt machine is compiled without CONFIG_SMP, the following linker error occurs: arch/mips/kernel/head.o: In function `kernel_entry': (.ref.text+0x10): undefined reference to `smp_bootstrap' due to the kernel entry macro always including SMP startup code. Wrap this code in CONFIG_SMP to fix the error. Signed-off-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 3.16+ Patchwork: https://patchwork.linux-mips.org/patch/14212/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-paravirt/kernel-entry-init.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h index 2f82bfa3a773..c9f5769dfc8f 100644 --- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h @@ -11,11 +11,13 @@ #define CP0_EBASE $15, 1 .macro kernel_entry_setup +#ifdef CONFIG_SMP mfc0 t0, CP0_EBASE andi t0, t0, 0x3ff # CPUNum beqz t0, 1f # CPUs other than zero goto smp_bootstrap j smp_bootstrap +#endif /* CONFIG_SMP */ 1: .endm From ebf9ff753c041b296241990aef76163bbb2cc9c8 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 13 Sep 2016 15:58:28 +0200 Subject: [PATCH 0171/1050] genirq: Provide irq_gc_{lock_irqsave,unlock_irqrestore}() helpers Some irqchip drivers need to take the generic chip lock outside of the irq context.
Provide the irq_gc_{lock_irqsave,unlock_irqrestore}() helpers to allow one to disable irqs while entering a critical section protected by gc->lock. Note that we do not provide optimized versions of these helpers for !SMP, because they are not called from the hot-path. [ tglx: Added a comment when these helpers should be [not] used ] Signed-off-by: Boris Brezillon Cc: Jason Cooper Cc: Marc Zyngier Cc: Nicolas Ferre Cc: stable@vger.kernel.org Cc: Alexandre Belloni Link: http://lkml.kernel.org/r/1473775109-4192-1-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index b52424eaa0ed..0ac26c892fe2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -945,6 +945,16 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { } static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } #endif +/* + * The irqsave variants are for usage in non interrupt code. Do not use + * them in irq_chip callbacks. Use irq_gc_lock() instead. + */ +#define irq_gc_lock_irqsave(gc, flags) \ + raw_spin_lock_irqsave(&(gc)->lock, flags) + +#define irq_gc_unlock_irqrestore(gc, flags) \ + raw_spin_unlock_irqrestore(&(gc)->lock, flags) + static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { From 5eb0d6eb3fac3daa60d9190eed9fa41cf809c756 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 13 Sep 2016 15:58:29 +0200 Subject: [PATCH 0172/1050] irqchip/atmel-aic: Fix potential deadlock in ->xlate() aic5_irq_domain_xlate() and aic_irq_domain_xlate() take the generic chip lock without disabling interrupts, which can lead to a deadlock if an interrupt occurs while the lock is held in one of these functions. Replace irq_gc_{lock,unlock}() calls by irq_gc_{lock_irqsave,unlock_irqrestore}() ones to prevent this bug from happening.
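As a usage sketch (mirroring the helpers introduced in the previous patch), the xlate paths now bracket their critical sections like this:

	unsigned long flags;

	irq_gc_lock_irqsave(gc, flags);
	/* registers protected by gc->lock; local irqs are off, so an
	 * interrupt on this CPU cannot re-enter and deadlock on the lock */
	irq_gc_unlock_irqrestore(gc, flags);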
Fixes: b1479ebb7720 ("irqchip: atmel-aic: Add atmel AIC/AIC5 drivers") Signed-off-by: Boris Brezillon Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Nicolas Ferre Cc: stable@vger.kernel.org Cc: Alexandre Belloni Link: http://lkml.kernel.org/r/1473775109-4192-2-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-atmel-aic.c | 5 +++-- drivers/irqchip/irq-atmel-aic5.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index 112e17c2768b..37f952dd9fc9 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c @@ -176,6 +176,7 @@ static int aic_irq_domain_xlate(struct irq_domain *d, { struct irq_domain_chip_generic *dgc = d->gc; struct irq_chip_generic *gc; + unsigned long flags; unsigned smr; int idx; int ret; @@ -194,11 +195,11 @@ static int aic_irq_domain_xlate(struct irq_domain *d, gc = dgc->gc[idx]; - irq_gc_lock(gc); + irq_gc_lock_irqsave(gc, flags); smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); aic_common_set_priority(intspec[2], &smr); irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); - irq_gc_unlock(gc); + irq_gc_unlock_irqrestore(gc, flags); return ret; } diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 4f0d068e1abe..2a624d87a035 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -258,6 +258,7 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, unsigned int *out_type) { struct irq_chip_generic *bgc = irq_get_domain_generic_chip(d, 0); + unsigned long flags; unsigned smr; int ret; @@ -269,12 +270,12 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, if (ret) return ret; - irq_gc_lock(bgc); + irq_gc_lock_irqsave(bgc, flags); irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR); smr = irq_reg_readl(bgc, AT91_AIC5_SMR); aic_common_set_priority(intspec[2], &smr); irq_reg_writel(bgc, smr, AT91_AIC5_SMR); - irq_gc_unlock(bgc); + irq_gc_unlock_irqrestore(bgc, flags); return ret; } From 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 9 Sep 2016 22:43:12 +0800 Subject: [PATCH 0173/1050] bnx2: Reset device during driver initialization When the system enters the kdump kernel because of a kernel panic, devices are not shut down. In-flight DMA will continue transferring data until the device driver initializes. All devices are supposed to be reset during driver initialization; this property is used to fix kdump failures on systems with an Intel IOMMU. Other systems with a hardware IOMMU should be similar. Please check commit 091d42e ("iommu/vt-d: Copy translation tables from old kernel") and the surrounding commits. But the bnx2 driver doesn't reset the device during driver initialization; the reset is deferred until the net device is brought up. This causes hardware IOMMU handling failures on bnx2 devices. The reset also relies on firmware, so this patch moves the firmware-request code earlier, into bnx2_init_one(), and then calls bnx2_reset_chip() to reset the device. Signed-off-by: Baoquan He Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/bnx2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 8fc3f3c137f8..505ceaf451e2 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6356,10 +6356,6 @@ bnx2_open(struct net_device *dev) struct bnx2 *bp = netdev_priv(dev); int rc; - rc = bnx2_request_firmware(bp); - if (rc < 0) - goto out; - netif_carrier_off(dev); bnx2_disable_int(bp); @@ -6428,7 +6424,6 @@ open_err: bnx2_free_irq(bp); bnx2_free_mem(bp); bnx2_del_napi(bp); - bnx2_release_firmware(bp); goto out; } @@ -8575,6 +8570,12 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + rc = bnx2_request_firmware(bp); + if (rc < 0) + goto error; + + + bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | @@ -8607,6 +8608,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: + bnx2_release_firmware(bp); pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); From 3cbc6fc9c99f1709203711f125bc3b79487aba06 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 5 Sep 2016 08:48:03 +0800 Subject: [PATCH 0174/1050] MIPS: Add a missing ".set pop" in an early commit Commit 842dfc11ea9a21 ("MIPS: Fix build with binutils 2.24.51+") is missing a ".set pop" in the macro fpu_restore_16even, so add it. Signed-off-by: Huacai Chen Acked-by: Manuel Lauss Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 3.18+ Patchwork: https://patchwork.linux-mips.org/patch/14210/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/asmmacro.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 56584a659183..83054f79f72a 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -157,6 +157,7 @@ ldc1 $f28, THREAD_FPR28(\thread) ldc1 $f30, THREAD_FPR30(\thread) ctc1 \tmp, fcr31 + .set pop .endm .macro fpu_restore_16odd thread From 28b89b9e6f7b6c8fef7b3af39828722bca20cfee Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Sun, 11 Sep 2016 21:14:58 -0700 Subject: [PATCH 0175/1050] cpuset: handle race between CPU hotplug and cpuset_hotplug_work A discrepancy between cpu_online_mask and cpuset's effective_cpus mask is inevitable during hotplug, since cpuset defers updating the effective_cpus mask using a workqueue, during which time nothing prevents the system from performing more hotplug operations. For that reason guarantee_online_cpus() walks up the cpuset hierarchy until it finds an intersection, under the assumption that the top cpuset's effective_cpus mask intersects with cpu_online_mask even with such a race occurring. However a sequence of CPU hotplug operations can open a time window during which none of the effective CPUs in the top cpuset intersects with cpu_online_mask. For example, when there are 4 possible CPUs 0-3 and only CPU0 is online: ======================== =========================== cpu_online_mask top_cpuset.effective_cpus ======================== =========================== echo 1 > cpu2/online. The CPU hotplug notifier woke up the hotplug work, but it is not yet scheduled. [0,2] [0] echo 0 > cpu0/online. The workqueue is still runnable.
[2] [0] ======================== =========================== Now there is no intersection between cpu_online_mask and top_cpuset.effective_cpus. Thus invoking sys_sched_setaffinity() at this moment can cause the following: Unable to handle kernel NULL pointer dereference at virtual address 000000d0 ------------[ cut here ]------------ Kernel BUG at ffffffc0001389b0 [verbose debug info unavailable] Internal error: Oops - BUG: 96000005 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 1420 Comm: taskset Tainted: G W 4.4.8+ #98 task: ffffffc06a5c4880 ti: ffffffc06e124000 task.ti: ffffffc06e124000 PC is at guarantee_online_cpus+0x2c/0x58 LR is at cpuset_cpus_allowed+0x4c/0x6c Process taskset (pid: 1420, stack limit = 0xffffffc06e124020) Call trace: [] guarantee_online_cpus+0x2c/0x58 [] cpuset_cpus_allowed+0x4c/0x6c [] sched_setaffinity+0xc0/0x1ac [] SyS_sched_setaffinity+0x98/0xac [] el0_svc_naked+0x24/0x28 The top cpuset's effective_cpus are guaranteed to be identical to cpu_online_mask eventually. Hence fall back to cpu_online_mask when there is no intersection between the top cpuset's effective_cpus and cpu_online_mask. Signed-off-by: Joonwoo Park Acked-by: Li Zefan Cc: Tejun Heo Cc: cgroups@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: # 3.17+ Signed-off-by: Tejun Heo --- kernel/cpuset.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c27e53326bef..c08585ad996b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -325,8 +325,7 @@ static struct file_system_type cpuset_fs_type = { /* * Return in pmask the portion of a cpusets's cpus_allowed that * are online. If none are online, walk up the cpuset hierarchy - until we find one that does have some online cpus. The top - cpuset always has some cpus online. + until we find one that does have some online cpus. * * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * ... */ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) { - while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) + while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) { cs = parent_cs(cs); + if (unlikely(!cs)) { + /* + * The top cpuset doesn't have any online cpu as a + * consequence of a race between cpuset_hotplug_work + * and cpu hotplug notifier. But we know the top + * cpuset's effective_cpus is on its way to be + * identical to cpu_online_mask. + */ + cpumask_copy(pmask, cpu_online_mask); + return; + } + } cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); } From 801f823dc2d585253f2f8dd17c4a46d9da560579 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 5 Sep 2016 15:24:54 +0100 Subject: [PATCH 0176/1050] MIPS: c-r4k: Fix size calc when avoiding IPIs for small icache flushes Commit f70ddc07b637 ("MIPS: c-r4k: Avoid small flush_icache_range SMP calls") adds checks to force use of hit-type cache ops for small icache flushes where they are globalised & index-type cache ops aren't, in order to avoid the overhead of IPIs in those cases. However it calculated the size of the region being flushed incorrectly, subtracting the end address from the start address rather than the reverse. This would have led to an overflow, with size wrapping round to some large value, and most likely meant the special case for avoiding IPIs was never actually hit.
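To see why the reversed subtraction matters, consider a minimal sketch with illustrative values (not taken from the patch):

/* unsigned arithmetic: operand order decides between a size and a huge value */
unsigned long start = 0x80001000, end = 0x80001040;
unsigned long wrong = start - end;	/* wraps to 0xffffffffffffffc0 on 64-bit */
unsigned long right = end - start;	/* 0x40 bytes, as intended */

With the wrapped value, any "is this flush small enough?" comparison essentially never succeeds, so the hit-type fast path becomes dead code.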
Signed-off-by: Paul Burton Cc: James Hogan Fixes: f70ddc07b637 ("MIPS: c-r4k: Avoid small flush_icache_range SMP calls") Reviewed-by: James Hogan Reviewed-by: Florian Fainelli Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14211/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index cd72805b64a7..fa7d8d3790bf 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -800,7 +800,7 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end) * If address-based cache ops don't require an SMP call, then * use them exclusively for small flushes. */ - size = start - end; + size = end - start; cache_size = icache_size; if (!cpu_has_ic_fills_f_dc) { size *= 2; From 715f5552b1e90ba3eecf6d1a6d044d0d5226663f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 10 Sep 2016 23:11:23 +0800 Subject: [PATCH 0177/1050] sctp: hold the transport before using it in sctp_hash_cmp Since commit 4f0087812648 ("sctp: apply rhashtable api to send/recv path"), sctp uses a transport rhashtable with .obj_cmpfn sctp_hash_cmp, which compares the members of the transport with the rhashtable args to check whether it's the right transport. But sctp_hash_cmp uses the transport without holding it, which can cause a use-after-free panic: after it gets the transport from the hashtable, another CPU may close the sk and free the asoc. In sctp_association_free, all the transports are freed; meanwhile, the assoc's refcnt may be reduced to 0 and the assoc can be destroyed by sctp_association_destroy. After that, transport->asoc is actually an unavailable memory address in sctp_hash_cmp. Although sctp_hash_cmp runs under rcu_read_lock, that still cannot avoid this, as the asoc is not freed by RCU. This patch holds the transport with sctp_transport_hold() before checking its members; sctp_transport_hold() checks the refcnt first and takes a hold only if it's not 0. Fixes: 4f0087812648 ("sctp: apply rhashtable api to send/recv path") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S.
Miller --- net/sctp/input.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d32ecda..1555fb8c68e0 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -796,27 +796,34 @@ struct sctp_hash_cmp_arg { static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { + struct sctp_transport *t = (struct sctp_transport *)ptr; const struct sctp_hash_cmp_arg *x = arg->key; - const struct sctp_transport *t = ptr; - struct sctp_association *asoc = t->asoc; - const struct net *net = x->net; + struct sctp_association *asoc; + int err = 1; if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) - return 1; - if (!net_eq(sock_net(asoc->base.sk), net)) - return 1; + return err; + if (!sctp_transport_hold(t)) + return err; + + asoc = t->asoc; + if (!net_eq(sock_net(asoc->base.sk), x->net)) + goto out; if (x->ep) { if (x->ep != asoc->ep) - return 1; + goto out; } else { if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port)) - return 1; + goto out; if (!sctp_bind_addr_match(&asoc->base.bind_addr, x->laddr, sctp_sk(asoc->base.sk))) - return 1; + goto out; } - return 0; + err = 0; +out: + sctp_transport_put(t); + return err; } static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) From 440f895aa97f81a2bdc02993da5360a1f6da2fb5 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 11 Sep 2016 21:43:34 +0200 Subject: [PATCH 0178/1050] drivers: net: phy: xgene: Fix 'remove' function If 'IS_ERR(pdata->clk)' is true, then 'clk_disable_unprepare(pdata->clk)' will do nothing. It is likely that 'if (!IS_ERR(pdata->clk))' was expected here. In fact, the test can even be removed because 'clk_disable_unprepare' already handles such cases. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/phy/mdio-xgene.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index 775674808249..92af182951be 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -424,10 +424,8 @@ static int xgene_mdio_remove(struct platform_device *pdev) mdiobus_unregister(mdio_bus); mdiobus_free(mdio_bus); - if (dev->of_node) { - if (IS_ERR(pdata->clk)) - clk_disable_unprepare(pdata->clk); - } + if (dev->of_node) + clk_disable_unprepare(pdata->clk); return 0; } From ad5987b47e96a0fb6d13fea250e936aed000093c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:53:55 +0200 Subject: [PATCH 0179/1050] nl80211: validate number of probe response CSA counters Due to an apparent copy/paste bug, the number of counters for the beacon configuration was checked twice, instead of checking the number of probe response counters. Fix this to check the number of probe response counters before parsing them.
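As a hedged sketch of the intended validation (the field names are taken from the commit text; the surrounding parsing code is assumed, not quoted from nl80211):

/* each CSA counter array gets its own bound check against the device limit */
if (rdev->wiphy.max_num_csa_counters &&
    (params.n_counter_offsets_beacon > rdev->wiphy.max_num_csa_counters ||
     params.n_counter_offsets_presp > rdev->wiphy.max_num_csa_counters))
	return -EINVAL;

The bug was that the beacon count was tested in both places, leaving the probe response count effectively unchecked.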
Cc: stable@vger.kernel.org Fixes: 9a774c78e211 ("cfg80211: Support multiple CSA counters") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f02653a08993..4809f4d2cdcc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6978,7 +6978,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) params.n_counter_offsets_presp = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && - (params.n_counter_offsets_beacon > + (params.n_counter_offsets_presp > rdev->wiphy.max_num_csa_counters)) return -EINVAL; From 035ee288ae7ade4152f1c3cf23a587b04fdc526c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 6 Sep 2016 06:20:46 +0200 Subject: [PATCH 0180/1050] PCI: Fix bridge_d3 update on device removal Starting with v4.8, we allow a PCIe port to runtime suspend to D3hot if the port itself and its children satisfy a number of conditions. Once a child is removed, we recheck those conditions in case the removed device was blocking the port from suspending. The rechecking needs to happen *after* the device has been removed from the bus it resides on. Otherwise when walking the port's subordinate bus in pci_bridge_d3_update(), the device being removed would erroneously still be taken into account. However the device is removed from the bus_list in pci_destroy_dev() and we currently recheck *before* that. Fix it. Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Acked-by: Rafael J. Wysocki --- drivers/pci/remove.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index d1ef7acf6930..f9357e09e9b3 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -40,6 +40,7 @@ static void pci_destroy_dev(struct pci_dev *dev) list_del(&dev->bus_list); up_write(&pci_bus_sem); + pci_bridge_d3_device_removed(dev); pci_free_resources(dev); put_device(&dev->dev); } @@ -96,8 +97,6 @@ static void pci_remove_bus_device(struct pci_dev *dev) dev->subordinate = NULL; } - pci_bridge_d3_device_removed(dev); - pci_destroy_dev(dev); } From 9ad18b75c2f6e4a78ce204e79f37781f8815c0fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 23:19:01 -0400 Subject: [PATCH 0181/1050] asm-generic: make get_user() clear the destination on errors both for access_ok() failures and for faults halfway through Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/asm-generic/uaccess.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 04e21a41796a..32901d11f8c4 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -230,14 +230,18 @@ extern int __put_user_bad(void) __attribute__((noreturn)); might_fault(); \ access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \ __get_user((x), (__typeof__(*(ptr)) *)__p) : \ - -EFAULT; \ + ((x) = (__typeof__(*(ptr)))0,-EFAULT); \ }) #ifndef __get_user_fn static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) { - size = __copy_from_user(x, ptr, size); - return size ? 
-EFAULT : size; + size_t n = __copy_from_user(x, ptr, size); + if (unlikely(n)) { + memset(x + (size - n), 0, n); + return -EFAULT; + } + return 0; } #define __get_user_fn(sz, u, k) __get_user_fn(sz, u, k) From eb47e0293baaa3044022059f1fa9ff474bfe35cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 19:34:00 -0400 Subject: [PATCH 0182/1050] cris: buggered copy_from_user/copy_to_user/clear_user * copy_from_user() on access_ok() failure ought to zero the destination * none of those primitives should skip the access_ok() check in case of small constant size. Cc: stable@vger.kernel.org Acked-by: Jesper Nilsson Signed-off-by: Al Viro --- arch/cris/include/asm/uaccess.h | 71 +++++++++++++++------------------ 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/arch/cris/include/asm/uaccess.h b/arch/cris/include/asm/uaccess.h index e3530d0f13ee..56c7d5750abd 100644 --- a/arch/cris/include/asm/uaccess.h +++ b/arch/cris/include/asm/uaccess.h @@ -194,30 +194,6 @@ extern unsigned long __copy_user(void __user *to, const void *from, unsigned lon extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n); extern unsigned long __do_clear_user(void __user *to, unsigned long n); -static inline unsigned long -__generic_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_user(to, from, n); - return n; -} - -static inline unsigned long -__generic_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - return __copy_user_zeroing(to, from, n); - return n; -} - -static inline unsigned long -__generic_clear_user(void __user *to, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __do_clear_user(to, n); - return n; -} - static inline long __strncpy_from_user(char *dst, const char __user *src, long count) { @@ -282,7 +258,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) else if (n == 24) __asm_copy_from_user_24(to, from, ret); else - ret = __generic_copy_from_user(to, from, n); + ret = __copy_user_zeroing(to, from, n); return ret; } @@ -333,7 +309,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) else if (n == 24) __asm_copy_to_user_24(to, from, ret); else - ret = __generic_copy_to_user(to, from, n); + ret = __copy_user(to, from, n); return ret; } @@ -366,26 +342,43 @@ __constant_clear_user(void __user *to, unsigned long n) else if (n == 24) __asm_clear_24(to, ret); else - ret = __generic_clear_user(to, n); + ret = __do_clear_user(to, n); return ret; } -#define clear_user(to, n) \ - (__builtin_constant_p(n) ? \ - __constant_clear_user(to, n) : \ - __generic_clear_user(to, n)) +static inline size_t clear_user(void __user *to, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_clear_user(to, n); + else + return __do_clear_user(to, n); +} -#define copy_from_user(to, from, n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user(to, from, n) : \ - __generic_copy_from_user(to, from, n)) +static inline size_t copy_from_user(void *to, const void __user *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_READ, from, n))) { + memset(to, 0, n); + return n; + } + if (__builtin_constant_p(n)) + return __constant_copy_from_user(to, from, n); + else + return __copy_user_zeroing(to, from, n); +} -#define copy_to_user(to, from, n) \ - (__builtin_constant_p(n) ? 
\ - __constant_copy_to_user(to, from, n) : \ - __generic_copy_to_user(to, from, n)) +static inline size_t copy_to_user(void __user *to, const void *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_copy_to_user(to, from, n); + else + return __copy_user(to, from, n); +} /* We let the __ versions of copy_from/to_user inline, because they're often * used in fast paths and have only a small space overhead. From 3b8767a8f00cc6538ba6b1cf0f88502e2fd2eb90 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 20:54:02 -0400 Subject: [PATCH 0183/1050] frv: fix clear_user() It should check access_ok(). Otherwise a bunch of places turn into trivially exploitable rootholes. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/frv/include/asm/uaccess.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h index 3ac9a59d65d4..87d9e34c5df8 100644 --- a/arch/frv/include/asm/uaccess.h +++ b/arch/frv/include/asm/uaccess.h @@ -263,19 +263,25 @@ do { \ extern long __memset_user(void *dst, unsigned long count); extern long __memcpy_user(void *dst, const void *src, unsigned long count); -#define clear_user(dst,count) __memset_user(____force(dst), (count)) +#define __clear_user(dst,count) __memset_user(____force(dst), (count)) #define __copy_from_user_inatomic(to, from, n) __memcpy_user((to), ____force(from), (n)) #define __copy_to_user_inatomic(to, from, n) __memcpy_user(____force(to), (from), (n)) #else -#define clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) +#define __clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) #define __copy_from_user_inatomic(to, from, n) (memcpy((to), ____force(from), (n)), 0) #define __copy_to_user_inatomic(to, from, n) (memcpy(____force(to), (from), (n)), 0) #endif -#define __clear_user clear_user +static inline unsigned long __must_check +clear_user(void __user *to, unsigned long n) +{ + if (likely(__access_ok(to, n))) + n = __clear_user(to, n); + return n; +} static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) From f35c1e0671728d1c9abc405d05ef548b5fcb2fc4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 21:16:49 -0400 Subject: [PATCH 0184/1050] hexagon: fix strncpy_from_user() error return It's -EFAULT, not -1 (and contrary to the comment in there, __strnlen_user() can return 0 - on faults). Cc: stable@vger.kernel.org Acked-by: Richard Kuo Signed-off-by: Al Viro --- arch/hexagon/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index f000a382bc7f..f61cfb28e9f2 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -103,7 +103,8 @@ static inline long hexagon_strncpy_from_user(char *dst, const char __user *src, { long res = __strnlen_user(src, n); - /* return from strnlen can't be zero -- that would be rubbish. 
*/ + if (unlikely(!res)) + return -EFAULT; if (res > n) { copy_from_user(dst, src, n); From a5e541f796f17228793694d64b507f5f57db4cd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 21:31:41 -0400 Subject: [PATCH 0185/1050] ia64: copy_from_user() should zero the destination on access_ok() failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/ia64/include/asm/uaccess.h | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 465c70982f40..6c2d2c8c24e8 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -272,20 +272,17 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) __cu_len; \ }) -#define copy_from_user(to, from, n) \ -({ \ - void *__cu_to = (to); \ - const void __user *__cu_from = (from); \ - long __cu_len = (n); \ - \ - __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) { \ - if (!__builtin_constant_p(n)) \ - check_object_size(__cu_to, __cu_len, false); \ - __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ - } \ - __cu_len; \ -}) +static inline unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + if (likely(__access_ok(from, n, get_fs()))) + n = __copy_user((__force void __user *) to, from, n); + else + memset(to, 0, n); + return n; +} #define __copy_in_user(to, from, size) __copy_user((to), (from), (size)) From 8ae95ed4ae5fc7c3391ed668b2014c9e2079533b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 22:08:20 -0400 Subject: [PATCH 0186/1050] metag: copy_from_user() should zero the destination on access_ok() failure Cc: stable@vger.kernel.org Acked-by: James Hogan Signed-off-by: Al Viro --- arch/metag/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 8282cbce7e39..273e61225c27 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -204,8 +204,9 @@ extern unsigned long __must_check __copy_user_zeroing(void *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) return __copy_user_zeroing(to, from, n); + memset(to, 0, n); return n; } From 05d9d0b96e53c52a113fd783c0c97c830c8dc7af Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 19 Aug 2016 12:10:02 -0700 Subject: [PATCH 0187/1050] ARC: uaccess: get_user to zero out dest in cause of fault Al reported potential issue with ARC get_user() as it wasn't clearing out destination pointer in case of fault due to bad address etc. 
Verified using following | { | u32 bogus1 = 0xdeadbeef; | u64 bogus2 = 0xdead; | int rc1, rc2; | | pr_info("Orig values %x %llx\n", bogus1, bogus2); | rc1 = get_user(bogus1, (u32 __user *)0x40000000); | rc2 = get_user(bogus2, (u64 __user *)0x50000000); | pr_info("access %d %d, new values %x %llx\n", | rc1, rc2, bogus1, bogus2); | } | [ARCLinux]# insmod /mnt/kernel-module/qtn.ko | Orig values deadbeef dead | access -14 -14, new values 0 0 Reported-by: Al Viro Cc: Linus Torvalds Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Al Viro --- arch/arc/include/asm/uaccess.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index a78d5670884f..41faf17cd28d 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -83,7 +83,10 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ @@ -101,7 +104,11 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ + " mov %R1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ From e69d700535ac43a18032b3c399c69bf4639e89a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:18:53 -0400 Subject: [PATCH 0188/1050] mips: copy_from_user() must zero the destination on access_ok() failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/mips/include/asm/uaccess.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 11b965f98d95..21a2aaba20d5 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -14,6 +14,7 @@ #include #include #include +#include #include /* @@ -1170,6 +1171,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); __cu_len = __invoke_copy_from_user(__cu_to, \ __cu_from, \ __cu_len); \ + } else { \ + memset(__cu_to, 0, __cu_len); \ } \ } \ __cu_len; \ From 43403eabf558d2800b429cd886e996fd555aa542 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:32:02 -0400 Subject: [PATCH 0189/1050] mn10300: failing __get_user() and get_user() should zero Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/mn10300/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 20f7bf6de384..d012e877a95a 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -166,6 +166,7 @@ struct __large_struct { unsigned long buf[100]; }; "2:\n" \ " .section .fixup,\"ax\"\n" \ "3:\n\t" \ + " mov 0,%1\n" \ " mov %3,%0\n" \ " jmp 2b\n" \ " .previous\n" \ From ae7cc577ec2a4a6151c9e928fd1f595d953ecef1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:33:10 -0400 Subject: [PATCH 0190/1050] mn10300: copy_from_user() should zero on access_ok() failure... 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/mn10300/lib/usercopy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c index 7826e6c364e7..ce8899e5e171 100644 --- a/arch/mn10300/lib/usercopy.c +++ b/arch/mn10300/lib/usercopy.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ -#include +#include unsigned long __generic_copy_to_user(void *to, const void *from, unsigned long n) @@ -24,6 +24,8 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to, from, n); + else + memset(to, 0, n); return n; } From e33d1f6f72cc82fcfc3d1fb20c9e3ad83b1928fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:36:36 -0400 Subject: [PATCH 0191/1050] nios2: copy_from_user() should zero the tail of destination Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/nios2/include/asm/uaccess.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index caa51ff85a3c..2b4b9e919675 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -102,9 +102,12 @@ extern long __copy_to_user(void __user *to, const void *from, unsigned long n); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (!access_ok(VERIFY_READ, from, n)) - return n; - return __copy_from_user(to, from, n); + unsigned long res = n; + if (access_ok(VERIFY_READ, from, n)) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline long copy_to_user(void __user *to, const void *from, From 2e29f50ad5e23db37dde9be71410d95d50241ecd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:39:01 -0400 Subject: [PATCH 0192/1050] nios2: fix __get_user() a) should not leave crap on fault b) should _not_ require access_ok() in any cases. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/nios2/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index 2b4b9e919675..0ab82324c817 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -142,7 +142,7 @@ extern long strnlen_user(const char __user *s, long n); #define __get_user_unknown(val, size, ptr, err) do { \ err = 0; \ - if (copy_from_user(&(val), ptr, size)) { \ + if (__copy_from_user(&(val), ptr, size)) { \ err = -EFAULT; \ } \ } while (0) @@ -169,7 +169,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ From acb2505d0119033a80c85ac8d02dccae41271667 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 17:05:21 -0400 Subject: [PATCH 0193/1050] openrisc: fix copy_from_user() ... that should zero on faults. Also remove the helpful logics wrt range truncation copied from ppc32. Where it had ever been needed only in case of copy_from_user() *and* had not been merged into the mainline until a month after the need had disappeared. A decade before openrisc went into mainline, I might add... 
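The same shape of fix recurs throughout this series. As a minimal sketch of the pattern, using generic names rather than any one architecture's primitives:

static inline unsigned long
example_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	unsigned long res = n;

	if (likely(access_ok(VERIFY_READ, from, n)))
		res = __arch_copy_user(to, from, n); /* returns bytes NOT copied */
	if (unlikely(res))
		memset(to + (n - res), 0, res); /* zero the uncopied tail */
	return res;
}

Here __arch_copy_user() stands in for the per-architecture primitive (__copy_tofrom_user(), __copy_user_zeroing(), ...); the invariants are that the access_ok() check is never skipped and that every byte of the destination is either copied or zeroed, so no kernel stack garbage can leak to the caller.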
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/openrisc/include/asm/uaccess.h | 33 +++++++++-------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index a6bd07ca3d6c..cbad29b5a131 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -273,28 +273,20 @@ __copy_tofrom_user(void *to, const void *from, unsigned long size); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long n) { - unsigned long over; + unsigned long res = n; - if (access_ok(VERIFY_READ, from, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_tofrom_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_tofrom_user(to, from, n); return n; } @@ -303,13 +295,8 @@ extern unsigned long __clear_user(void *addr, unsigned long size); static inline __must_check unsigned long clear_user(void *addr, unsigned long size) { - - if (access_ok(VERIFY_WRITE, addr, size)) - return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, addr, size))) + size = __clear_user(addr, size); return size; } From aace880feea38875fbc919761b77e5732a3659ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 19:03:37 -0400 Subject: [PATCH 0194/1050] parisc: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/parisc/include/asm/uaccess.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 0f59fd9ca205..37a1bee96444 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -10,6 +10,7 @@ #include #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -221,13 +222,14 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; + unsigned long ret = n; if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n)) ret = __copy_from_user(to, from, n); else copy_from_user_overflow(); - + if (unlikely(ret)) + memset(to + (n - ret), 0, ret); return ret; } From 224264657b8b228f949b42346e09ed8c90136a8e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 19:16:26 -0400 Subject: [PATCH 0195/1050] ppc32: fix copy_from_user() should clear on access_ok() failures. Also remove the useless range truncation logics.
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/powerpc/include/asm/uaccess.h | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c1dc6c14deb8..c2ce5dd33673 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -308,40 +308,23 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) { + if (likely(access_ok(VERIFY_READ, from, n))) { if (!__builtin_constant_p(n)) check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); } - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - if (!__builtin_constant_p(n - over)) - check_object_size(to, n - over, false); - return __copy_tofrom_user((__force void __user *)to, from, - n - over) + over; - } + memset(to, 0, n); return n; } static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long over; - if (access_ok(VERIFY_WRITE, to, n)) { if (!__builtin_constant_p(n)) check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); } - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - if (!__builtin_constant_p(n)) - check_object_size(from, n - over, true); - return __copy_tofrom_user(to, (__force void __user *)from, - n - over) + over; - } return n; } @@ -439,10 +422,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(VERIFY_WRITE, addr, size))) return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } return size; } From fd2d2b191fe75825c4c7a6f12f3fef35aaed7dd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:00:54 -0400 Subject: [PATCH 0196/1050] s390: get_user() should zero on failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/s390/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9b49cf1daa8f..2c5d292ff752 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -266,28 +266,28 @@ int __put_user_bad(void) __attribute__((noreturn)); __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ - unsigned char __x; \ + unsigned char __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ - unsigned short __x; \ + unsigned short __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ - unsigned int __x; \ + unsigned int __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ - unsigned long long __x; \ + unsigned long long __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ From c2f18fa4cbb3ad92e033a24efa27583978ce9600 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:13:39 -0400 Subject: [PATCH 0197/1050] score: fix __get_user/get_user * should zero on 
any failure * __get_user() should use __copy_from_user(), not copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/score/include/asm/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index 20a3591225cc..c5d43111a5cc 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -163,7 +163,7 @@ do { \ __get_user_asm(val, "lw", ptr); \ break; \ case 8: \ - if ((copy_from_user((void *)&val, ptr, 8)) == 0) \ + if (__copy_from_user((void *)&val, ptr, 8) == 0) \ __gu_err = 0; \ else \ __gu_err = -EFAULT; \ @@ -188,6 +188,8 @@ do { \ \ if (likely(access_ok(VERIFY_READ, __gu_ptr, size))) \ __get_user_common((x), size, __gu_ptr); \ + else \ + (x) = 0; \ \ __gu_err; \ }) @@ -201,6 +203,7 @@ do { \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3:li %0, %4\n" \ + "li %1, 0\n" \ "j 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ From b615e3c74621e06cd97f86373ca90d43d6d998aa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:30:44 -0400 Subject: [PATCH 0198/1050] score: fix copy_from_user() and friends Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/score/include/asm/uaccess.h | 41 ++++++++++++++++---------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index c5d43111a5cc..01aec8ccde83 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -301,35 +301,34 @@ extern int __copy_tofrom_user(void *to, const void *from, unsigned long len); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long len) { - unsigned long over; + unsigned long res = len; - if (access_ok(VERIFY_READ, from, len)) - return __copy_tofrom_user(to, from, len); + if (likely(access_ok(VERIFY_READ, from, len))) + res = __copy_tofrom_user(to, from, len); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } - return len; + if (unlikely(res)) + memset(to + (len - res), 0, res); + + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long len) { - unsigned long over; + if (likely(access_ok(VERIFY_WRITE, to, len))) + len = __copy_tofrom_user(to, from, len); - if (access_ok(VERIFY_WRITE, to, len)) - return __copy_tofrom_user(to, from, len); - - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } return len; } -#define __copy_from_user(to, from, len) \ - __copy_tofrom_user((to), (from), (len)) +static inline unsigned long +__copy_from_user(void *to, const void *from, unsigned long len) +{ + unsigned long left = __copy_tofrom_user(to, from, len); + if (unlikely(left)) + memset(to + (len - left), 0, left); + return left; +} #define __copy_to_user(to, from, len) \ __copy_tofrom_user((to), (from), (len)) @@ -343,17 +342,17 @@ __copy_to_user_inatomic(void *to, const void *from, unsigned long len) static inline unsigned long __copy_from_user_inatomic(void *to, const void *from, unsigned long len) { - return __copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } -#define __copy_in_user(to, from, len) __copy_from_user(to, from, len) +#define __copy_in_user(to, from, len) __copy_tofrom_user(to, from, len) static inline unsigned long copy_in_user(void *to, const void *from, unsigned 
long len) { if (access_ok(VERIFY_READ, from, len) && access_ok(VERFITY_WRITE, to, len)) - return copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } /* From c6852389228df9fb3067f94f3b651de2a7921b36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 23:33:47 -0400 Subject: [PATCH 0199/1050] sh64: failing __get_user() should zero It could be done in exception-handling bits in __get_user_b() et.al., but the surgery involved would take more knowledge of sh64 details than I have or _want_ to have. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/sh/include/asm/uaccess_64.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h index c01376c76b86..ca5073dd4596 100644 --- a/arch/sh/include/asm/uaccess_64.h +++ b/arch/sh/include/asm/uaccess_64.h @@ -24,6 +24,7 @@ #define __get_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ + x = 0; \ switch (size) { \ case 1: \ retval = __get_user_asm_b((void *)&x, \ From 6e050503a150b2126620c1a1e9b3a368fcd51eac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 23:39:47 -0400 Subject: [PATCH 0200/1050] sh: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/sh/include/asm/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index a49635c51266..92ade79ac427 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -151,7 +151,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) __kernel_size_t __copy_size = (__kernel_size_t) n; if (__copy_size && __access_ok(__copy_from, __copy_size)) - return __copy_user(to, from, __copy_size); + __copy_size = __copy_user(to, from, __copy_size); + + if (unlikely(__copy_size)) + memset(to + (n - __copy_size), 0, __copy_size); return __copy_size; } From 917400cecb4b52b5cde5417348322bb9c8272fa6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 22 Aug 2016 00:23:07 -0400 Subject: [PATCH 0201/1050] sparc32: fix copy_from_user() Cc: stable@vger.kernel.org Acked-by: David S. 
Miller Signed-off-by: Al Viro --- arch/sparc/include/asm/uaccess_32.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 341a5a133f48..035c89b7df5d 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -269,8 +269,10 @@ static inline unsigned long copy_from_user(void *to, const void __user *from, un if (!__builtin_constant_p(n)) check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - } else + } else { + memset(to, 0, n); return n; + } } static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) From 8f035983dd826d7e04f67b28acf8e2f08c347e41 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:16:58 -0400 Subject: [PATCH 0202/1050] blackfin: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/blackfin/include/asm/uaccess.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h index 12f5d6851bbc..0a2a70096d8b 100644 --- a/arch/blackfin/include/asm/uaccess.h +++ b/arch/blackfin/include/asm/uaccess.h @@ -171,11 +171,12 @@ static inline int bad_user_access_length(void) static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) { memcpy(to, (const void __force *)from, n); - else - return n; - return 0; + return 0; + } + memset(to, 0, n); + return n; } static inline unsigned long __must_check From c90a3bc5061d57e7931a9b7ad14784e1a0ed497d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:20:13 -0400 Subject: [PATCH 0203/1050] m32r: fix __get_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/m32r/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h index cac7014daef3..6f8982157a75 100644 --- a/arch/m32r/include/asm/uaccess.h +++ b/arch/m32r/include/asm/uaccess.h @@ -219,7 +219,7 @@ extern int fixup_exception(struct pt_regs *regs); #define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err = 0; \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ might_fault(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ From d0cf385160c12abd109746cad1f13e3b3e8b50b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:22:34 -0400 Subject: [PATCH 0204/1050] microblaze: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/microblaze/include/asm/uaccess.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 331b0d35f89c..3a486d3b6f52 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -373,10 +373,13 @@ extern long __user_bad(void); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res = n; might_fault(); - if (access_ok(VERIFY_READ, from, n)) - return __copy_from_user(to, from, n); - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } #define __copy_to_user(to, from, n) \ From 
e98b9e37ae04562d52c96f46b3cf4c2e80222dc1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:23:33 -0400 Subject: [PATCH 0205/1050] microblaze: fix __get_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/microblaze/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 3a486d3b6f52..826676778094 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -227,7 +227,7 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ /*unsigned long __gu_ptr = (unsigned long)(ptr);*/ \ long __gu_err; \ switch (sizeof(*(ptr))) { \ From 8630c32275bac2de6ffb8aea9d9b11663e7ad28e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:28:23 -0400 Subject: [PATCH 0206/1050] avr32: fix copy_from_user() really ugly, but apparently avr32 compilers turn access_ok() into something so bad that they want it in assembler. Left that way; zeroing added in an inline wrapper. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/avr32/include/asm/uaccess.h | 11 ++++++++++- arch/avr32/kernel/avr32_ksyms.c | 2 +- arch/avr32/lib/copy_user.S | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h index 68cf638faf48..b1ec1fa06463 100644 --- a/arch/avr32/include/asm/uaccess.h +++ b/arch/avr32/include/asm/uaccess.h @@ -74,7 +74,7 @@ extern __kernel_size_t __copy_user(void *to, const void *from, extern __kernel_size_t copy_to_user(void __user *to, const void *from, __kernel_size_t n); -extern __kernel_size_t copy_from_user(void *to, const void __user *from, +extern __kernel_size_t ___copy_from_user(void *to, const void __user *from, __kernel_size_t n); static inline __kernel_size_t __copy_to_user(void __user *to, const void *from, @@ -88,6 +88,15 @@ static inline __kernel_size_t __copy_from_user(void *to, { return __copy_user(to, (const void __force *)from, n); } +static inline __kernel_size_t copy_from_user(void *to, + const void __user *from, + __kernel_size_t n) +{ + size_t res = ___copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c index d93ead02daed..7c6cf14f0985 100644 --- a/arch/avr32/kernel/avr32_ksyms.c +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL(copy_page); /* * Userspace access stuff. */ -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(___copy_from_user); EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S index ea59c04b07de..96a6de9d578f 100644 --- a/arch/avr32/lib/copy_user.S +++ b/arch/avr32/lib/copy_user.S @@ -25,11 +25,11 @@ .align 1 .global copy_from_user .type copy_from_user, @function -copy_from_user: +___copy_from_user: branch_if_kernel r8, __copy_user ret_if_privileged r8, r11, r10, r10 rjmp __copy_user - .size copy_from_user, . - copy_from_user + .size ___copy_from_user, .
- ___copy_from_user .global copy_to_user .type copy_to_user, @function From 6eaed1665fc6864fbdbffcc6f43a7f5d012f3052 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 13 Sep 2016 16:40:24 +1000 Subject: [PATCH 0207/1050] powerpc/powernv: Fix the state of root PE The PE for the root bus (root PE) can be removed because of PCI hot remove in the EEH recovery path for a fenced PHB error. We need to update @phb->root_pe_populated accordingly so that the root PE can be populated again in the forthcoming PCI hot add path. Also, the PE shouldn't be destroyed as it's a global and reserved resource. Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE") Reported-by: Frederic Barrat Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c16d790808f1..0c71a2ffb5be 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3426,7 +3426,17 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) } } - pnv_ioda_free_pe(pe); + /* + * The PE for root bus can be removed because of hotplug in EEH + * recovery for fenced PHB error. We need to mark the PE dead so + * that it can be populated again in PCI hot add path. The PE + * shouldn't be destroyed as it's the global reserved resource. + */ + if (phb->ioda.root_pe_populated && + phb->ioda.root_pe_idx == pe->pe_number) + phb->ioda.root_pe_populated = false; + else + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) From 74712339a4fc0f4ddc710e6bca836a6b78b7d8de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Aug 2016 08:23:14 +0100 Subject: [PATCH 0208/1050] drm/i915: Restore lost "Initialized i915" welcome message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A side effect of removing the midlayer from driver loading was the loss of a useful message announcing to userspace that i915 had successfully started, e.g.: [drm] Initialized i915 1.6.0 20160425 for 0000:00:02.0 on minor 0 Reported-by: Timo Aaltonen Signed-off-by: Chris Wilson Fixes: 8f460e2c78f2 ("drm/i915: Demidlayer driver loading") Cc: Daniel Vetter Cc: Ville Syrjälä Cc: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/20160825072314.17402-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter (cherry picked from commit bc5ca47c0af4f949ba889e666b7da65569e36093) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 95ddd56b89f0..5de36d8dcc68 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1281,6 +1281,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) intel_runtime_pm_enable(dev_priv); + /* Everything is in place, we can now relax!
*/ + DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", + driver.name, driver.major, driver.minor, driver.patchlevel, + driver.date, pci_name(pdev), dev_priv->drm.primary->index); + intel_runtime_pm_put(dev_priv); return 0; From 86dfb76cba284114cf586005cd943eeb6e4f328d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 7 Sep 2016 17:42:31 -0700 Subject: [PATCH 0209/1050] Revert "drm/i915/psr: Make idle_frames sensible again" This reverts commit 1c80c25fb622973dd135878e98d172be20859049 Author: Daniel Vetter Date: Wed May 18 18:47:12 2016 +0200 drm/i915/psr: Make idle_frames sensible again There are panels that need 4 idle frames before entering PSR, but VBT is improperly set. Also, lately it was identified that the idle frame count calculated at HW can be off by 1, which makes the minimum 2, at least. Without the current vbt+1 we run the risk of the HW calculating 0 idle frames and entering PSR when it shouldn't, regardless of the lack of link training. [Jani: there is some disagreement on the explanation, but the commit regresses so revert it is.] References: http://marc.info/?i=20160904191153.GA2328@light.dominikbrodowski.net Cc: Dominik Brodowski Cc: Jani Nikula Cc: Daniel Vetter Signed-off-by: Rodrigo Vivi Fixes: 1c80c25fb622 ("drm/i915/psr: Make idle_frames sensible again") Cc: drm-intel-fixes@lists.freedesktop.org # v4.8-rc1+ Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1473295351-8766-1-git-send-email-rodrigo.vivi@intel.com (cherry picked from commit 40918e0bb81be02f507a941f8b2741f0dc1771b0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_psr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2b0d1baf15b3..cf171b4b8c67 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -255,14 +255,14 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); uint32_t max_sleep_time = 0x1f; - /* Lately it was identified that depending on panel idle frame count - * calculated at HW can be off by 1. So let's use what came - * from VBT + 1. - * There are also other cases where panel demands at least 4 - * but VBT is not being set. To cover these 2 cases lets use - * at least 5 when VBT isn't set to be on the safest side. + /* + * Let's respect VBT in case VBT asks a higher idle_frame value. + * Let's use 6 as the minimum to cover all known cases including + * the off-by-one issue that HW has in some cases. Also there are + * cases where sink should be able to train + * with the 5 or 6 idle patterns. */ - uint32_t idle_frames = dev_priv->vbt.psr.idle_frames + 1; + uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); uint32_t val = EDP_PSR_ENABLE; val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT; From ea54ff4008892b46c7a3e6bc8ab8aaec9d198639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 13 Sep 2016 12:22:19 +0300 Subject: [PATCH 0210/1050] drm/i915: Ignore OpRegion panel type except on select machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out commit a05628195a0d ("drm/i915: Get panel_type from OpRegion panel details") has regressed quite a few machines. So it looks like we can't use the panel type from OpRegion on all systems, and yet we absolutely must use it on some specific systems.
Despite trying, I was unable to find any automagic way to determine if the OpRegion panel type is respectable or not. The only glimmer of hope I had was bit 8 in the SCIC response, but that turned out not to work either (it was always 0 on both types of systems). So, to fix the regressions without breaking the machine we know to need the OpRegion panel type, let's just add a quirk for this. Only specific machines known to require the OpRegion panel type will therefore use it. Everyone else will fall back to the VBT panel type. The only known machine so far is a "Conrac GmbH IX45GM2". The PCI subsystem ID on this machine is just a generic 8086:2a42, so of no use. Instead we'll go with a DMI match. I suspect we can now also revert commit aeddda06c1a7 ("drm/i915: Ignore panel type from OpRegion on SKL") but let's leave that to a separate patch. v2: Do the DMI match in the opregion code directly, as dev_priv->quirks gets populated too late Cc: Rob Kramer Cc: Martin van Es Cc: Andrea Arcangeli Cc: Dave Airlie Cc: Marco Krüger Cc: Sean Greenslade Cc: Trudy Tective Cc: Robin Müller Cc: Alexander Kobel Cc: Alexey Shumitsky Cc: Emil Andersen Lauridsen Cc: oceans112@gmail.com Cc: James Hogan Cc: James Bottomley Cc: stable@vger.kernel.org References: https://lists.freedesktop.org/archives/intel-gfx/2016-August/105545.html References: https://lists.freedesktop.org/archives/dri-devel/2016-August/116888.html References: https://lists.freedesktop.org/archives/intel-gfx/2016-June/098826.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94825 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97060 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97443 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97363 Fixes: a05628195a0d ("drm/i915: Get panel_type from OpRegion panel details") Tested-by: Marco Krüger Tested-by: Alexey Shumitsky Tested-by: Sean Greenslade Tested-by: Emil Andersen Lauridsen Tested-by: Robin Müller Tested-by: oceans112@gmail.com Tested-by: Rob Kramer Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1473758539-21565-1-git-send-email-ville.syrjala@linux.intel.com References: http://patchwork.freedesktop.org/patch/msgid/1473602239-15855-1-git-send-email-adrienverge@gmail.com Acked-by: Jani Nikula (cherry picked from commit c8ebfad7a063fe665417fa0eeb0da7cfe987d8ed) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_opregion.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index adca262d591a..7acbbbf97833 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -1047,6 +1047,23 @@ err_out: return err; } +static int intel_use_opregion_panel_type_callback(const struct dmi_system_id *id) +{ + DRM_INFO("Using panel type from OpRegion on %s\n", id->ident); + return 1; +} + +static const struct dmi_system_id intel_use_opregion_panel_type[] = { + { + .callback = intel_use_opregion_panel_type_callback, + .ident = "Conrac GmbH IX45GM2", + .matches = {DMI_MATCH(DMI_SYS_VENDOR, "Conrac GmbH"), + DMI_MATCH(DMI_PRODUCT_NAME, "IX45GM2"), + }, + }, + { } +}; + int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) { @@ -1072,6 +1089,16 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) return -ENODEV; } + /* + * So far we know that some machines must use it, others must not use it. 
+ * There doesn't seem to be any way to determine which way to go, except + * via a quirk list :( + */ + if (!dmi_check_system(intel_use_opregion_panel_type)) { + DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1); + return -ENODEV; + } + /* * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us * low vswing for eDP, whereas the VBT panel type (2) gives us normal From 0b97a484e52cb423662eb98904aad82dafcc1f10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:41:34 +0200 Subject: [PATCH 0211/1050] mac80211: check skb_linearize() return value The A-MSDU TX code (within TXQs) didn't always check the return value of skb_linearize() properly, resulting in potentially passing a frag-list SKB down to the driver even when the driver said it couldn't handle one. Fix that. Fixes: 6e0456b545456 ("mac80211: add A-MSDU tx support") Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cc8e95554b48..18b285e06bc8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1515,8 +1515,12 @@ out: spin_unlock_bh(&fq->lock); if (skb && skb_has_frag_list(skb) && - !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) - skb_linearize(skb); + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { + if (skb_linearize(skb)) { + ieee80211_free_txskb(&local->hw, skb); + return NULL; + } + } return skb; } From e5789608766113ca9c30d596d93ca7d5cbd8b461 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Sep 2016 14:22:07 +0300 Subject: [PATCH 0212/1050] mmc: omap_hsmmc: Initialize dma_slave_config to avoid random data It is wrong to use an uninitialized dma_slave_config and configure only certain fields, as the DMAengine driver might look at the non-initialized (random data) fields and try to interpret them. Signed-off-by: Peter Ujfalusi Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap_hsmmc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 24ebc9a8de89..5f2f24a7360d 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1409,11 +1409,18 @@ static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host, static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host, struct mmc_request *req) { - struct dma_slave_config cfg; struct dma_async_tx_descriptor *tx; int ret = 0, i; struct mmc_data *data = req->data; struct dma_chan *chan; + struct dma_slave_config cfg = { + .src_addr = host->mapbase + OMAP_HSMMC_DATA, + .dst_addr = host->mapbase + OMAP_HSMMC_DATA, + .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .src_maxburst = data->blksz / 4, + .dst_maxburst = data->blksz / 4, + }; /* Sanity check: all the SG entries must be aligned by block size. 
*/ for (i = 0; i < data->sg_len; i++) { @@ -1433,13 +1440,6 @@ static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host, chan = omap_hsmmc_get_dma_chan(host, data); - cfg.src_addr = host->mapbase + OMAP_HSMMC_DATA; - cfg.dst_addr = host->mapbase + OMAP_HSMMC_DATA; - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - cfg.src_maxburst = data->blksz / 4; - cfg.dst_maxburst = data->blksz / 4; - ret = dmaengine_slave_config(chan, &cfg); if (ret) return ret; From df804d5e27490151da1ce9f216031a31352203e6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Sep 2016 14:21:54 +0300 Subject: [PATCH 0213/1050] mmc: omap: Initialize dma_slave_config to avoid random data in its fields It is wrong to use an uninitialized dma_slave_config and configure only certain fields, as the DMAengine driver might look at the non-initialized (random data) fields and try to interpret them. Signed-off-by: Peter Ujfalusi Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index f23d65eb070d..be3c49fa7382 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -1016,14 +1016,16 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req) /* Only reconfigure if we have a different burst size */ if (*bp != burst) { - struct dma_slave_config cfg; - - cfg.src_addr = host->phys_base + OMAP_MMC_REG(host, DATA); - cfg.dst_addr = host->phys_base + OMAP_MMC_REG(host, DATA); - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; - cfg.src_maxburst = burst; - cfg.dst_maxburst = burst; + struct dma_slave_config cfg = { + .src_addr = host->phys_base + + OMAP_MMC_REG(host, DATA), + .dst_addr = host->phys_base + + OMAP_MMC_REG(host, DATA), + .src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .src_maxburst = burst, + .dst_maxburst = burst, + }; if (dmaengine_slave_config(c, &cfg)) goto use_pio; From 6cfeaf5125d425043d44002d0a1a8a147be582bf Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 14 Sep 2016 11:00:26 +0100 Subject: [PATCH 0214/1050] cpu/hotplug: Include linux/types.h in linux/cpuhotplug.h The linux/cpuhotplug.h header makes use of the bool type, but wasn't including linux/types.h to ensure that type has been defined. Fix this by including linux/types.h in preparation for including linux/cpuhotplug.h in a file that doesn't do so already. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Richard Cochran Cc: Sebastian Andrzej Siewior Cc: Ralf Baechle Cc: Anna-Maria Gleixner Link: http://lkml.kernel.org/r/20160914100027.20945-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf530edfc..34bd80512a0c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -1,6 +1,8 @@ #ifndef __CPUHOTPLUG_H #define __CPUHOTPLUG_H +#include <linux/types.h> + enum cpuhp_state { CPUHP_OFFLINE, CPUHP_CREATE_THREADS, From de75abbe0121a6c3c9c6b04c75300088e57ad1d5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 8 Sep 2016 11:48:28 +0200 Subject: [PATCH 0215/1050] arm/xen: fix SMP guests boot Commit 88e957d6e47f ("xen: introduce xen_vcpu_id mapping") broke SMP ARM guests on Xen. 
When FIFO-based event channels are in use (this is the default), evtchn_fifo_alloc_control_block() is called on the CPU_UP_PREPARE event, and this happens before we set up the xen_vcpu_id mapping in xen_starting_cpu. Temporarily fix the issue by setting a direct Linux CPU id <-> Xen vCPU id mapping for all possible CPUs at boot. We don't currently support kexec/kdump on Xen/ARM so these ids always match. In the future, we have several ways to solve the issue, e.g.: - Eliminate all hypercalls from CPU_UP_PREPARE, do them from the starting CPU. This can probably be done for both x86 and ARM and, if done, will allow us to get Xen's idea of vCPU id from CPUID/MPIDR on the starting CPU directly, no messing with ACPI/device tree required. - Save vCPU id information from ACPI/device tree on ARM and use it to initialize the xen_vcpu_id mapping. This is the same trick we currently do on x86. Reported-by: Julien Grall Tested-by: Wei Chen Signed-off-by: Vitaly Kuznetsov Acked-by: Stefano Stabellini Signed-off-by: David Vrabel --- arch/arm/xen/enlighten.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 3d2cef6488ea..f193414d0f6f 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -170,9 +170,6 @@ static int xen_starting_cpu(unsigned int cpu) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); - /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, cpu) = cpu; - info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); @@ -330,6 +327,7 @@ static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; + int cpu; if (!xen_domain()) return 0; @@ -380,7 +378,8 @@ static int __init xen_guest_init(void) return -ENOMEM; /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, 0) = 0; + for_each_possible_cpu(cpu) + per_cpu(xen_vcpu_id, cpu) = cpu; xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, From 7ccb8e633cfeb7969eba09bbf53346e746bb7f89 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 15 Aug 2016 19:34:44 +0200 Subject: [PATCH 0216/1050] ARM: multi_v7_defconfig: update XILINX_VDMA Commit fde57a7c4474 ("dmaengine: xilinx: Rename driver and config") renamed config XILINX_VDMA to config XILINX_DMA. Update the defconfig accordingly. Signed-off-by: Fabian Frederick Signed-off-by: Arnd Bergmann --- arch/arm/configs/multi_v7_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 2c8665cd9dc5..ea3566fb92e2 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -781,7 +781,7 @@ CONFIG_MXS_DMA=y CONFIG_DMA_BCM2835=y CONFIG_DMA_OMAP=y CONFIG_QCOM_BAM_DMA=y -CONFIG_XILINX_VDMA=y +CONFIG_XILINX_DMA=y CONFIG_DMA_SUN6I=y CONFIG_STAGING=y CONFIG_SENSORS_ISL29018=y From f2a89d3b2b85b90b05453872aaabfdb412a21a03 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Aug 2016 10:54:16 +0100 Subject: [PATCH 0217/1050] arm64: dts: Fix broken architected timer interrupt trigger The ARM architected timer specification mandates that the interrupt associated with each timer is level triggered (which corresponds to the "counter >= comparator" condition). A number of DTs are being remarkably creative, declaring the interrupt to be edge triggered. 
A quick look at the TRM for the corresponding ARM CPUs clearly shows that this is wrong, and I've corrected those. For non-ARM designs (and in the absence of a publicly available TRM), I've made them active low as well, which can't be completely wrong as the GIC cannot distinguish between level low and level high. The respective maintainers are of course welcome to prove me wrong. While I was at it, I took the liberty to fix a couple of related issues, such as some spurious affinity bits on ThunderX, and their complete absence on ls1043a (both of which seem to be related to copy-pasting from other DTs). Acked-by: Duc Dang Acked-by: Carlo Caione Acked-by: Michal Simek Acked-by: Krzysztof Kozlowski Acked-by: Dinh Nguyen Acked-by: Masahiro Yamada Signed-off-by: Marc Zyngier Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 8 ++++---- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 8 ++++---- arch/arm64/boot/dts/apm/apm-storm.dtsi | 8 ++++---- arch/arm64/boot/dts/broadcom/ns2.dtsi | 8 ++++---- arch/arm64/boot/dts/cavium/thunder-88xx.dtsi | 8 ++++---- arch/arm64/boot/dts/exynos/exynos7.dtsi | 8 ++++---- arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 8 ++++---- arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi | 8 ++++---- arch/arm64/boot/dts/marvell/armada-ap806.dtsi | 8 ++++---- arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi | 8 ++++---- arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 8 ++++---- 11 files changed, 44 insertions(+), 44 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 445aa678f914..c2b9bcb0ef61 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -255,10 +255,10 @@ /* Local timer */ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; timer0: timer0@ffc03000 { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index e502c24b0ac7..bf6c8d051002 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -102,13 +102,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = <GIC_PPI 13 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>, + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 14 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>, + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 11 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>, + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>; + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>; }; xtal: xtal-clk { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index f1c2c713f9b0..c29dab9d1834 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -110,10 +110,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ - <1 13 0xff01>, /* Non-secure Phys IRQ */ - <1 14 0xff01>, /* Virt IRQ */ - <1 15 0xff01>; /* Hyp IRQ */ + interrupts = <1 0 0xff08>, /* Secure Phys IRQ */ + <1 13 0xff08>, /* Non-secure Phys IRQ */ + <1 14 0xff08>, /* Virt IRQ */ + <1 15 0xff08>; /* Hyp IRQ */ clock-frequency = <50000000>; }; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index f53b0955bfd3..d4a12fad8afd 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -88,13 +88,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | - IRQ_TYPE_EDGE_RISING)>, + IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | - IRQ_TYPE_EDGE_RISING)>, + IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | - IRQ_TYPE_EDGE_RISING)>, + IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | - IRQ_TYPE_EDGE_RISING)>; + IRQ_TYPE_LEVEL_LOW)>; }; pmu { diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index 2eb9b225f0bc..04dc8a8d1539 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -354,10 +354,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; pmu { diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index ca663dfe5189..162831546e18 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -473,10 +473,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 0xff08>, + <1 14 0xff08>, + <1 11 0xff08>, + <1 10 0xff08>; }; pmu_system_controller: system-controller@105c0000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index e669fbd7f9c3..a67e210e2019 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -119,10 +119,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x1>, /* Physical Secure PPI */ - <1 14 0x1>, /* Physical Non-Secure PPI */ - <1 11 0x1>, /* Virtual PPI */ - <1 10 0x1>; /* Hypervisor PPI */ + interrupts = <1 13 0xf08>, /* Physical Secure PPI */ + <1 14 0xf08>, /* Physical Non-Secure PPI */ + <1 11 0xf08>, /* Virtual PPI */ + <1 10 0xf08>; /* Hypervisor PPI */ }; pmu { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index 21023a388c29..e3b6034ea5d9 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -191,10 +191,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x8>, /* Physical Secure PPI, active-low */ - <1 14 0x8>, /* Physical Non-Secure PPI, active-low */ - <1 11 0x8>, /* Virtual PPI, active-low */ - <1 10 0x8>; /* Hypervisor PPI, active-low */ + interrupts = <1 13 4>, /* Physical Secure PPI, active-low */ + <1 14 4>, /* Physical Non-Secure PPI, active-low */ + <1 11 4>, /* Virtual PPI, active-low */ + <1 10 4>; /* Hypervisor PPI, active-low */ }; pmu { diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index eab1a42fb934..c2a6745f168c 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -122,10 +122,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>, - <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>, - <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>, - <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>; }; odmi: odmi@300000 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi index c223915f0907..d73bdc8c9115 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi @@ -129,10 +129,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; soc { diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index e595f22e7e4b..3e2e51fbd2bc 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -65,10 +65,10 @@ 
timer { compatible = "arm,armv8-timer"; interrupt-parent = <&gic>; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; amba_apu { From 29bf282dec94f6015a675c007614cb29563f1c18 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 6 Sep 2016 16:34:01 +1000 Subject: [PATCH 0218/1050] powerpc/powernv: Detach from PE on releasing PCI device PCI hotplug can be part of EEH error recovery. The @pdn and the device's PE number aren't removed and added afterwards. The PE number in @pdn should be set to an invalid one. Otherwise, the PE's device count is decreased on removing devices while failing to be increased on adding devices. It leads to an unbalanced PE device count and makes the normal PCI hotplug path broken. Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE") Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0c71a2ffb5be..da5da11a6223 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3452,7 +3452,17 @@ static void pnv_pci_release_device(struct pci_dev *pdev) if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; + /* + * PCI hotplug can happen as part of EEH error recovery. The @pdn + * isn't removed and added afterwards in this scenario. We should + * set the PE number in @pdn to an invalid one. Otherwise, the PE's + * device count is decreased on removing devices while failing to + * be increased on adding devices. It leads to unbalanced PE's device + * count and eventually make normal PCI hotplug path broken. + */ pe = &phb->ioda.pe_array[pdn->pe_number]; + pdn->pe_number = IODA_INVALID_PE; + WARN_ON(--pe->device_count < 0); if (pe->device_count == 0) pnv_ioda_release_pe(pe); From ed7d9a1d7da6fe7b1c7477dc70e95051583fd60c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Sep 2016 17:03:06 +1000 Subject: [PATCH 0219/1050] powerpc/powernv/pci: Fix missed TCE invalidations that should fallback to OPAL In commit f0228c413011 ("powerpc/powernv/pci: Fallback to OPAL for TCE invalidations"), we added logic to fallback to OPAL for doing TCE invalidations if we can't do it in Linux. Ben sent a v2 of the patch, containing these additional call sites, but I had already applied v1 and didn't notice. So fix them now. 
Fixes: f0228c413011 ("powerpc/powernv/pci: Fallback to OPAL for TCE invalidations") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index da5da11a6223..bc0c91e84ca0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2217,7 +2217,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &pe->table_group); - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); return 0; } @@ -2355,7 +2355,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, if (ret) pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); else - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); From 85d5313ed717ad60769491c7c072d23bc0a68e7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 11:38:31 +0200 Subject: [PATCH 0220/1050] mac80211: reject TSPEC TIDs (TSIDs) for aggregation Since mac80211 doesn't currently support TSIDs 8-15 which can only be used after QoS TSPEC negotiation (and not even after WMM negotiation), reject attempts to set up aggregation sessions for them, which might confuse drivers. In mac80211 we do correctly handle that, but the TSIDs should never get used anyway, and drivers might not be able to handle it. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 8 +++++++- net/mac80211/agg-tx.c | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a9aff6079c42..afa94687d5e1 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -261,10 +261,16 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, .timeout = timeout, .ssn = start_seq_num, }; - int i, ret = -EOPNOTSUPP; u16 status = WLAN_STATUS_REQUEST_DECLINED; + if (tid >= IEEE80211_FIRST_TSPEC_TSID) { + ht_dbg(sta->sdata, + "STA %pM requests BA session on unsupported tid %d\n", + sta->sta.addr, tid); + goto end_no_lock; + } + if (!sta->sta.ht_cap.ht_supported) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5650c46bf91a..45319cc01121 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -584,6 +584,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) return -EINVAL; + if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID)) + return -EINVAL; + ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", pubsta->addr, tid); From cecf62352aee2b4fe114aafd1b8c5f265a4243ce Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 11:22:33 +0300 Subject: [PATCH 0221/1050] perf/x86/intel: Don't disable "intel_bts" around "intel" event batching At the moment, intel_bts events get disabled from intel PMU's disable callback, which includes event scheduling transactions of said PMU, which have nothing to do with intel_bts events. We do want to keep intel_bts events off inside the PMI handler to avoid filling up their buffer too soon. This patch moves intel_bts enabling/disabling directly to the PMI handler. 
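As a rough sketch of the resulting structure (hedged: the helper names below are editorial stand-ins, not the actual arch/x86/events internals), the BTS toggling brackets only the PMI handler instead of living inside the generic disable/enable paths that also run during event scheduling transactions:

    /* Stub helpers so the sketch is self-contained; in the driver these
     * correspond to intel_bts_*_local() and __intel_pmu_*_all(). */
    static void bts_disable_local(void) { /* stop BTS on this CPU */ }
    static void bts_enable_local(void)  { /* restart BTS on this CPU */ }
    static void pmu_disable_all(void)   { /* generic counter disable */ }
    static void pmu_enable_all(void)    { /* generic counter enable */ }

    static int pmi_handler(void)
    {
            bts_disable_local();   /* keep the BTS buffer from filling up */
            pmu_disable_all();     /* no longer touches BTS as a side effect */
            /* ... drain BTS buffer, handle counter overflows ... */
            pmu_enable_all();
            bts_enable_local();    /* re-arm only when leaving the PMI */
            return 0;
    }
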
Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915082233.11065-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2cbde2f449aa..4c9a79b9cd69 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1730,9 +1730,11 @@ static __initconst const u64 knl_hw_cache_extra_regs * disabled state if called consecutively. * * During consecutive calls, the same disable value will be written to related - * registers, so the PMU state remains unchanged. hw.state in - * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive - * calls. + * registers, so the PMU state remains unchanged. + * + * intel_bts events don't coexist with intel PMU's BTS events because of + * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them + * disabled around intel PMU's event batching etc, only inside the PMI handler. */ static void __intel_pmu_disable_all(void) { @@ -1742,8 +1744,6 @@ static void __intel_pmu_disable_all(void) if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); - else - intel_bts_disable_local(); intel_pmu_pebs_disable_all(); } @@ -1771,8 +1771,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) return; intel_pmu_enable_bts(event->hw.config); - } else - intel_bts_enable_local(); + } } static void intel_pmu_enable_all(int added) @@ -2073,6 +2072,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); + intel_bts_disable_local(); __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2172,6 +2172,7 @@ done: /* Only restore PMU state when it's active. See x86_pmu_disable(). */ if (cpuc->enabled) __intel_pmu_enable_all(0, true); + intel_bts_enable_local(); /* * Only unmask the NMI after the overflow counters From a6805884e263e82d9fb87bd5f39ad4bb38cde246 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 17 Aug 2016 13:44:50 +0300 Subject: [PATCH 0222/1050] ARM: keystone: defconfig: Fix USB configuration Simply enabling CONFIG_KEYSTONE_USB_PHY doesn't work anymore as it depends on CONFIG_NOP_USB_XCEIV. We need to enable that as well. This fixes USB on Keystone boards from v4.8-rc1 onwards. 
Signed-off-by: Roger Quadros Acked-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann --- arch/arm/configs/keystone_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 71b42e66488a..78cd2f197e01 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -161,6 +161,7 @@ CONFIG_USB_MON=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_DWC3=y +CONFIG_NOP_USB_XCEIV=y CONFIG_KEYSTONE_USB_PHY=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y From 7892a1f64a447b6f65fe2888688883b7c26d81d3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 9 Aug 2016 12:36:41 -0300 Subject: [PATCH 0223/1050] [media] rcar-fcp: Make sure rcar_fcp_enable() returns 0 on success When resuming from suspend-to-RAM on r8a7795/salvator-x: dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe940000.fdp1 failed to resume noirq: error 1 dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe944000.fdp1 failed to resume noirq: error 1 dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe948000.fdp1 failed to resume noirq: error 1 According to its documentation, rcar_fcp_enable() returns 0 on success or a negative error code if an error occurs. Hence fdp1_pm_runtime_resume() and vsp1_pm_runtime_resume() forward its return value to their callers. However, rcar_fcp_enable() forwards the return value of pm_runtime_get_sync(), which can actually be 1 on success, leading to the resume failure above. To fix this, consider only negative values returned by pm_runtime_get_sync() to be failures. Fixes: 7b49235e83b2347c ("[media] v4l: Add Renesas R-Car FCP driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rcar-fcp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c index 6a7bcc3028b1..bc50c69ee0c5 100644 --- a/drivers/media/platform/rcar-fcp.c +++ b/drivers/media/platform/rcar-fcp.c @@ -99,10 +99,16 @@ EXPORT_SYMBOL_GPL(rcar_fcp_put); */ int rcar_fcp_enable(struct rcar_fcp_device *fcp) { + int error; + if (!fcp) return 0; - return pm_runtime_get_sync(fcp->dev); + error = pm_runtime_get_sync(fcp->dev); + if (error < 0) + return error; + + return 0; } EXPORT_SYMBOL_GPL(rcar_fcp_enable); From 54c5ef2e93ea002dc5dd63349298b2778fe59edb Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Wed, 10 Aug 2016 17:03:43 +0300 Subject: [PATCH 0224/1050] iwlwifi: mvm: update TX queue before making a copy of the skb Off-channel action frames (such as ANQP frames) must be sent either on the AUX queue or on the offchannel queue, otherwise the firmware will cause a SYSASSERT. In the current implementation, the queue to be used is correctly set in the original skb, but this is done after it is copied. Thus the copy remains with the original, incorrect queue. Fix this by setting the queue in the original skb before copying it. 
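The ordering bug is generic enough to sketch in a few lines of plain C (hypothetical names and queue values; the real code patches IEEE80211_SKB_CB(skb)->hw_queue before memcpy()ing the control block):

    #include <string.h>

    struct tx_info { int hw_queue; };

    #define OFFCHANNEL_QUEUE 8   /* hypothetical marker value */
    #define AUX_QUEUE       15   /* hypothetical replacement  */

    static void prepare_tx(struct tx_info *orig, struct tx_info *copy)
    {
            /* Fix up the original first... */
            if (orig->hw_queue == OFFCHANNEL_QUEUE)
                    orig->hw_queue = AUX_QUEUE;

            /* ...and only then take the snapshot; copying before the
             * fixup leaves the copy with the stale, incorrect queue. */
            memcpy(copy, orig, sizeof(*copy));
    }
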
Fixes: commit 5c08b0f5026f ("iwlwifi: mvm: don't override the rate with the AMSDU len") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Beni Lev Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index c6585ab48df3..b3a87a31de30 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -513,6 +513,15 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) int hdrlen = ieee80211_hdrlen(hdr->frame_control); int queue; + /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used + * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel + * queue. STATION (HS2.0) uses the auxiliary context of the FW, + * and hence needs to be sent on the aux queue + */ + if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && + skb_info->control.vif->type == NL80211_IFTYPE_STATION) + IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; + memcpy(&info, skb->cb, sizeof(info)); if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU)) @@ -526,16 +535,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) /* This holds the amsdu headers length */ skb_info->driver_data[0] = (void *)(uintptr_t)0; - /* - * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used - * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel - * queue. STATION (HS2.0) uses the auxiliary context of the FW, - * and hence needs to be sent on the aux queue - */ - if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && - info.control.vif->type == NL80211_IFTYPE_STATION) - IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; - queue = info.hw_queue; /* From b0eaf4506f5f95d15d6731d72c0ddf4a2179eefa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Sep 2016 23:39:12 +0200 Subject: [PATCH 0225/1050] kvm: x86: correctly reset dest_map->vector when restoring LAPIC state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When userspace sends KVM_SET_LAPIC, KVM schedules a check between the vCPU's IRR and ISR and the IOAPIC redirection table, in order to re-establish the IOAPIC's dest_map (the list of CPUs servicing the real-time clock interrupt with the corresponding vectors). However, __rtc_irq_eoi_tracking_restore_one was forgetting to set dest_map->vectors. Because of this, the IOAPIC did not process the real-time clock interrupt EOI, ioapic->rtc_status.pending_eoi got stuck at a non-zero value, and further RTC interrupts were reported to userspace as coalesced. 
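The invariant being restored is easy to state with simplified types (an editorial sketch, not KVM's actual structures): the destination bitmap and the per-vCPU vector array must be updated together, because the EOI path later matches against vectors[].

    #include <stdint.h>

    #define MAX_VCPUS 256

    struct dest_map {
            uint8_t map[MAX_VCPUS / 8];  /* which vCPUs service the RTC interrupt */
            uint8_t vectors[MAX_VCPUS];  /* the vector each of those vCPUs uses   */
    };

    static void track_rtc_dest(struct dest_map *dm, int vcpu_id, uint8_t vector)
    {
            dm->map[vcpu_id / 8] |= 1u << (vcpu_id % 8);
            /* The counterpart of the line this fix adds: without it the
             * EOI never matches and pending_eoi stays stuck non-zero. */
            dm->vectors[vcpu_id] = vector;
    }
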
Fixes: 9e4aabe2bb3454c83dac8139cf9974503ee044db Fixes: 4d99ba898dd0c521ca6cdfdde55c9b58aea3cb3d Cc: stable@vger.kernel.org Cc: Joerg Roedel Cc: David Gilbert Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5f42d038fcb4..c7220ba94aa7 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -109,6 +109,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) { bool new_val, old_val; struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + struct dest_map *dest_map = &ioapic->rtc_status.dest_map; union kvm_ioapic_redirect_entry *e; e = &ioapic->redirtbl[RTC_GSI]; @@ -117,16 +118,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); - old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + old_val = test_bit(vcpu->vcpu_id, dest_map->map); if (new_val == old_val) return; if (new_val) { - __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __set_bit(vcpu->vcpu_id, dest_map->map); + dest_map->vectors[vcpu->vcpu_id] = e->fields.vector; ioapic->rtc_status.pending_eoi++; } else { - __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __clear_bit(vcpu->vcpu_id, dest_map->map); ioapic->rtc_status.pending_eoi--; rtc_status_pending_eoi_check_valid(ioapic); } From ca3b9c6b6d4db9a8ba5fc8b95664e75468c59f9f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 30 Jun 2016 16:14:02 +0300 Subject: [PATCH 0226/1050] iwlwifi: mvm: call a different txq_enable function Since the SCD_QUEUE_CFG command was introduced the driver calls iwl_trans_txq_enable_cfg() with a NULL scd_cfg parameter. This makes the transport avoid writing to the SCD pointers, since it can cause races with firmware, which is also accessing the registers. The transport only updates the write pointer in that case. Fix a wrong call to iwl_trans_txq_enable() which caused a scd_cfg parameter to be sent to the transport, resulting in an access to the SCD registers. Fixes: 58f2cc57dc6a ("iwlwifi: mvm: support dqa-mode scd queue redirection") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 30fc3afe5241..a92e12edeb3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -588,9 +588,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, ret); /* Make sure the SCD wrptr is correctly set before reconfiguring */ - iwl_trans_txq_enable(mvm->trans, queue, iwl_mvm_ac_to_tx_fifo[ac], - cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF, - ssn, wdg_timeout); + iwl_trans_txq_enable_cfg(mvm->trans, queue, ssn, NULL, wdg_timeout); /* Update the TID "owner" of the queue */ spin_lock_bh(&mvm->queue_info_lock); From 15985fba2e93872f3070af63ae9b0d0f42b2c7ea Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 26 Jun 2016 14:45:12 +0300 Subject: [PATCH 0227/1050] iwlwifi: mvm: don't free queue after delba in dqa In DQA mode, a delBA might free the queue although it shouldn't. Fix that. 
Fixes: cf941e174ee2 ("iwlwifi: mvm: support dqa-mode agg on non-shared queue") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 8b91544e6220..62714709ba8f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1100,9 +1100,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, IWL_DEBUG_TX_QUEUES(mvm, "Can continue DELBA flow ssn = next_recl = %d\n", tid_data->next_reclaimed); - iwl_mvm_disable_txq(mvm, tid_data->txq_id, - vif->hw_queue[tid_to_mac80211_ac[tid]], tid, - CMD_ASYNC); + if (!iwl_mvm_is_dqa_supported(mvm)) { + u8 mac80211_ac = tid_to_mac80211_ac[tid]; + + iwl_mvm_disable_txq(mvm, tid_data->txq_id, + vif->hw_queue[mac80211_ac], tid, + CMD_ASYNC); + } tid_data->state = IWL_AGG_OFF; ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; From c0ed8aa4d1babfe173d01ce6169c237ad9b0c462 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Mon, 11 Jul 2016 05:01:12 +0800 Subject: [PATCH 0228/1050] iwlwifi: fix semicolon.cocci warnings drivers/net/wireless/intel/iwlwifi/iwl-io.c:243:2-3: Unneeded semicolon Remove unneeded semicolon. Generated by: scripts/coccinelle/misc/semicolon.cocci CC: Sara Sharon Signed-off-by: Fengguang Wu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c index 92c8b5f9a9cb..a9f69fdd170b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c @@ -267,7 +267,7 @@ static const char *get_rfh_string(int cmd) IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_WIDX, i); IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_RIDX, i); IWL_CMD_MQ(cmd, RFH_Q_URBD_STTS_WPTR_LSB, i); - }; + } switch (cmd) { IWL_CMD(RFH_RXF_DMA_CFG); From 3cd1980b0cdf66443a610b62e3a630e44eac4e45 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 23 Jun 2016 16:31:40 +0300 Subject: [PATCH 0229/1050] iwlwifi: pcie: introduce new tfd and tb formats New hardware supports bigger TFDs and TBs. Introduce the new formats and adjust defines and code relying on old format. Changing the actual TFD allocation is trickier and deferred to the next patch. 
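The size arithmetic behind the two formats can be checked with a small stand-alone sketch (packed layouts assumed, as in the driver; field names abridged): 256 TFDs x 128 bytes = 32 KB per queue for the old format, and 256 x 256 bytes = 64 KB for the new one.

    #include <stdint.h>

    #pragma pack(push, 1)
    struct tb      { uint32_t lo; uint16_t hi_n_len; };      /*  6 bytes */
    struct tfd     { uint8_t rsvd[3]; uint8_t num_tbs;
                     struct tb tbs[20]; uint32_t pad; };     /* pre-a000 */
    struct tfh_tb  { uint16_t tb_len; uint64_t addr; };      /* 10 bytes */
    struct tfh_tfd { uint16_t num_tbs;
                     struct tfh_tb tbs[25]; uint32_t pad; }; /* a000 and on */
    #pragma pack(pop)

    _Static_assert(sizeof(struct tfd) == 128,     "3+1 + 20*6 + 4 = 128");
    _Static_assert(sizeof(struct tfh_tfd) == 256, "2 + 25*10 + 4 = 256");
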
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-fh.h | 50 +++++++++++++++---- .../net/wireless/intel/iwlwifi/iwl-trans.c | 1 + .../net/wireless/intel/iwlwifi/iwl-trans.h | 4 -- .../wireless/intel/iwlwifi/pcie/internal.h | 4 +- .../net/wireless/intel/iwlwifi/pcie/trans.c | 16 ++++-- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 20 ++++---- 6 files changed, 64 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index dd75ea7c936e..98d2ff240ea5 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -643,6 +643,7 @@ struct iwl_rb_status { #define TFD_QUEUE_BC_SIZE (TFD_QUEUE_SIZE_MAX + TFD_QUEUE_SIZE_BC_DUP) #define IWL_TX_DMA_MASK DMA_BIT_MASK(36) #define IWL_NUM_OF_TBS 20 +#define IWL_TFH_NUM_TBS 25 static inline u8 iwl_get_dma_hi_addr(dma_addr_t addr) { @@ -664,25 +665,29 @@ struct iwl_tfd_tb { } __packed; /** - * struct iwl_tfd + * struct iwl_tfh_tb transmit buffer descriptor within transmit frame descriptor * - * Transmit Frame Descriptor (TFD) - * - * @ __reserved1[3] reserved - * @ num_tbs 0-4 number of active tbs - * 5 reserved - * 6-7 padding (not used) - * @ tbs[20] transmit frame buffer descriptors - * @ __pad padding + * This structure contains dma address and length of transmission address * + * @tb_len length of the tx buffer + * @addr 64 bits dma address + */ +struct iwl_tfh_tb { + __le16 tb_len; + __le64 addr; +} __packed; + +/** * Each Tx queue uses a circular buffer of 256 TFDs stored in host DRAM. * Both driver and device share these circular buffers, each of which must be - * contiguous 256 TFDs x 128 bytes-per-TFD = 32 KBytes + * contiguous 256 TFDs. + * For pre a000 HW it is 256 x 128 bytes-per-TFD = 32 KBytes + * For a000 HW and on it is 256 x 256 bytes-per-TFD = 65 KBytes * * Driver must indicate the physical address of the base of each * circular buffer via the FH_MEM_CBBC_QUEUE registers. * - * Each TFD contains pointer/size information for up to 20 data buffers + * Each TFD contains pointer/size information for up to 20 / 25 data buffers * in host DRAM. These buffers collectively contain the (one) frame described * by the TFD. Each buffer must be a single contiguous block of memory within * itself, but buffers may be scattered in host DRAM. Each buffer has max size @@ -691,6 +696,16 @@ struct iwl_tfd_tb { * * A maximum of 255 (not 256!) TFDs may be on a queue waiting for Tx. 
*/ + +/** + * struct iwl_tfd - Transmit Frame Descriptor (TFD) + * @ __reserved1[3] reserved + * @ num_tbs 0-4 number of active tbs + * 5 reserved + * 6-7 padding (not used) + * @ tbs[20] transmit frame buffer descriptors + * @ __pad padding + */ struct iwl_tfd { u8 __reserved1[3]; u8 num_tbs; @@ -698,6 +713,19 @@ struct iwl_tfd { __le32 __pad; } __packed; +/** + * struct iwl_tfh_tfd - Transmit Frame Descriptor (TFD) + * @ num_tbs 0-4 number of active tbs + * 5 -15 reserved + * @ tbs[25] transmit frame buffer descriptors + * @ __pad padding + */ +struct iwl_tfh_tfd { + __le16 num_tbs; + struct iwl_tfh_tb tbs[IWL_TFH_NUM_TBS]; + __le32 __pad; +} __packed; + /* Keep Warm Size */ #define IWL_KW_SIZE 0x1000 /* 4k */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 6069a9ff53fa..b0bd67c64b5c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -65,6 +65,7 @@ #include "iwl-trans.h" #include "iwl-drv.h" +#include "iwl-fh.h" struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, struct device *dev, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 5535e2238da3..883cb487b652 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -262,8 +262,6 @@ static inline u32 iwl_rx_packet_payload_len(const struct iwl_rx_packet *pkt) * (i.e. mark it as non-idle). * @CMD_WANT_ASYNC_CALLBACK: the op_mode's async callback function must be * called after this command completes. Valid only with CMD_ASYNC. - * @CMD_TB_BITMAP_POS: Position of the first bit for the TB bitmap. We need to - * check that we leave enough room for the TBs bitmap which needs 20 bits. */ enum CMD_MODE { CMD_ASYNC = BIT(0), @@ -274,8 +272,6 @@ enum CMD_MODE { CMD_MAKE_TRANS_IDLE = BIT(5), CMD_WAKE_UP_TRANS = BIT(6), CMD_WANT_ASYNC_CALLBACK = BIT(7), - - CMD_TB_BITMAP_POS = 11, }; #define DEF_CMD_PAYLOAD_SIZE 320 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 11e347dd44c7..975900a1efa1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -49,7 +49,7 @@ * be needed for potential data in the SKB's head. The remaining ones can * be used for frags. 
*/ -#define IWL_PCIE_MAX_FRAGS (IWL_NUM_OF_TBS - 3) +#define IWL_PCIE_MAX_FRAGS(x) (x->max_tbs - 3) /* * RX related structures and functions @@ -192,6 +192,7 @@ struct iwl_cmd_meta { /* only for SYNC commands, iff the reply skb is wanted */ struct iwl_host_cmd *source; u32 flags; + u32 tbs; }; /* @@ -391,6 +392,7 @@ struct iwl_trans_pcie { unsigned int cmd_q_wdg_timeout; u8 n_no_reclaim_cmds; u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS]; + u8 max_tbs; enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2f46eedd7c4d..0c2ccbeab167 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2437,12 +2437,14 @@ err: } #endif /*CONFIG_IWLWIFI_DEBUGFS */ -static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd) +static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, + struct iwl_tfd *tfd) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 cmdlen = 0; int i; - for (i = 0; i < IWL_NUM_OF_TBS; i++) + for (i = 0; i < trans_pcie->max_tbs; i++) cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i); return cmdlen; @@ -2731,7 +2733,7 @@ static struct iwl_trans_dump_data u8 idx = get_cmd_index(&cmdq->q, ptr); u32 caplen, cmdlen; - cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]); + cmdlen = iwl_trans_pcie_get_cmdlen(trans, &cmdq->tfds[ptr]); caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); if (cmdlen) { @@ -2839,8 +2841,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, if (!trans) return ERR_PTR(-ENOMEM); - trans->max_skb_frags = IWL_PCIE_MAX_FRAGS; - trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); trans_pcie->trans = trans; @@ -2874,6 +2874,12 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, else addr_size = 36; + if (cfg->use_tfh) + trans_pcie->max_tbs = IWL_TFH_NUM_TBS; + else + trans_pcie->max_tbs = IWL_NUM_OF_TBS; + trans->max_skb_frags = IWL_PCIE_MAX_FRAGS(trans_pcie); + pci_set_master(pdev); ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size)); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 9636dc89f6bd..1c46f146e168 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -348,13 +348,13 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, struct iwl_cmd_meta *meta, struct iwl_tfd *tfd) { - int i; - int num_tbs; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int i, num_tbs; /* Sanity check on number of chunks */ num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); - if (num_tbs >= IWL_NUM_OF_TBS) { + if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); /* @todo issue fatal error, it is quite serious situation */ return; @@ -363,7 +363,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, /* first TB is never freed - it's the bidirectional DMA data */ for (i = 1; i < num_tbs; i++) { - if (meta->flags & BIT(i + CMD_TB_BITMAP_POS)) + if (meta->tbs & BIT(i)) dma_unmap_page(trans->dev, iwl_pcie_tfd_tb_get_addr(tfd, i), iwl_pcie_tfd_tb_get_len(tfd, i), @@ -423,6 +423,7 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, dma_addr_t addr, u16 len, bool reset) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_queue *q; struct iwl_tfd *tfd, *tfd_tmp; u32 
num_tbs; @@ -437,9 +438,9 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); /* Each TFD can point to a maximum 20 Tx buffers */ - if (num_tbs >= IWL_NUM_OF_TBS) { + if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Error can not send more than %d chunks\n", - IWL_NUM_OF_TBS); + trans_pcie->max_tbs); return -EINVAL; } @@ -1640,8 +1641,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false); } - BUILD_BUG_ON(IWL_NUM_OF_TBS + CMD_TB_BITMAP_POS > - sizeof(out_meta->flags) * BITS_PER_BYTE); + BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE); out_meta->flags = cmd->flags; if (WARN_ON_ONCE(txq->entries[idx].free_buf)) kzfree(txq->entries[idx].free_buf); @@ -1953,7 +1953,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys, skb_frag_size(frag), false); - out_meta->flags |= BIT(tb_idx + CMD_TB_BITMAP_POS); + out_meta->tbs |= BIT(tb_idx); } trace_iwlwifi_dev_tx(trans->dev, skb, @@ -2247,7 +2247,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, } if (skb_is_nonlinear(skb) && - skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS && + skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) && __skb_linearize(skb)) return -ENOMEM; From 585a26274221e1cd7e0c06e0f96fbf4a7269187b Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 13 Jul 2016 16:53:36 +0300 Subject: [PATCH 0230/1050] iwlwifi: mvm: remove dump of locked registers Firmware may lock those registers for access. On 9000 devices this results in a bus stall and an endless loop of 0x5a5a5a reads. Don't dump those registers. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index 1abcabb9b6cd..bebb148d352b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -440,14 +440,12 @@ static const struct iwl_prph_range iwl_prph_dump_addr_comm[] = { { .start = 0x00a04560, .end = 0x00a0457c }, { .start = 0x00a04590, .end = 0x00a04598 }, { .start = 0x00a045c0, .end = 0x00a045f4 }, - { .start = 0x00a44000, .end = 0x00a7bf80 }, }; static const struct iwl_prph_range iwl_prph_dump_addr_9000[] = { { .start = 0x00a05c00, .end = 0x00a05c18 }, { .start = 0x00a05400, .end = 0x00a056e8 }, { .start = 0x00a08000, .end = 0x00a098bc }, - { .start = 0x00adfc00, .end = 0x00adfd1c }, { .start = 0x00a02400, .end = 0x00a02758 }, }; From db06f04dafa1c1d94db1ef162addaff778f6457a Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 5 Jul 2016 17:37:58 +0300 Subject: [PATCH 0231/1050] iwlwifi: mvm: support new shared memory config API In a000 devices we have 15 fifos, so the number of tx fifos in the shared memory config array was changed accordingly. As it is in the middle of the struct, the parsing code needs to be duplicated. To minimize the duplication, do not save variables we never actually use. 
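Why the parsing code has to be duplicated is easiest to see with a toy version of the two layouts (hypothetical field subset, editorial sketch): once a fixed-size array in the middle of the wire struct grows, every later member shifts, so a single cast-and-copy routine cannot serve both versions.

    #include <stdint.h>

    struct shared_mem_v1 {                 /* up to the 9000 family */
            uint32_t txfifo_size[8];
            uint32_t rxfifo_size[2];       /* lives at byte offset 32 */
    };

    struct shared_mem_v2 {                 /* a000 devices and on */
            uint32_t txfifo_size[15];
            uint32_t rxfifo_size[2];       /* now at byte offset 60 */
    };

    static void parse_v1(uint32_t rx_out[2], const struct shared_mem_v1 *in)
    {
            rx_out[0] = in->rxfifo_size[0];
            rx_out[1] = in->rxfifo_size[1];
    }

    static void parse_v2(uint32_t rx_out[2], const struct shared_mem_v2 *in)
    {
            rx_out[0] = in->rxfifo_size[0];
            rx_out[1] = in->rxfifo_size[1];
    }
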
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/fw-api.h | 22 ++++- .../net/wireless/intel/iwlwifi/mvm/fw-dbg.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 98 +++++++++++-------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 9 +- 4 files changed, 78 insertions(+), 53 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 57b574b2a092..2f92994d0e5b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -1977,8 +1977,9 @@ struct iwl_tdls_config_res { struct iwl_tdls_config_sta_info_res sta_info[IWL_MVM_TDLS_STA_COUNT]; } __packed; /* TDLS_CONFIG_RSP_API_S_VER_1 */ -#define TX_FIFO_MAX_NUM 8 -#define RX_FIFO_MAX_NUM 2 +#define TX_FIFO_MAX_NUM_9000 8 +#define TX_FIFO_MAX_NUM 15 +#define RX_FIFO_MAX_NUM 2 #define TX_FIFO_INTERNAL_MAX_NUM 6 /** @@ -2004,6 +2005,21 @@ struct iwl_tdls_config_res { * NOTE: on firmware that don't have IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG * set, the last 3 members don't exist. */ +struct iwl_shared_mem_cfg_v1 { + __le32 shared_mem_addr; + __le32 shared_mem_size; + __le32 sample_buff_addr; + __le32 sample_buff_size; + __le32 txfifo_addr; + __le32 txfifo_size[TX_FIFO_MAX_NUM_9000]; + __le32 rxfifo_size[RX_FIFO_MAX_NUM]; + __le32 page_buff_addr; + __le32 page_buff_size; + __le32 rxfifo_addr; + __le32 internal_txfifo_addr; + __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; +} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */ + struct iwl_shared_mem_cfg { __le32 shared_mem_addr; __le32 shared_mem_size; @@ -2017,7 +2033,7 @@ struct iwl_shared_mem_cfg { __le32 rxfifo_addr; __le32 internal_txfifo_addr; __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; -} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */ +} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */ /** * VHT MU-MIMO group configuration diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index bebb148d352b..42dcefe33104 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -557,7 +557,7 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm) sizeof(struct iwl_fw_error_dump_fifo); } - for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) { + for (i = 0; i < mem_cfg->num_txfifo_entries; i++) { if (!mem_cfg->txfifo_size[i]) continue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 47e8e70dcfac..b951a7f06060 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -838,6 +838,59 @@ out: return ret; } +static void iwl_mvm_parse_shared_mem_a000(struct iwl_mvm *mvm, + struct iwl_rx_packet *pkt) +{ + struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data; + int i; + + mvm->shared_mem_cfg.num_txfifo_entries = + ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); + for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) + mvm->shared_mem_cfg.txfifo_size[i] = + le32_to_cpu(mem_cfg->txfifo_size[i]); + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) + mvm->shared_mem_cfg.rxfifo_size[i] = + le32_to_cpu(mem_cfg->rxfifo_size[i]); + + BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != + sizeof(mem_cfg->internal_txfifo_size)); + + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); + i++) + mvm->shared_mem_cfg.internal_txfifo_size[i] = + 
le32_to_cpu(mem_cfg->internal_txfifo_size[i]); +} + +static void iwl_mvm_parse_shared_mem(struct iwl_mvm *mvm, + struct iwl_rx_packet *pkt) +{ + struct iwl_shared_mem_cfg_v1 *mem_cfg = (void *)pkt->data; + int i; + + mvm->shared_mem_cfg.num_txfifo_entries = + ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); + for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) + mvm->shared_mem_cfg.txfifo_size[i] = + le32_to_cpu(mem_cfg->txfifo_size[i]); + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) + mvm->shared_mem_cfg.rxfifo_size[i] = + le32_to_cpu(mem_cfg->rxfifo_size[i]); + + /* new API has more data, from rxfifo_addr field and on */ + if (fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) { + BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != + sizeof(mem_cfg->internal_txfifo_size)); + + for (i = 0; + i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); + i++) + mvm->shared_mem_cfg.internal_txfifo_size[i] = + le32_to_cpu(mem_cfg->internal_txfifo_size[i]); + } +} + static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) { struct iwl_host_cmd cmd = { @@ -845,9 +898,7 @@ static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) .data = { NULL, }, .len = { 0, }, }; - struct iwl_shared_mem_cfg *mem_cfg; struct iwl_rx_packet *pkt; - u32 i; lockdep_assert_held(&mvm->mutex); @@ -861,45 +912,10 @@ static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) return; pkt = cmd.resp_pkt; - mem_cfg = (void *)pkt->data; - - mvm->shared_mem_cfg.shared_mem_addr = - le32_to_cpu(mem_cfg->shared_mem_addr); - mvm->shared_mem_cfg.shared_mem_size = - le32_to_cpu(mem_cfg->shared_mem_size); - mvm->shared_mem_cfg.sample_buff_addr = - le32_to_cpu(mem_cfg->sample_buff_addr); - mvm->shared_mem_cfg.sample_buff_size = - le32_to_cpu(mem_cfg->sample_buff_size); - mvm->shared_mem_cfg.txfifo_addr = le32_to_cpu(mem_cfg->txfifo_addr); - for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); i++) - mvm->shared_mem_cfg.txfifo_size[i] = - le32_to_cpu(mem_cfg->txfifo_size[i]); - for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) - mvm->shared_mem_cfg.rxfifo_size[i] = - le32_to_cpu(mem_cfg->rxfifo_size[i]); - mvm->shared_mem_cfg.page_buff_addr = - le32_to_cpu(mem_cfg->page_buff_addr); - mvm->shared_mem_cfg.page_buff_size = - le32_to_cpu(mem_cfg->page_buff_size); - - /* new API has more data */ - if (fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) { - mvm->shared_mem_cfg.rxfifo_addr = - le32_to_cpu(mem_cfg->rxfifo_addr); - mvm->shared_mem_cfg.internal_txfifo_addr = - le32_to_cpu(mem_cfg->internal_txfifo_addr); - - BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != - sizeof(mem_cfg->internal_txfifo_size)); - - for (i = 0; - i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); - i++) - mvm->shared_mem_cfg.internal_txfifo_size[i] = - le32_to_cpu(mem_cfg->internal_txfifo_size[i]); - } + if (iwl_mvm_has_new_tx_api(mvm)) + iwl_mvm_parse_shared_mem_a000(mvm, pkt); + else + iwl_mvm_parse_shared_mem(mvm, pkt); IWL_DEBUG_INFO(mvm, "SHARED MEM CFG: got memory offsets/sizes\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 1806495aab57..f5df7064abe3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -602,16 +602,9 @@ enum iwl_mvm_tdls_cs_state { }; struct iwl_mvm_shared_mem_cfg { - u32 shared_mem_addr; - u32 shared_mem_size; - u32 sample_buff_addr; - u32 sample_buff_size; - u32 
txfifo_addr; + int num_txfifo_entries; u32 txfifo_size[TX_FIFO_MAX_NUM]; u32 rxfifo_size[RX_FIFO_MAX_NUM]; - u32 page_buff_addr; - u32 page_buff_size; - u32 rxfifo_addr; u32 internal_txfifo_addr; u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; }; From 1c109fabbd51863475cd12ac206bdd249aee35af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 Sep 2016 02:35:29 +0100 Subject: [PATCH 0232/1050] fix minor infoleak in get_user_ex() get_user_ex(x, ptr) should zero x on failure. It's not a lot of a leak (at most we are leaking uninitialized 64bit value off the kernel stack, and in a fairly constrained situation, at that), but the fix is trivial, so... Cc: stable@vger.kernel.org Signed-off-by: Al Viro [ This sat in different branch from the uaccess fixes since mid-August ] Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index e3af86f58eaf..2131c4ce7d8a 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -433,7 +433,11 @@ do { \ #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ asm volatile("1: mov"itype" %1,%"rtype"0\n" \ "2:\n" \ - _ASM_EXTABLE_EX(1b, 2b) \ + ".section .fixup,\"ax\"\n" \ + "3:xor"itype" %"rtype"0,%"rtype"0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE_EX(1b, 3b) \ : ltype(x) : "m" (__m(addr))) #define __put_user_nocheck(x, ptr, size) \ From 5297e0f0fe13305a1fc7f01986be0dccd063d57a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Sep 2016 20:20:00 -0700 Subject: [PATCH 0233/1050] vfs: fix return type of ioctl_file_dedupe_range All the VFS functions in the dedupe ioctl path return int status, so the ioctl handler ought to as well. Found by Coverity, CID 1350952. Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds --- fs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 0f56deb24ce6..26aba0942a98 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -568,7 +568,7 @@ static int ioctl_fsthaw(struct file *filp) return thaw_super(sb); } -static long ioctl_file_dedupe_range(struct file *file, void __user *arg) +static int ioctl_file_dedupe_range(struct file *file, void __user *arg) { struct file_dedupe_range __user *argp = arg; struct file_dedupe_range *same = NULL; From b71dbf1032f546bf3efd60fb5d9d0cefd200a508 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Sep 2016 20:20:44 -0700 Subject: [PATCH 0234/1050] vfs: cap dedupe request structure size at PAGE_SIZE Kirill A Shutemov reports that the kernel doesn't try to cap dest_count in any way, and uses the number to allocate kernel memory. This causes high order allocation warnings in the kernel log if someone passes in a big enough value. We should clamp the allocation at PAGE_SIZE to avoid stressing the VM. The two existing users of the dedupe ioctl never send more than 120 requests, so we can safely clamp dest_range at PAGE_SIZE, because with 4k pages we can handle up to 127 dedupe candidates. Given the max extent length of 16MB, we can end up doing 2GB of IO which is plenty. [ Note: the "offsetof()" can't overflow, because 'count' is just a 16-bit integer. That's not obvious in the limited context of the patch, so I'm noting it here because it made me go look. - Linus ] Reported-by: "Kirill A. Shutemov" Signed-off-by: Darrick J. 
Wong Signed-off-by: Linus Torvalds --- fs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ioctl.c b/fs/ioctl.c index 26aba0942a98..c415668c86d4 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -582,6 +582,10 @@ static int ioctl_file_dedupe_range(struct file *file, void __user *arg) } size = offsetof(struct file_dedupe_range __user, info[count]); + if (size > PAGE_SIZE) { + ret = -ENOMEM; + goto out; + } same = memdup_user(argp, size); if (IS_ERR(same)) { From 22f6b4d34fcf039c63a94e7670e0da24f8575a5a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 16 Sep 2016 00:31:22 +0200 Subject: [PATCH 0235/1050] aio: mark AIO pseudo-fs noexec This ensures that do_mmap() won't implicitly make AIO memory mappings executable if the READ_IMPLIES_EXEC personality flag is set. Such behavior is problematic because the security_mmap_file LSM hook doesn't catch this case, potentially permitting an attacker to bypass a W^X policy enforced by SELinux. I have tested the patch on my machine. To test the behavior, compile and run this:

	#define _GNU_SOURCE
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <sys/personality.h>
	#include <linux/aio_abi.h>
	#include <err.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(void) {
		personality(READ_IMPLIES_EXEC);
		aio_context_t ctx = 0;
		if (syscall(__NR_io_setup, 1, &ctx))
			err(1, "io_setup");
		char cmd[1000];
		sprintf(cmd, "cat /proc/%d/maps | grep -F '/[aio]'",
			(int)getpid());
		system(cmd);
		return 0;
	}

In the output, "rw-s" is good, "rwxs" is bad. Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- fs/aio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index fb8e45b88cd4..4fe81d1c60f9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type, static const struct dentry_operations ops = { .d_dname = simple_dname, }; - return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC); + struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops, + AIO_RING_MAGIC); + + if (!IS_ERR(root)) + root->d_sb->s_iflags |= SB_I_NOEXEC; + return root; } /* aio_setup From 778935778c3b88e5152a88765850009006ef2e32 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 10:42:25 -0700 Subject: [PATCH 0236/1050] PM / runtime: Use _rcuidle for runtime suspend tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified a few more unprotected uses of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The affected function is rpm_suspend(). The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ Warning from omap3 =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/rpm.h:63 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state!
1 lock held by swapper/0/0: #0: (&(&dev->power.lock)->rlock){-.-...}, at: [] __pm_runtime_suspend+0x54/0x84 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP36xx (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb0/0xe4) [] (dump_stack) from [] (rpm_suspend+0x604/0x7e4) [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84) [] (__pm_runtime_suspend) from [] (omap2_gpio_prepare_for_idle+0x5c/0x70) [] (omap2_gpio_prepare_for_idle) from [] (omap_sram_idle+0x140/0x244) [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x80/0x3d4) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x198/0x3a0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) [] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 17995fadebd7..82a081ea4317 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -419,7 +419,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) struct device *parent = NULL; int retval; - trace_rpm_suspend(dev, rpmflags); + trace_rpm_suspend_rcuidle(dev, rpmflags); repeat: retval = rpm_check_suspend_allowed(dev); @@ -549,7 +549,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) } out: - trace_rpm_return_int(dev, _THIS_IP_, retval); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); return retval; From 8aacf4b73fe87bc8fbe75a83862f411b52b7f272 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 4 Jul 2016 15:40:11 +0300 Subject: [PATCH 0237/1050] iwlwifi: introduce trans API to get byte count table In future HW the byte count table address will be configured by ucode per queue. 
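The byte count table lives in driver DRAM (trans_pcie->scd_bc_tbls), with each queue's slice at a fixed offset from the base, as the new PCIe helper below computes. A minimal sketch of how an opmode could call the hook this patch adds; the queue number and the mvm context are illustrative only, not part of the patch:

	/* Fetch the DMA address of queue 7's byte count table through the
	 * new trans API; how the opmode then hands it to the ucode is
	 * firmware-specific and not shown here. */
	dma_addr_t bc_tbl_dma = iwl_trans_get_txq_byte_table(mvm->trans, 7);
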
Add API to expose the byte count table to the opmode Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 ++ drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 8 ++++++++ 4 files changed, 22 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 883cb487b652..04e998d9d5fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -645,6 +645,8 @@ struct iwl_trans_ops { void (*txq_set_shared_mode)(struct iwl_trans *trans, u32 txq_id, bool shared); + dma_addr_t (*get_txq_byte_table)(struct iwl_trans *trans, int txq_id); + int (*wait_tx_queue_empty)(struct iwl_trans *trans, u32 txq_bm); void (*freeze_txq_timer)(struct iwl_trans *trans, unsigned long txqs, bool freeze); @@ -1069,6 +1071,15 @@ static inline void iwl_trans_txq_set_shared_mode(struct iwl_trans *trans, trans->ops->txq_set_shared_mode(trans, queue, shared_mode); } +static inline dma_addr_t iwl_trans_get_txq_byte_table(struct iwl_trans *trans, + int queue) +{ + /* we should never be called if the trans doesn't support it */ + BUG_ON(!trans->ops->get_txq_byte_table); + + return trans->ops->get_txq_byte_table(trans, queue); +} + static inline void iwl_trans_txq_enable(struct iwl_trans *trans, int queue, int fifo, int sta_id, int tid, int frame_limit, u16 ssn, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 975900a1efa1..b9dc82b981e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -476,6 +476,7 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int queue, bool configure_scd); void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id, bool shared_mode); +dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq); void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq); int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 0c2ccbeab167..21b1be11100a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2803,6 +2803,8 @@ static const struct iwl_trans_ops trans_ops_pcie = { .txq_disable = iwl_trans_pcie_txq_disable, .txq_enable = iwl_trans_pcie_txq_enable, + .get_txq_byte_table = iwl_trans_pcie_get_txq_byte_table, + .txq_set_shared_mode = iwl_trans_pcie_txq_set_shared_mode, .wait_tx_queue_empty = iwl_trans_pcie_wait_txq_empty, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 1c46f146e168..57a657a17e1e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1352,6 +1352,14 @@ void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id, txq->ampdu = !shared_mode; } +dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + return trans_pcie->scd_bc_tbls.dma + + txq * sizeof(struct iwlagn_scd_bc_tbl); +} + void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id, bool configure_scd) { From 
6983ba6951139c99f0692c94f83d8d75ea559bcc Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 26 Jun 2016 13:17:56 +0300 Subject: [PATCH 0238/1050] iwlwifi: pcie: assign and access a000 TFD & TBs Previous patch introduced the new formats. This patch allocates the new structures and adjusts code accordingly. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 19 ++- .../net/wireless/intel/iwlwifi/pcie/trans.c | 16 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 141 ++++++++++++------ 3 files changed, 120 insertions(+), 56 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index b9dc82b981e1..d185692676fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -280,7 +280,7 @@ struct iwl_pcie_first_tb_buf { */ struct iwl_txq { struct iwl_queue q; - struct iwl_tfd *tfds; + void *tfds; struct iwl_pcie_first_tb_buf *first_tb_bufs; dma_addr_t first_tb_dma; struct iwl_pcie_txq_entry *entries; @@ -393,6 +393,7 @@ struct iwl_trans_pcie { u8 n_no_reclaim_cmds; u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS]; u8 max_tbs; + u16 tfd_size; enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; @@ -489,9 +490,21 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct sk_buff_head *skbs); void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); -static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx) +static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *tfd, + u8 idx) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + struct iwl_tfd *tfd_fh; + struct iwl_tfd_tb *tb; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + + return le16_to_cpu(tb->tb_len); + } + + tfd_fh = (void *)tfd; + tb = &tfd_fh->tbs[idx]; return le16_to_cpu(tb->hi_n_len) >> 4; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 21b1be11100a..e908bb8e10b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2437,15 +2437,14 @@ err: } #endif /*CONFIG_IWLWIFI_DEBUGFS */ -static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, - struct iwl_tfd *tfd) +static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, void *tfd) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 cmdlen = 0; int i; for (i = 0; i < trans_pcie->max_tbs; i++) - cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i); + cmdlen += iwl_pcie_tfd_tb_get_len(trans, tfd, i); return cmdlen; } @@ -2733,7 +2732,8 @@ static struct iwl_trans_dump_data u8 idx = get_cmd_index(&cmdq->q, ptr); u32 caplen, cmdlen; - cmdlen = iwl_trans_pcie_get_cmdlen(trans, &cmdq->tfds[ptr]); + cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds + + trans_pcie->tfd_size * ptr); caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); if (cmdlen) { @@ -2876,10 +2876,14 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, else addr_size = 36; - if (cfg->use_tfh) + if (cfg->use_tfh) { trans_pcie->max_tbs = IWL_TFH_NUM_TBS; - else + trans_pcie->tfd_size = sizeof(struct iwl_tfh_tb); + + } else { trans_pcie->max_tbs = IWL_NUM_OF_TBS; + trans_pcie->tfd_size = sizeof(struct iwl_tfd); + } trans->max_skb_frags = IWL_PCIE_MAX_FRAGS(trans_pcie); pci_set_master(pdev); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c 
b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 57a657a17e1e..f893ee111b46 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -312,11 +312,30 @@ void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans) } } -static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx) +static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie, + struct iwl_txq *txq, int idx) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + return txq->tfds + trans_pcie->tfd_size * idx; +} + +static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans, + void *tfd, u8 idx) +{ + struct iwl_tfd *tfd_fh; + struct iwl_tfd_tb *tb; + dma_addr_t addr; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + + return (dma_addr_t)(le64_to_cpu(tb->addr)); + } + + tfd_fh = (void *)tfd; + tb = &tfd_fh->tbs[idx]; + addr = get_unaligned_le32(&tb->lo); - dma_addr_t addr = get_unaligned_le32(&tb->lo); if (sizeof(dma_addr_t) > sizeof(u32)) addr |= ((dma_addr_t)(le16_to_cpu(tb->hi_n_len) & 0xF) << 16) << 16; @@ -324,35 +343,57 @@ static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx) return addr; } -static inline void iwl_pcie_tfd_set_tb(struct iwl_tfd *tfd, u8 idx, - dma_addr_t addr, u16 len) +static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd, + u8 idx, dma_addr_t addr, u16 len) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; - u16 hi_n_len = len << 4; + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; - put_unaligned_le32(addr, &tb->lo); - if (sizeof(dma_addr_t) > sizeof(u32)) - hi_n_len |= ((addr >> 16) >> 16) & 0xF; + put_unaligned_le64(addr, &tb->addr); + tb->tb_len = cpu_to_le16(len); - tb->hi_n_len = cpu_to_le16(hi_n_len); + tfd_fh->num_tbs = cpu_to_le16(idx + 1); + } else { + struct iwl_tfd *tfd_fh = (void *)tfd; + struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx]; - tfd->num_tbs = idx + 1; + u16 hi_n_len = len << 4; + + put_unaligned_le32(addr, &tb->lo); + if (sizeof(dma_addr_t) > sizeof(u32)) + hi_n_len |= ((addr >> 16) >> 16) & 0xF; + + tb->hi_n_len = cpu_to_le16(hi_n_len); + + tfd_fh->num_tbs = idx + 1; + } } -static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_tfd *tfd) +static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *tfd) { - return tfd->num_tbs & 0x1f; + struct iwl_tfd *tfd_fh; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + + return le16_to_cpu(tfd_fh->num_tbs) & 0x1f; + } + + tfd_fh = (void *)tfd; + return tfd_fh->num_tbs & 0x1f; } static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, struct iwl_cmd_meta *meta, - struct iwl_tfd *tfd) + struct iwl_txq *txq, int index) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int i, num_tbs; + void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index); /* Sanity check on number of chunks */ - num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); + num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); @@ -365,16 +406,28 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, for (i = 1; i < num_tbs; i++) { if (meta->tbs & BIT(i)) dma_unmap_page(trans->dev, - iwl_pcie_tfd_tb_get_addr(tfd, i), - iwl_pcie_tfd_tb_get_len(tfd, i), + iwl_pcie_tfd_tb_get_addr(trans, tfd, i), + iwl_pcie_tfd_tb_get_len(trans, tfd, i), DMA_TO_DEVICE); else dma_unmap_single(trans->dev, - 
iwl_pcie_tfd_tb_get_addr(tfd, i), - iwl_pcie_tfd_tb_get_len(tfd, i), + iwl_pcie_tfd_tb_get_addr(trans, tfd, + i), + iwl_pcie_tfd_tb_get_len(trans, tfd, + i), DMA_TO_DEVICE); } - tfd->num_tbs = 0; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + + tfd_fh->num_tbs = 0; + } else { + struct iwl_tfd *tfd_fh = (void *)tfd; + + tfd_fh->num_tbs = 0; + } + } /* @@ -388,8 +441,6 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, */ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) { - struct iwl_tfd *tfd_tmp = txq->tfds; - /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and * idx is bounded by n_window */ @@ -401,7 +452,7 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) /* We have only q->n_window txq->entries, but we use * TFD_QUEUE_SIZE_MAX tfds */ - iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr]); + iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr); /* free SKB */ if (txq->entries) { @@ -425,19 +476,18 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_queue *q; - struct iwl_tfd *tfd, *tfd_tmp; + void *tfd; u32 num_tbs; q = &txq->q; - tfd_tmp = txq->tfds; - tfd = &tfd_tmp[q->write_ptr]; + tfd = txq->tfds + trans_pcie->tfd_size * q->write_ptr; if (reset) - memset(tfd, 0, sizeof(*tfd)); + memset(tfd, 0, trans_pcie->tfd_size); - num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); + num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); - /* Each TFD can point to a maximum 20 Tx buffers */ + /* Each TFD can point to a maximum max_tbs Tx buffers */ if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Error can not send more than %d chunks\n", trans_pcie->max_tbs); @@ -448,7 +498,7 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, "Unaligned address = %llx\n", (unsigned long long)addr)) return -EINVAL; - iwl_pcie_tfd_set_tb(tfd, num_tbs, addr, len); + iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len); return num_tbs; } @@ -458,7 +508,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, u32 txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX; + size_t tfd_sz = trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX; size_t tb0_buf_sz; int i; @@ -672,7 +722,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) /* De-alloc circular buffer of TFDs */ if (txq->tfds) { dma_free_coherent(dev, - sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX, + trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX, txq->tfds, txq->q.dma_addr); txq->q.dma_addr = 0; txq->tfds = NULL; @@ -1616,8 +1666,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, copy_size - tb0_size, DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); idx = -ENOMEM; goto out; } @@ -1640,8 +1689,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, phys_addr = dma_map_single(trans->dev, (void *)data, cmdlen[i], DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); idx = -ENOMEM; goto out; } @@ -1721,7 +1769,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans, meta = &txq->entries[cmd_index].meta; cmd_id = 
iwl_cmd_id(cmd->hdr.cmd, group_id, 0); - iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index]); + iwl_pcie_tfd_unmap(trans, meta, txq, index); /* Input error checking is done when commands are added to queue. */ if (meta->flags & CMD_WANT_SKB) { @@ -1919,6 +1967,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *out_meta, struct iwl_device_cmd *dev_cmd, u16 tb1_len) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_queue *q = &txq->q; u16 tb2_len; int i; @@ -1934,8 +1983,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb->data + hdr_len, tb2_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); return -EINVAL; } iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false); @@ -1954,8 +2002,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); return -EINVAL; } tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys, @@ -1965,8 +2012,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, } trace_iwlwifi_dev_tx(trans->dev, skb, - &txq->tfds[txq->q.write_ptr], - sizeof(struct iwl_tfd), + iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, skb->data + hdr_len, tb2_len); trace_iwlwifi_dev_tx_data(trans->dev, skb, @@ -2041,8 +2088,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, IEEE80211_CCMP_HDR_LEN : 0; trace_iwlwifi_dev_tx(trans->dev, skb, - &txq->tfds[txq->q.write_ptr], - sizeof(struct iwl_tfd), + iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, NULL, 0); @@ -2198,7 +2245,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, return 0; out_unmap: - iwl_pcie_tfd_unmap(trans, out_meta, &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); return ret; } #else /* CONFIG_INET */ From 4fe10bc6038a6b3f6a025aca8fc9c6c23e046b1e Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 4 Jul 2016 14:34:26 +0300 Subject: [PATCH 0239/1050] iwlwifi: change byte count table for a000 devices Since the TFD was enlarged to 256 bytes, the fetch of the TFD itself is very expensive. To make the DRAM-to-SRAM transfer more efficient, bits 12-13 will indicate the number of 64 byte chunks that should be transferred to SRAM.
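A worked example of the new encoding, as a sketch that mirrors the code added below (the 3-TB case restates the comment in the patch; the 25-TB maximum is an assumption derived from the 256-byte TFD size):

	/* bits 0-11 carry the byte count, bits 12-13 the chunk count - 1 */
	filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
			  num_tbs * sizeof(struct iwl_tfh_tb);
	num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
	/*  3 TBs ->  32 bytes filled -> field value 0 -> one 64-byte chunk   */
	/* 25 TBs -> 252 bytes filled -> field value 3 -> four 64-byte chunks */
	bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
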
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-fh.h | 7 +++- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 36 ++++++++++++++++---- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index 98d2ff240ea5..33ef5372d195 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -734,8 +734,13 @@ struct iwl_tfh_tfd { /** * struct iwlagn_schedq_bc_tbl scheduler byte count table * base physical address provided by SCD_DRAM_BASE_ADDR + * For devices up to a000: * @tfd_offset 0-12 - tx command byte count - * 12-16 - station index + * 12-16 - station index + * For a000 and on: + * @tfd_offset 0-12 - tx command byte count + * 12-13 - number of 64 byte chunks + * 14-16 - reserved */ struct iwlagn_scd_bc_tbl { __le16 tfd_offset[TFD_QUEUE_BC_SIZE]; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index f893ee111b46..b4b925ed5267 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -176,14 +176,14 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data) * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array */ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, - struct iwl_txq *txq, u16 byte_cnt) + struct iwl_txq *txq, u16 byte_cnt, + int num_tbs) { struct iwlagn_scd_bc_tbl *scd_bc_tbl; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int write_ptr = txq->q.write_ptr; int txq_id = txq->q.id; u8 sec_ctl = 0; - u8 sta_id = 0; u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = @@ -191,7 +191,6 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; - sta_id = tx_cmd->sta_id; sec_ctl = tx_cmd->sec_ctl; switch (sec_ctl & TX_CMD_SEC_MSK) { @@ -205,14 +204,32 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN; break; } - if (trans_pcie->bc_table_dword) len = DIV_ROUND_UP(len, 4); if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX)) return; - bc_ent = cpu_to_le16(len | (sta_id << 12)); + if (trans->cfg->use_tfh) { + u8 filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) + + num_tbs * sizeof(struct iwl_tfh_tb); + /* + * filled_tfd_size contains the number of filled bytes in the + * TFD. + * Dividing it by 64 will give the number of chunks to fetch + * to SRAM- 0 for one chunk, 1 for 2 and so on. 
+ * If, for example, TFD contains only 3 TBs then 32 bytes + * of the TFD are used, and only one chunk of 64 bytes should + * be fetched + */ + u8 num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1; + + bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12)); + } else { + u8 sta_id = tx_cmd->sta_id; + + bc_ent = cpu_to_le16(len | (sta_id << 12)); + } scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent; @@ -240,6 +257,7 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans, sta_id = tx_cmd->sta_id; bc_ent = cpu_to_le16(1 | (sta_id << 12)); + scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent; if (read_ptr < TFD_QUEUE_SIZE_BC_DUP) @@ -1126,7 +1144,8 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, txq->entries[txq->q.read_ptr].skb = NULL; - iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); + if (!trans->cfg->use_tfh) + iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); iwl_pcie_txq_free_tfd(trans, txq); } @@ -2272,6 +2291,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_queue *q; dma_addr_t tb0_phys, tb1_phys, scratch_phys; void *tb1_addr; + void *tfd; u16 len, tb1_len; bool wait_write_ptr; __le16 fc; @@ -2410,8 +2430,10 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, goto out_err; } + tfd = iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr); /* Set up entry for this TFD in Tx byte-count array */ - iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len)); + iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len), + iwl_pcie_tfd_get_num_tbs(trans, tfd)); wait_write_ptr = ieee80211_has_morefrags(fc); From bb98ecd4d347f052887518293736ab70c9909b2a Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 7 Jul 2016 18:17:45 +0300 Subject: [PATCH 0240/1050] iwlwifi: pcie: merge iwl_queue and iwl_txq The original intent was to have the general iwl_queue shared between RX and TX queues, but that is not how it turned out. Since it is not shared with any struct but iwl_txq, it only adds unnecessary complexity. Merge those structs. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 80 ++++--- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 +- .../net/wireless/intel/iwlwifi/pcie/trans.c | 30 ++- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 197 +++++++++--------- 4 files changed, 146 insertions(+), 163 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index d185692676fd..cad2d1199321 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -195,39 +195,6 @@ struct iwl_cmd_meta { u32 tbs; }; -/* - * Generic queue structure - * - * Contains common data for Rx and Tx queues. - * - * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware - * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless - * there might be HW changes in the future). For the normal TX - * queues, n_window, which is the size of the software queue data - * is also 256; however, for the command queue, n_window is only - * 32 since we don't need so many commands pending. Since the HW - * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. As a result, - * the software buffers (in the variables @meta, @txb in struct - * iwl_txq) only have 32 entries, while the HW buffers (@tfds in - * the same struct) have 256. - * This means that we end up with the following: - * HW entries: | 0 | ... 
| N * 32 | ... | N * 32 + 31 | ... | 255 | - * SW entries: | 0 | ... | 31 | - * where N is a number between 0 and 7. This means that the SW - * data is a window overlayed over the HW queue. - */ -struct iwl_queue { - int write_ptr; /* 1-st empty entry (index) host_w*/ - int read_ptr; /* last used entry (index) host_r*/ - /* use for monitoring and recovering the stuck queue */ - dma_addr_t dma_addr; /* physical addr for BD's */ - int n_window; /* safe queue window */ - u32 id; - int low_mark; /* low watermark, resume queue if free - * space more than this */ - int high_mark; /* high watermark, stop queue if free - * space less than this */ -}; #define TFD_TX_CMD_SLOTS 256 #define TFD_CMD_SLOTS 32 @@ -274,12 +241,31 @@ struct iwl_pcie_first_tb_buf { * @wd_timeout: queue watchdog timeout (jiffies) - per queue * @frozen: tx stuck queue timer is frozen * @frozen_expiry_remainder: remember how long until the timer fires + * @write_ptr: 1-st empty entry (index) host_w + * @read_ptr: last used entry (index) host_r + * @dma_addr: physical addr for BD's + * @n_window: safe queue window + * @id: queue id + * @low_mark: low watermark, resume queue if free space more than this + * @high_mark: high watermark, stop queue if free space less than this * * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame * descriptors) and required locking structures. + * + * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware + * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless + * there might be HW changes in the future). For the normal TX + * queues, n_window, which is the size of the software queue data + * is also 256; however, for the command queue, n_window is only + * 32 since we don't need so many commands pending. Since the HW + * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. + * This means that we end up with the following: + * HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 | + * SW entries: | 0 | ... | 31 | + * where N is a number between 0 and 7. This means that the SW + * data is a window overlayed over the HW queue. 
*/ struct iwl_txq { - struct iwl_queue q; void *tfds; struct iwl_pcie_first_tb_buf *first_tb_bufs; dma_addr_t first_tb_dma; @@ -295,6 +281,14 @@ struct iwl_txq { bool block; unsigned long wd_timeout; struct sk_buff_head overflow_q; + + int write_ptr; + int read_ptr; + dma_addr_t dma_addr; + int n_window; + u32 id; + int low_mark; + int high_mark; }; static inline dma_addr_t @@ -633,9 +627,9 @@ static inline void iwl_wake_queue(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (test_and_clear_bit(txq->q.id, trans_pcie->queue_stopped)) { - IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->q.id); - iwl_op_mode_queue_not_full(trans->op_mode, txq->q.id); + if (test_and_clear_bit(txq->id, trans_pcie->queue_stopped)) { + IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->id); + iwl_op_mode_queue_not_full(trans->op_mode, txq->id); } } @@ -644,22 +638,22 @@ static inline void iwl_stop_queue(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (!test_and_set_bit(txq->q.id, trans_pcie->queue_stopped)) { - iwl_op_mode_queue_full(trans->op_mode, txq->q.id); - IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->q.id); + if (!test_and_set_bit(txq->id, trans_pcie->queue_stopped)) { + iwl_op_mode_queue_full(trans->op_mode, txq->id); + IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->id); } else IWL_DEBUG_TX_QUEUES(trans, "hwq %d already stopped\n", - txq->q.id); + txq->id); } -static inline bool iwl_queue_used(const struct iwl_queue *q, int i) +static inline bool iwl_queue_used(const struct iwl_txq *q, int i) { return q->write_ptr >= q->read_ptr ? (i >= q->read_ptr && i < q->write_ptr) : !(i < q->read_ptr && i >= q->write_ptr); } -static inline u8 get_cmd_index(struct iwl_queue *q, u32 index) +static inline u8 get_cmd_index(struct iwl_txq *q, u32 index) { return index & (q->n_window - 1); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 5c36e6d00622..452387cd5df4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1142,7 +1142,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, sequence = le16_to_cpu(pkt->hdr.sequence); index = SEQ_TO_INDEX(sequence); - cmd_index = get_cmd_index(&txq->q, index); + cmd_index = get_cmd_index(txq, index); if (rxq->id == 0) iwl_op_mode_rx(trans->op_mode, &rxq->napi, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index e908bb8e10b6..361ccf651e9b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1899,7 +1899,7 @@ static void iwl_trans_pcie_freeze_txq_timer(struct iwl_trans *trans, txq->frozen = freeze; - if (txq->q.read_ptr == txq->q.write_ptr) + if (txq->read_ptr == txq->write_ptr) goto next_queue; if (freeze) { @@ -1947,7 +1947,7 @@ static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block) txq->block--; if (!txq->block) { iwl_write32(trans, HBUS_TARG_WRPTR, - txq->q.write_ptr | (i << 8)); + txq->write_ptr | (i << 8)); } } else if (block) { txq->block++; @@ -1967,14 +1967,14 @@ void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq) int cnt; IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n", - txq->q.read_ptr, txq->q.write_ptr); + txq->read_ptr, txq->write_ptr); if (trans->cfg->use_tfh) /* TODO: access new SCD registers and dump them */ return; scd_sram_addr = 
trans_pcie->scd_base_addr + - SCD_TX_STTS_QUEUE_OFFSET(txq->q.id); + SCD_TX_STTS_QUEUE_OFFSET(txq->id); iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf)); iwl_print_hex_error(trans, buf, sizeof(buf)); @@ -2009,7 +2009,6 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq; - struct iwl_queue *q; int cnt; unsigned long now = jiffies; int ret = 0; @@ -2027,13 +2026,12 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt); txq = &trans_pcie->txq[cnt]; - q = &txq->q; - wr_ptr = ACCESS_ONCE(q->write_ptr); + wr_ptr = ACCESS_ONCE(txq->write_ptr); - while (q->read_ptr != ACCESS_ONCE(q->write_ptr) && + while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) && !time_after(jiffies, now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) { - u8 write_ptr = ACCESS_ONCE(q->write_ptr); + u8 write_ptr = ACCESS_ONCE(txq->write_ptr); if (WARN_ONCE(wr_ptr != write_ptr, "WR pointer moved while flushing %d -> %d\n", @@ -2042,7 +2040,7 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) usleep_range(1000, 2000); } - if (q->read_ptr != q->write_ptr) { + if (txq->read_ptr != txq->write_ptr) { IWL_ERR(trans, "fail to flush all tx fifo queues Q %d\n", cnt); ret = -ETIMEDOUT; @@ -2210,7 +2208,6 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, struct iwl_trans *trans = file->private_data; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq; - struct iwl_queue *q; char *buf; int pos = 0; int cnt; @@ -2228,10 +2225,9 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { txq = &trans_pcie->txq[cnt]; - q = &txq->q; pos += scnprintf(buf + pos, bufsz - pos, "hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n", - cnt, q->read_ptr, q->write_ptr, + cnt, txq->read_ptr, txq->write_ptr, !!test_bit(cnt, trans_pcie->queue_used), !!test_bit(cnt, trans_pcie->queue_stopped), txq->need_update, txq->frozen, @@ -2659,7 +2655,7 @@ static struct iwl_trans_dump_data /* host commands */ len += sizeof(*data) + - cmdq->q.n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); + cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); /* FW monitor */ if (trans_pcie->fw_mon_page) { @@ -2727,9 +2723,9 @@ static struct iwl_trans_dump_data data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD); txcmd = (void *)data->data; spin_lock_bh(&cmdq->lock); - ptr = cmdq->q.write_ptr; - for (i = 0; i < cmdq->q.n_window; i++) { - u8 idx = get_cmd_index(&cmdq->q, ptr); + ptr = cmdq->write_ptr; + for (i = 0; i < cmdq->n_window; i++) { + u8 idx = get_cmd_index(cmdq, ptr); u32 caplen, cmdlen; cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds + diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index b4b925ed5267..0d156fddbf3d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -71,7 +71,7 @@ * ***************************************************/ -static int iwl_queue_space(const struct iwl_queue *q) +static int iwl_queue_space(const struct iwl_txq *q) { unsigned int max; unsigned int used; @@ -102,7 +102,7 @@ static int iwl_queue_space(const struct iwl_queue *q) /* * iwl_queue_init - Initialize queue's high/low-water and read/write indexes */ -static int iwl_queue_init(struct 
iwl_queue *q, int slots_num, u32 id) +static int iwl_queue_init(struct iwl_txq *q, int slots_num, u32 id) { q->n_window = slots_num; q->id = id; @@ -158,13 +158,13 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data) spin_lock(&txq->lock); /* check if triggered erroneously */ - if (txq->q.read_ptr == txq->q.write_ptr) { + if (txq->read_ptr == txq->write_ptr) { spin_unlock(&txq->lock); return; } spin_unlock(&txq->lock); - IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->q.id, + IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->id, jiffies_to_msecs(txq->wd_timeout)); iwl_trans_pcie_log_scd_error(trans, txq); @@ -181,13 +181,13 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, { struct iwlagn_scd_bc_tbl *scd_bc_tbl; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int write_ptr = txq->q.write_ptr; - int txq_id = txq->q.id; + int write_ptr = txq->write_ptr; + int txq_id = txq->id; u8 sec_ctl = 0; u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = - (void *) txq->entries[txq->q.write_ptr].cmd->payload; + (void *)txq->entries[txq->write_ptr].cmd->payload; scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; @@ -244,12 +244,12 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; - int txq_id = txq->q.id; - int read_ptr = txq->q.read_ptr; + int txq_id = txq->id; + int read_ptr = txq->read_ptr; u8 sta_id = 0; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = - (void *)txq->entries[txq->q.read_ptr].cmd->payload; + (void *)txq->entries[read_ptr].cmd->payload; WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX); @@ -273,7 +273,7 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 reg = 0; - int txq_id = txq->q.id; + int txq_id = txq->id; lockdep_assert_held(&txq->lock); @@ -307,10 +307,10 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, * if not in power-save mode, uCode will never sleep when we're * trying to tx (during RFKILL, we're not trying to tx). 
*/ - IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->q.write_ptr); + IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr); if (!txq->block) iwl_write32(trans, HBUS_TARG_WRPTR, - txq->q.write_ptr | (txq_id << 8)); + txq->write_ptr | (txq_id << 8)); } void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans) @@ -462,8 +462,8 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and * idx is bounded by n_window */ - int rd_ptr = txq->q.read_ptr; - int idx = get_cmd_index(&txq->q, rd_ptr); + int rd_ptr = txq->read_ptr; + int idx = get_cmd_index(txq, rd_ptr); lockdep_assert_held(&txq->lock); @@ -493,12 +493,10 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, dma_addr_t addr, u16 len, bool reset) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_queue *q; void *tfd; u32 num_tbs; - q = &txq->q; - tfd = txq->tfds + trans_pcie->tfd_size * q->write_ptr; + tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr; if (reset) memset(tfd, 0, trans_pcie->tfd_size); @@ -537,7 +535,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, (unsigned long)txq); txq->trans_pcie = trans_pcie; - txq->q.n_window = slots_num; + txq->n_window = slots_num; txq->entries = kcalloc(slots_num, sizeof(struct iwl_pcie_txq_entry), @@ -558,7 +556,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, /* Circular buffer of transmit frame descriptors (TFDs), * shared with device */ txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz, - &txq->q.dma_addr, GFP_KERNEL); + &txq->dma_addr, GFP_KERNEL); if (!txq->tfds) goto error; @@ -572,11 +570,11 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, if (!txq->first_tb_bufs) goto err_free_tfds; - txq->q.id = txq_id; + txq->id = txq_id; return 0; err_free_tfds: - dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr); + dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr); error: if (txq->entries && txq_id == trans_pcie->cmd_queue) for (i = 0; i < slots_num; i++) @@ -600,7 +598,7 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1)); /* Initialize queue's high/low-water marks, and head/tail indexes */ - ret = iwl_queue_init(&txq->q, slots_num, txq_id); + ret = iwl_queue_init(txq, slots_num, txq_id); if (ret) return ret; @@ -614,10 +612,10 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, if (trans->cfg->use_tfh) iwl_write_direct64(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr); + txq->dma_addr); else iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr >> 8); + txq->dma_addr >> 8); return 0; } @@ -664,15 +662,14 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; - struct iwl_queue *q = &txq->q; spin_lock_bh(&txq->lock); - while (q->write_ptr != q->read_ptr) { + while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", - txq_id, q->read_ptr); + txq_id, txq->read_ptr); if (txq_id != trans_pcie->cmd_queue) { - struct sk_buff *skb = txq->entries[q->read_ptr].skb; + struct sk_buff *skb = txq->entries[txq->read_ptr].skb; if (WARN_ON_ONCE(!skb)) continue; @@ -680,15 +677,15 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) iwl_pcie_free_tso_page(trans_pcie, skb); } 
iwl_pcie_txq_free_tfd(trans, txq); - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr); + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr); - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { unsigned long flags; spin_lock_irqsave(&trans_pcie->reg_lock, flags); if (txq_id != trans_pcie->cmd_queue) { IWL_DEBUG_RPM(trans, "Q %d - last tx freed\n", - q->id); + txq->id); iwl_trans_unref(trans); } else { iwl_pcie_clear_cmd_in_flight(trans); @@ -732,7 +729,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) /* De-alloc array of command/tx buffers */ if (txq_id == trans_pcie->cmd_queue) - for (i = 0; i < txq->q.n_window; i++) { + for (i = 0; i < txq->n_window; i++) { kzfree(txq->entries[i].cmd); kzfree(txq->entries[i].free_buf); } @@ -741,12 +738,12 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) if (txq->tfds) { dma_free_coherent(dev, trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX, - txq->tfds, txq->q.dma_addr); - txq->q.dma_addr = 0; + txq->tfds, txq->dma_addr); + txq->dma_addr = 0; txq->tfds = NULL; dma_free_coherent(dev, - sizeof(*txq->first_tb_bufs) * txq->q.n_window, + sizeof(*txq->first_tb_bufs) * txq->n_window, txq->first_tb_bufs, txq->first_tb_dma); } @@ -830,14 +827,14 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans) if (trans->cfg->use_tfh) iwl_write_direct64(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr); + txq->dma_addr); else iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr >> 8); + txq->dma_addr >> 8); iwl_pcie_txq_unmap(trans, txq_id); - txq->q.read_ptr = 0; - txq->q.write_ptr = 0; + txq->read_ptr = 0; + txq->write_ptr = 0; } /* Tell NIC where to find the "keep warm" buffer */ @@ -1081,7 +1078,7 @@ static inline void iwl_pcie_txq_progress(struct iwl_txq *txq) * if empty delete timer, otherwise move timer forward * since we're making progress on this queue */ - if (txq->q.read_ptr == txq->q.write_ptr) + if (txq->read_ptr == txq->write_ptr) del_timer(&txq->stuck_timer); else mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); @@ -1094,7 +1091,6 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1); - struct iwl_queue *q = &txq->q; int last_to_free; /* This function is not meant to release cmd queue*/ @@ -1109,21 +1105,21 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, goto out; } - if (txq->q.read_ptr == tfd_num) + if (txq->read_ptr == tfd_num) goto out; IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n", - txq_id, txq->q.read_ptr, tfd_num, ssn); + txq_id, txq->read_ptr, tfd_num, ssn); /*Since we free until index _not_ inclusive, the one before index is * the last we will free. 
This one must be used */ last_to_free = iwl_queue_dec_wrap(tfd_num); - if (!iwl_queue_used(q, last_to_free)) { + if (!iwl_queue_used(txq, last_to_free)) { IWL_ERR(trans, "%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n", __func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX, - q->write_ptr, q->read_ptr); + txq->write_ptr, txq->read_ptr); goto out; } @@ -1131,9 +1127,9 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, goto out; for (; - q->read_ptr != tfd_num; - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) { - struct sk_buff *skb = txq->entries[txq->q.read_ptr].skb; + txq->read_ptr != tfd_num; + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) { + struct sk_buff *skb = txq->entries[txq->read_ptr].skb; if (WARN_ON_ONCE(!skb)) continue; @@ -1142,7 +1138,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, __skb_queue_tail(skbs, skb); - txq->entries[txq->q.read_ptr].skb = NULL; + txq->entries[txq->read_ptr].skb = NULL; if (!trans->cfg->use_tfh) iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); @@ -1152,7 +1148,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, iwl_pcie_txq_progress(txq); - if (iwl_queue_space(&txq->q) > txq->q.low_mark && + if (iwl_queue_space(txq) > txq->low_mark && test_bit(txq_id, trans_pcie->queue_stopped)) { struct sk_buff_head overflow_skbs; @@ -1184,12 +1180,12 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, } spin_lock_bh(&txq->lock); - if (iwl_queue_space(&txq->q) > txq->q.low_mark) + if (iwl_queue_space(txq) > txq->low_mark) iwl_wake_queue(trans, txq); } - if (q->read_ptr == q->write_ptr) { - IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", q->id); + if (txq->read_ptr == txq->write_ptr) { + IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", txq->id); iwl_trans_unref(trans); } @@ -1251,31 +1247,30 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; - struct iwl_queue *q = &txq->q; unsigned long flags; int nfreed = 0; lockdep_assert_held(&txq->lock); - if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(q, idx))) { + if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(txq, idx))) { IWL_ERR(trans, "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n", __func__, txq_id, idx, TFD_QUEUE_SIZE_MAX, - q->write_ptr, q->read_ptr); + txq->write_ptr, txq->read_ptr); return; } - for (idx = iwl_queue_inc_wrap(idx); q->read_ptr != idx; - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) { + for (idx = iwl_queue_inc_wrap(idx); txq->read_ptr != idx; + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) { if (nfreed++ > 0) { IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n", - idx, q->write_ptr, q->read_ptr); + idx, txq->write_ptr, txq->read_ptr); iwl_force_nmi(trans); } } - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { spin_lock_irqsave(&trans_pcie->reg_lock, flags); iwl_pcie_clear_cmd_in_flight(trans); spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); @@ -1361,14 +1356,14 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn, */ iwl_scd_txq_disable_agg(trans, txq_id); - ssn = txq->q.read_ptr; + ssn = txq->read_ptr; } } /* Place first TFD at index corresponding to start sequence number. 
* Assumes that ssn_idx is valid (!= 0xFFF) */ - txq->q.read_ptr = (ssn & 0xff); - txq->q.write_ptr = (ssn & 0xff); + txq->read_ptr = (ssn & 0xff); + txq->write_ptr = (ssn & 0xff); iwl_write_direct32(trans, HBUS_TARG_WRPTR, (ssn & 0xff) | (txq_id << 8)); @@ -1484,7 +1479,6 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; - struct iwl_queue *q = &txq->q; struct iwl_device_cmd *out_cmd; struct iwl_cmd_meta *out_meta; unsigned long flags; @@ -1583,7 +1577,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, spin_lock_bh(&txq->lock); - if (iwl_queue_space(q) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) { + if (iwl_queue_space(txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) { spin_unlock_bh(&txq->lock); IWL_ERR(trans, "No space in command queue\n"); @@ -1592,7 +1586,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, goto free_dup_buf; } - idx = get_cmd_index(q, q->write_ptr); + idx = get_cmd_index(txq, txq->write_ptr); out_cmd = txq->entries[idx].cmd; out_meta = &txq->entries[idx].meta; @@ -1611,7 +1605,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, out_cmd->hdr_wide.reserved = 0; out_cmd->hdr_wide.sequence = cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) | - INDEX_TO_SEQ(q->write_ptr)); + INDEX_TO_SEQ(txq->write_ptr)); cmd_pos = sizeof(struct iwl_cmd_header_wide); copy_size = sizeof(struct iwl_cmd_header_wide); @@ -1619,7 +1613,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id); out_cmd->hdr.sequence = cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) | - INDEX_TO_SEQ(q->write_ptr)); + INDEX_TO_SEQ(txq->write_ptr)); out_cmd->hdr.group_id = 0; cmd_pos = sizeof(struct iwl_cmd_header); @@ -1669,7 +1663,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, iwl_get_cmd_string(trans, cmd->id), group_id, out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence), - cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue); + cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue); /* start the TFD with the minimum copy bytes */ tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE); @@ -1685,7 +1679,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, copy_size - tb0_size, DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); idx = -ENOMEM; goto out; } @@ -1708,7 +1703,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, phys_addr = dma_map_single(trans->dev, (void *)data, cmdlen[i], DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); idx = -ENOMEM; goto out; } @@ -1725,7 +1721,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide); /* start timer if queue currently empty */ - if (q->read_ptr == q->write_ptr && txq->wd_timeout) + if (txq->read_ptr == txq->write_ptr && txq->wd_timeout) mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); spin_lock_irqsave(&trans_pcie->reg_lock, flags); @@ -1737,7 +1733,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, } /* Increment and update queue's write index */ - q->write_ptr = iwl_queue_inc_wrap(q->write_ptr); + txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr); 
iwl_pcie_txq_inc_wr_ptr(trans, txq); spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); @@ -1775,15 +1771,15 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans, if (WARN(txq_id != trans_pcie->cmd_queue, "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n", txq_id, trans_pcie->cmd_queue, sequence, - trans_pcie->txq[trans_pcie->cmd_queue].q.read_ptr, - trans_pcie->txq[trans_pcie->cmd_queue].q.write_ptr)) { + trans_pcie->txq[trans_pcie->cmd_queue].read_ptr, + trans_pcie->txq[trans_pcie->cmd_queue].write_ptr)) { iwl_print_hex_error(trans, pkt, 32); return; } spin_lock_bh(&txq->lock); - cmd_index = get_cmd_index(&txq->q, index); + cmd_index = get_cmd_index(txq, index); cmd = txq->entries[cmd_index].cmd; meta = &txq->entries[cmd_index].meta; cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0); @@ -1901,14 +1897,13 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans, HOST_COMPLETE_TIMEOUT); if (!ret) { struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; - struct iwl_queue *q = &txq->q; IWL_ERR(trans, "Error sending %s: time out after %dms.\n", iwl_get_cmd_string(trans, cmd->id), jiffies_to_msecs(HOST_COMPLETE_TIMEOUT)); IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n", - q->read_ptr, q->write_ptr); + txq->read_ptr, txq->write_ptr); clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status); IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n", @@ -1987,7 +1982,6 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_device_cmd *dev_cmd, u16 tb1_len) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_queue *q = &txq->q; u16 tb2_len; int i; @@ -2002,7 +1996,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb->data + hdr_len, tb2_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); return -EINVAL; } iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false); @@ -2021,7 +2016,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); return -EINVAL; } tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys, @@ -2031,7 +2027,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, } trace_iwlwifi_dev_tx(trans->dev, skb, - iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, skb->data + hdr_len, tb2_len); @@ -2093,7 +2089,6 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; - struct iwl_queue *q = &txq->q; u16 length, iv_len, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; @@ -2107,7 +2102,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, IEEE80211_CCMP_HDR_LEN : 0; trace_iwlwifi_dev_tx(trans->dev, skb, - iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr), + iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, NULL, 0); @@ -2264,7 +2259,7 @@ 
static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, return 0; out_unmap: - iwl_pcie_tfd_unmap(trans, out_meta, txq, q->write_ptr); + iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr); return ret; } #else /* CONFIG_INET */ @@ -2288,7 +2283,6 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload; struct iwl_cmd_meta *out_meta; struct iwl_txq *txq; - struct iwl_queue *q; dma_addr_t tb0_phys, tb1_phys, scratch_phys; void *tb1_addr; void *tfd; @@ -2300,7 +2294,6 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, bool amsdu; txq = &trans_pcie->txq[txq_id]; - q = &txq->q; if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used), "TX on unused queue %d\n", txq_id)) @@ -2335,11 +2328,11 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, spin_lock(&txq->lock); - if (iwl_queue_space(q) < q->high_mark) { + if (iwl_queue_space(txq) < txq->high_mark) { iwl_stop_queue(trans, txq); /* don't put the packet on the ring, if there is no room */ - if (unlikely(iwl_queue_space(q) < 3)) { + if (unlikely(iwl_queue_space(txq) < 3)) { struct iwl_device_cmd **dev_cmd_ptr; dev_cmd_ptr = (void *)((u8 *)skb->cb + @@ -2360,19 +2353,19 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, */ wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); WARN_ONCE(txq->ampdu && - (wifi_seq & 0xff) != q->write_ptr, + (wifi_seq & 0xff) != txq->write_ptr, "Q: %d WiFi Seq %d tfdNum %d", - txq_id, wifi_seq, q->write_ptr); + txq_id, wifi_seq, txq->write_ptr); /* Set up driver data for this TFD */ - txq->entries[q->write_ptr].skb = skb; - txq->entries[q->write_ptr].cmd = dev_cmd; + txq->entries[txq->write_ptr].skb = skb; + txq->entries[txq->write_ptr].cmd = dev_cmd; dev_cmd->hdr.sequence = cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) | - INDEX_TO_SEQ(q->write_ptr))); + INDEX_TO_SEQ(txq->write_ptr))); - tb0_phys = iwl_pcie_get_first_tb_dma(txq, q->write_ptr); + tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr); scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) + offsetof(struct iwl_tx_cmd, scratch); @@ -2380,7 +2373,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys); /* Set up first empty entry in queue's array of Tx/cmd buffers */ - out_meta = &txq->entries[q->write_ptr].meta; + out_meta = &txq->entries[txq->write_ptr].meta; out_meta->flags = 0; /* @@ -2405,7 +2398,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, } /* The first TB points to bi-directional DMA data */ - memcpy(&txq->first_tb_bufs[q->write_ptr], &dev_cmd->hdr, + memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr, IWL_FIRST_TB_SIZE); iwl_pcie_txq_build_tfd(trans, txq, tb0_phys, IWL_FIRST_TB_SIZE, true); @@ -2430,7 +2423,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, goto out_err; } - tfd = iwl_pcie_get_tfd(trans_pcie, txq, q->write_ptr); + tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr); /* Set up entry for this TFD in Tx byte-count array */ iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len), iwl_pcie_tfd_get_num_tbs(trans, tfd)); @@ -2438,7 +2431,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, wait_write_ptr = ieee80211_has_morefrags(fc); /* start timer if queue currently empty */ - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { if (txq->wd_timeout) { /* * If the TXQ is 
active, then set the timer, if not, @@ -2452,12 +2445,12 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, else txq->frozen_expiry_remainder = txq->wd_timeout; } - IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", q->id); + IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id); iwl_trans_ref(trans); } /* Tell device the write index *just past* this latest filled TFD */ - q->write_ptr = iwl_queue_inc_wrap(q->write_ptr); + txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr); if (!wait_write_ptr) iwl_pcie_txq_inc_wr_ptr(trans, txq); From c46e7724bfe9bbff17bc468903a757248b99e4ed Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 17 Jul 2016 14:24:55 +0300 Subject: [PATCH 0241/1050] iwlwifi: mvm: support new BA notification response Support new format. TX response will not be sent anymore, so all needed data is in the BA response. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/mvm/fw-api-tx.h | 79 +++++++++ drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 159 ++++++++++++------ 2 files changed, 184 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 6b4c63a5e625..0055a960238b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -577,6 +577,85 @@ struct iwl_mvm_ba_notif { u8 reserved1; } __packed; +/** + * struct iwl_mvm_compressed_ba_tfd - progress of a TFD queue + * @q_num: TFD queue number + * @tfd_index: Index of first un-acked frame in the TFD queue + */ +struct iwl_mvm_compressed_ba_tfd { + u8 q_num; + u8 reserved; + __le16 tfd_index; +} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */ + +/** + * struct iwl_mvm_compressed_ba_ratid - progress of a RA TID queue + * @q_num: RA TID queue number + * @tid: TID of the queue + * @ssn: BA window current SSN + */ +struct iwl_mvm_compressed_ba_ratid { + u8 q_num; + u8 tid; + __le16 ssn; +} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */ + +/* + * enum iwl_mvm_ba_resp_flags - TX aggregation status + * @IWL_MVM_BA_RESP_TX_AGG: generated due to BA + * @IWL_MVM_BA_RESP_TX_BAR: generated due to BA after BAR + * @IWL_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA + * @IWL_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun + * @IWL_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill + * @IWL_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the + * expected time + */ +enum iwl_mvm_ba_resp_flags { + IWL_MVM_BA_RESP_TX_AGG, + IWL_MVM_BA_RESP_TX_BAR, + IWL_MVM_BA_RESP_TX_AGG_FAIL, + IWL_MVM_BA_RESP_TX_UNDERRUN, + IWL_MVM_BA_RESP_TX_BT_KILL, + IWL_MVM_BA_RESP_TX_DSP_TIMEOUT +}; + +/** + * struct iwl_mvm_compressed_ba_notif - notifies about reception of BA + * ( BA_NOTIF = 0xc5 ) + * @flags: status flag, see the &iwl_mvm_ba_resp_flags + * @sta_id: Index of recipient (BA-sending) station in fw's station table + * @reduced_txp: power reduced according to TPC. This is the actual value and + * not a copy from the LQ command. Thus, if not the first rate was used + * for Tx-ing then this value will be set to 0 by FW. 
+ * @initial_rate: TLC rate info, initial rate index, TLC table color + * @retry_cnt: retry count + * @query_byte_cnt: SCD query byte count + * @query_frame_cnt: SCD query frame count + * @txed: number of frames sent in the aggregation (all-TIDs) + * @done: number of frames that were Acked by the BA (all-TIDs) + * @wireless_time: Wireless-media time + * @tx_rate: the rate the aggregation was sent at + * @tfd_cnt: number of TFD-Q elements + * @ra_tid_cnt: number of RATID-Q elements + */ +struct iwl_mvm_compressed_ba_notif { + __le32 flags; + u8 sta_id; + u8 reduced_txp; + u8 initial_rate; + u8 retry_cnt; + __le32 query_byte_cnt; + __le16 query_frame_cnt; + __le16 txed; + __le16 done; + __le32 wireless_time; + __le32 tx_rate; + __le16 tfd_cnt; + __le16 ra_tid_cnt; + struct iwl_mvm_compressed_ba_tfd tfd[1]; + struct iwl_mvm_compressed_ba_ratid ra_tid[0]; +} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */ + /** * struct iwl_mac_beacon_cmd_v6 - beacon template command * @tx: the tx commands associated with the beacon frame diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 62714709ba8f..96482a5de38c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1584,41 +1584,16 @@ void iwl_mvm_rx_tx_cmd(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) iwl_mvm_rx_tx_cmd_agg(mvm, pkt); } -static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info, - struct iwl_mvm_ba_notif *ba_notif, - struct iwl_mvm_tid_data *tid_data) +static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid, + int txq, int index, + struct ieee80211_tx_info *ba_info, u32 rate) { - info->flags |= IEEE80211_TX_STAT_AMPDU; - info->status.ampdu_ack_len = ba_notif->txed_2_done; - info->status.ampdu_len = ba_notif->txed; - iwl_mvm_hwrate_to_tx_status(tid_data->rate_n_flags, - info); - /* TODO: not accounted if the whole A-MPDU failed */ - info->status.tx_time = tid_data->tx_time; - info->status.status_driver_data[0] = - (void *)(uintptr_t)ba_notif->reduced_txp; - info->status.status_driver_data[1] = - (void *)(uintptr_t)tid_data->rate_n_flags; -} - -void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) -{ - struct iwl_rx_packet *pkt = rxb_addr(rxb); - struct iwl_mvm_ba_notif *ba_notif = (void *)pkt->data; struct sk_buff_head reclaimed_skbs; struct iwl_mvm_tid_data *tid_data; struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; struct sk_buff *skb; - int sta_id, tid, freed; - /* "flow" corresponds to Tx queue */ - u16 scd_flow = le16_to_cpu(ba_notif->scd_flow); - /* "ssn" is start of block-ack Tx window, corresponds to index - * (in Tx queue's circular buffer) of first TFD/frame in window */ - u16 ba_resp_scd_ssn = le16_to_cpu(ba_notif->scd_ssn); - - sta_id = ba_notif->sta_id; - tid = ba_notif->tid; + int freed; if (WARN_ONCE(sta_id >= IWL_MVM_STATION_COUNT || tid >= IWL_MAX_TID_COUNT, @@ -1638,10 +1613,10 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) mvmsta = iwl_mvm_sta_from_mac80211(sta); tid_data = &mvmsta->tid_data[tid]; - if (tid_data->txq_id != scd_flow) { + if (tid_data->txq_id != txq) { IWL_ERR(mvm, - "invalid BA notification: Q %d, tid %d, flow %d\n", - tid_data->txq_id, tid, scd_flow); + "invalid BA notification: Q %d, tid %d\n", + tid_data->txq_id, tid); rcu_read_unlock(); return; } @@ -1655,27 +1630,14 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) * block-ack window (we assume that they've been 
successfully * transmitted ... if not, it's too late anyway). */ - iwl_trans_reclaim(mvm->trans, scd_flow, ba_resp_scd_ssn, - &reclaimed_skbs); + iwl_trans_reclaim(mvm->trans, txq, index, &reclaimed_skbs); - IWL_DEBUG_TX_REPLY(mvm, - "BA_NOTIFICATION Received from %pM, sta_id = %d\n", - (u8 *)&ba_notif->sta_addr_lo32, - ba_notif->sta_id); - IWL_DEBUG_TX_REPLY(mvm, - "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n", - ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl), - (unsigned long long)le64_to_cpu(ba_notif->bitmap), - scd_flow, ba_resp_scd_ssn, ba_notif->txed, - ba_notif->txed_2_done); - - IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n", - ba_notif->reduced_txp); - tid_data->next_reclaimed = ba_resp_scd_ssn; + tid_data->next_reclaimed = index; iwl_mvm_check_ratid_empty(mvm, sta, tid); freed = 0; + ba_info->status.status_driver_data[1] = (void *)(uintptr_t)rate; skb_queue_walk(&reclaimed_skbs, skb) { struct ieee80211_hdr *hdr = (void *)skb->data; @@ -1697,8 +1659,12 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) /* this is the first skb we deliver in this batch */ /* put the rate scaling data there */ - if (freed == 1) - iwl_mvm_tx_info_from_ba_notif(info, ba_notif, tid_data); + if (freed == 1) { + info->flags |= IEEE80211_TX_STAT_AMPDU; + memcpy(&info->status, &ba_info->status, + sizeof(ba_info->status)); + iwl_mvm_hwrate_to_tx_status(rate, info); + } } spin_unlock_bh(&mvmsta->lock); @@ -1708,7 +1674,6 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) * Still it's important to update RS about sent vs. acked. */ if (skb_queue_empty(&reclaimed_skbs)) { - struct ieee80211_tx_info ba_info = {}; struct ieee80211_chanctx_conf *chanctx_conf = NULL; if (mvmsta->vif) @@ -1718,11 +1683,11 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) if (WARN_ON_ONCE(!chanctx_conf)) goto out; - ba_info.band = chanctx_conf->def.chan->band; - iwl_mvm_tx_info_from_ba_notif(&ba_info, ba_notif, tid_data); + ba_info->band = chanctx_conf->def.chan->band; + iwl_mvm_hwrate_to_tx_status(rate, ba_info); IWL_DEBUG_TX_REPLY(mvm, "No reclaim. Update rs directly\n"); - iwl_mvm_rs_tx_status(mvm, sta, tid, &ba_info, false); + iwl_mvm_rs_tx_status(mvm, sta, tid, ba_info, false); } out: @@ -1734,6 +1699,92 @@ out: } } +void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) +{ + struct iwl_rx_packet *pkt = rxb_addr(rxb); + int sta_id, tid, txq, index; + struct ieee80211_tx_info ba_info = {}; + struct iwl_mvm_ba_notif *ba_notif; + struct iwl_mvm_tid_data *tid_data; + struct iwl_mvm_sta *mvmsta; + + if (iwl_mvm_has_new_tx_api(mvm)) { + struct iwl_mvm_compressed_ba_notif *ba_res = + (void *)pkt->data; + + sta_id = ba_res->sta_id; + ba_info.status.ampdu_ack_len = (u8)le16_to_cpu(ba_res->done); + ba_info.status.ampdu_len = (u8)le16_to_cpu(ba_res->txed); + ba_info.status.tx_time = + (u16)le32_to_cpu(ba_res->wireless_time); + ba_info.status.status_driver_data[0] = + (void *)(uintptr_t)ba_res->reduced_txp; + + /* + * TODO: + * When supporting multi TID aggregations - we need to move + * next_reclaimed to be per TXQ and not per TID or handle it + * in a different way. + * This will go together with SN and AddBA offload and cannot + * be handled properly for now. 
+ */ + WARN_ON(le16_to_cpu(ba_res->tfd_cnt) != 1); + iwl_mvm_tx_reclaim(mvm, sta_id, ba_res->ra_tid[0].tid, + (int)ba_res->tfd[0].q_num, + le16_to_cpu(ba_res->tfd[0].tfd_index), + &ba_info, le32_to_cpu(ba_res->tx_rate)); + + IWL_DEBUG_TX_REPLY(mvm, + "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n", + sta_id, le32_to_cpu(ba_res->flags), + le16_to_cpu(ba_res->txed), + le16_to_cpu(ba_res->done)); + return; + } + + ba_notif = (void *)pkt->data; + sta_id = ba_notif->sta_id; + tid = ba_notif->tid; + /* "flow" corresponds to Tx queue */ + txq = le16_to_cpu(ba_notif->scd_flow); + /* "ssn" is start of block-ack Tx window, corresponds to index + * (in Tx queue's circular buffer) of first TFD/frame in window */ + index = le16_to_cpu(ba_notif->scd_ssn); + + rcu_read_lock(); + mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id); + if (WARN_ON_ONCE(!mvmsta)) { + rcu_read_unlock(); + return; + } + + tid_data = &mvmsta->tid_data[tid]; + + ba_info.status.ampdu_ack_len = ba_notif->txed_2_done; + ba_info.status.ampdu_len = ba_notif->txed; + ba_info.status.tx_time = tid_data->tx_time; + ba_info.status.status_driver_data[0] = + (void *)(uintptr_t)ba_notif->reduced_txp; + + rcu_read_unlock(); + + iwl_mvm_tx_reclaim(mvm, sta_id, tid, txq, index, &ba_info, + tid_data->rate_n_flags); + + IWL_DEBUG_TX_REPLY(mvm, + "BA_NOTIFICATION Received from %pM, sta_id = %d\n", + (u8 *)&ba_notif->sta_addr_lo32, ba_notif->sta_id); + + IWL_DEBUG_TX_REPLY(mvm, + "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n", + ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl), + le64_to_cpu(ba_notif->bitmap), txq, index, + ba_notif->txed, ba_notif->txed_2_done); + + IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n", + ba_notif->reduced_txp); +} + /* * Note that there are transports that buffer frames before they reach * the firmware. This means that after flush_tx_path is called, the From 496d83caf354a84a3159c3b7aee9276640982e56 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Sun, 20 Mar 2016 17:57:22 +0200 Subject: [PATCH 0242/1050] iwlwifi: pcie: Configure shared interrupt vector in MSIX mode In case the OS provides fewer interrupts than requested, different causes will share the same interrupt vector as follows: 1. One interrupt less: non-RX causes shared with the FBQ. 2. Two interrupts less: non-RX causes shared with the FBQ and RSS. 3. More than two interrupts less: we will use fewer RSS queues. Also make the request depend on the number of online CPUs instead of possible CPUs.
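To make the fallback scheme concrete, here is a stand-alone sketch of the decision in plain C; the function and macro names are simplified stand-ins, and the real logic lives in iwl_pcie_set_interrupt_capa() in the diff below. The driver requests roughly one vector per CPU plus two and then adapts to what the OS actually grants:

	/* Illustrative sketch only: mirrors the vector-sharing fallback. */
	#define SHARED_IRQ_NON_RX	(1 << 0) /* vector 0 also serves non-RX causes */
	#define SHARED_IRQ_FIRST_RSS	(1 << 1) /* vector 0 also serves the first RSS queue */

	static void pick_vector_sharing(int granted, int online_cpus,
					int *num_rx_queues, int *shared_mask)
	{
		if (granted <= online_cpus) {
			/* at least two vectors short: FBQ and first RSS share vector 0 */
			*num_rx_queues = granted + 1;
			*shared_mask = SHARED_IRQ_NON_RX | SHARED_IRQ_FIRST_RSS;
		} else if (granted == online_cpus + 1) {
			/* one vector short: non-RX causes share vector 0 with the FBQ */
			*num_rx_queues = granted;
			*shared_mask = SHARED_IRQ_NON_RX;
		} else {
			/* full allocation: one vector is dedicated to non-RX causes */
			*num_rx_queues = granted - 1;
			*shared_mask = 0;
		}
	}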
Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 2 + .../wireless/intel/iwlwifi/pcie/internal.h | 21 ++- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 14 ++ .../net/wireless/intel/iwlwifi/pcie/trans.c | 154 ++++++++++++------ 4 files changed, 134 insertions(+), 57 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index 871ad02fdb17..d73e9d436027 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -589,6 +589,8 @@ enum dtd_diode_reg { * Causes for the FH register interrupts */ enum msix_fh_int_causes { + MSIX_FH_INT_CAUSES_Q0 = BIT(0), + MSIX_FH_INT_CAUSES_Q1 = BIT(1), MSIX_FH_INT_CAUSES_D2S_CH0_NUM = BIT(16), MSIX_FH_INT_CAUSES_D2S_CH1_NUM = BIT(17), MSIX_FH_INT_CAUSES_S2D = BIT(19), diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index cad2d1199321..8ad92fac8592 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -303,6 +303,16 @@ struct iwl_tso_hdr_page { u8 *pos; }; +/** + * enum iwl_shared_irq_flags - level of sharing for irq + * @IWL_SHARED_IRQ_NON_RX: interrupt vector serves non rx causes. + * @IWL_SHARED_IRQ_FIRST_RSS: interrupt vector serves first RSS queue. + */ +enum iwl_shared_irq_flags { + IWL_SHARED_IRQ_NON_RX = BIT(0), + IWL_SHARED_IRQ_FIRST_RSS = BIT(1), +}; + /** * struct iwl_trans_pcie - PCIe transport specific data * @rxq: all the RX queue data @@ -333,8 +343,10 @@ struct iwl_tso_hdr_page { * @fw_mon_size: size of the buffer for the firmware monitor * @msix_entries: array of MSI-X entries * @msix_enabled: true if managed to enable MSI-X - * @allocated_vector: the number of interrupt vector allocated by the OS - * @default_irq_num: default irq for non rx interrupt + * @shared_vec_mask: the type of causes the shared vector handles + * (see iwl_shared_irq_flags). 
+ * @alloc_vecs: the number of interrupt vectors allocated by the OS + * @def_irq: default irq for non rx causes * @fh_init_mask: initial unmasked fh causes * @hw_init_mask: initial unmasked hw causes * @fh_mask: current unmasked fh causes @@ -407,8 +419,9 @@ struct iwl_trans_pcie { struct msix_entry msix_entries[IWL_MAX_RX_HW_QUEUES]; bool msix_enabled; - u32 allocated_vector; - u32 default_irq_num; + u8 shared_vec_mask; + u32 alloc_vecs; + u32 def_irq; u32 fh_init_mask; u32 hw_init_mask; u32 fh_mask; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 452387cd5df4..0b5b331010fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1885,6 +1885,20 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id) inta_fh, iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD)); + if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) && + inta_fh & MSIX_FH_INT_CAUSES_Q0) { + local_bh_disable(); + iwl_pcie_rx_handle(trans, 0); + local_bh_enable(); + } + + if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) && + inta_fh & MSIX_FH_INT_CAUSES_Q1) { + local_bh_disable(); + iwl_pcie_rx_handle(trans, 1); + local_bh_enable(); + } + /* This "Tx" DMA channel is used only for loading uCode */ if (inta_fh & MSIX_FH_INT_CAUSES_D2S_CH0_NUM) { IWL_DEBUG_ISR(trans, "uCode load interrupt\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 361ccf651e9b..be32fe1683f5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1170,7 +1170,7 @@ static void iwl_pcie_synchronize_irqs(struct iwl_trans *trans) if (trans_pcie->msix_enabled) { int i; - for (i = 0; i < trans_pcie->allocated_vector; i++) + for (i = 0; i < trans_pcie->alloc_vecs; i++) synchronize_irq(trans_pcie->msix_entries[i].vector); } else { synchronize_irq(trans_pcie->pci_dev->irq); @@ -1429,13 +1429,58 @@ static struct iwl_causes_list causes_list[] = { {MSIX_HW_INT_CAUSES_REG_HAP, CSR_MSIX_HW_INT_MASK_AD, 0x2E}, }; +static void iwl_pcie_map_non_rx_causes(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int val = trans_pcie->def_irq | MSIX_NON_AUTO_CLEAR_CAUSE; + int i; + + /* + * Access all non RX causes and map them to the default irq. + * In case we are missing at least one interrupt vector, + * the first interrupt vector will serve non-RX and FBQ causes. + */ + for (i = 0; i < ARRAY_SIZE(causes_list); i++) { + iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val); + iwl_clear_bit(trans, causes_list[i].mask_reg, + causes_list[i].cause_num); + } +} + +static void iwl_pcie_map_rx_causes(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 offset = + trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 1 : 0; + u32 val, idx; + + /* + * The first RX queue - fallback queue, which is designated for + * management frame, command responses etc, is always mapped to the + * first interrupt vector. The other RX queues are mapped to + * the other (N - 2) interrupt vectors. 
+ */ + val = BIT(MSIX_FH_INT_CAUSES_Q(0)); + for (idx = 1; idx < trans->num_rx_queues; idx++) { + iwl_write8(trans, CSR_MSIX_RX_IVAR(idx), + MSIX_FH_INT_CAUSES_Q(idx - offset)); + val |= BIT(MSIX_FH_INT_CAUSES_Q(idx)); + } + iwl_write32(trans, CSR_MSIX_FH_INT_MASK_AD, ~val); + + val = MSIX_FH_INT_CAUSES_Q(0); + if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) + val |= MSIX_NON_AUTO_CLEAR_CAUSE; + iwl_write8(trans, CSR_MSIX_RX_IVAR(0), val); + + if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) + iwl_write8(trans, CSR_MSIX_RX_IVAR(1), val); +} + static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie) { - u32 val, max_rx_vector, i; struct iwl_trans *trans = trans_pcie->trans; - max_rx_vector = trans_pcie->allocated_vector - 1; - if (!trans_pcie->msix_enabled) { if (trans->cfg->mq_rx_supported) iwl_write_prph(trans, UREG_CHICK, @@ -1446,25 +1491,16 @@ static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie) iwl_write_prph(trans, UREG_CHICK, UREG_CHICK_MSIX_ENABLE); /* - * Each cause from the list above and the RX causes is represented as - * a byte in the IVAR table. We access the first (N - 1) bytes and map - * them to the (N - 1) vectors so these vectors will be used as rx - * vectors. Then access all non rx causes and map them to the - * default queue (N'th queue). + * Each cause from the causes list above and the RX causes is + * represented as a byte in the IVAR table. The first nibble + * represents the bound interrupt vector of the cause, the second + * represents no auto clear for this cause. This will be set if its + * interrupt vector is bound to serve other causes. */ - for (i = 0; i < max_rx_vector; i++) { - iwl_write8(trans, CSR_MSIX_RX_IVAR(i), MSIX_FH_INT_CAUSES_Q(i)); - iwl_clear_bit(trans, CSR_MSIX_FH_INT_MASK_AD, - BIT(MSIX_FH_INT_CAUSES_Q(i))); - } + iwl_pcie_map_rx_causes(trans); + + iwl_pcie_map_non_rx_causes(trans); - for (i = 0; i < ARRAY_SIZE(causes_list); i++) { - val = trans_pcie->default_irq_num | - MSIX_NON_AUTO_CLEAR_CAUSE; - iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val); - iwl_clear_bit(trans, causes_list[i].mask_reg, - causes_list[i].cause_num); - } trans_pcie->fh_init_mask = ~iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD); trans_pcie->fh_mask = trans_pcie->fh_init_mask; @@ -1477,9 +1513,8 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int max_vector, nvec, i; u16 pci_cmd; - int max_vector; - int ret, i; if (trans->cfg->mq_rx_supported) { max_vector = min_t(u32, (num_possible_cpus() + 2), @@ -1487,33 +1522,48 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, for (i = 0; i < max_vector; i++) trans_pcie->msix_entries[i].entry = i; - ret = pci_enable_msix_range(pdev, trans_pcie->msix_entries, - MSIX_MIN_INTERRUPT_VECTORS, - max_vector); - if (ret > 1) { + nvec = pci_enable_msix_range(pdev, trans_pcie->msix_entries, + MSIX_MIN_INTERRUPT_VECTORS, + max_vector); + if (nvec < 0) { IWL_DEBUG_INFO(trans, - "Enable MSI-X allocate %d interrupt vector\n", - ret); - trans_pcie->allocated_vector = ret; - trans_pcie->default_irq_num = - trans_pcie->allocated_vector - 1; - trans_pcie->trans->num_rx_queues = - trans_pcie->allocated_vector - 1; - trans_pcie->msix_enabled = true; - - return; + "ret = %d failed to enable msi-x mode move to msi mode\n", + nvec); + goto msi; } - IWL_DEBUG_INFO(trans, - "ret = %d %s move to msi mode\n", ret, - (ret == 1) ? 
- "can't allocate more than 1 interrupt vector" : - "failed to enable msi-x mode"); - pci_disable_msix(pdev); - } - ret = pci_enable_msi(pdev); - if (ret) { - dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", ret); + IWL_DEBUG_INFO(trans, + "Enable MSI-X allocate %d interrupt vector\n", + nvec); + trans_pcie->def_irq = (nvec == max_vector) ? nvec - 1 : 0; + /* + * In case the OS provides fewer interrupts than requested, + * different causes will share the same interrupt vector + * as follow: + * One interrupt less: non rx causes shared with FBQ. + * Two interrupts less: non rx causes shared with FBQ and RSS. + * More than two interrupts: we will use fewer RSS queues. + */ + if (nvec <= num_online_cpus()) { + trans_pcie->trans->num_rx_queues = nvec + 1; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | + IWL_SHARED_IRQ_FIRST_RSS; + } else if (nvec == num_online_cpus() + 1) { + trans_pcie->trans->num_rx_queues = nvec; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; + } else { + trans_pcie->trans->num_rx_queues = nvec - 1; + } + + trans_pcie->alloc_vecs = nvec; + trans_pcie->msix_enabled = true; + return; + } +msi: + + nvec = pci_enable_msi(pdev); + if (nvec) { + dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", nvec); /* enable rfkill interrupt: hw bug w/a */ pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); if (pci_cmd & PCI_COMMAND_INTX_DISABLE) { @@ -1526,16 +1576,14 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie) { - int i, last_vector; + int i; - last_vector = trans_pcie->trans->num_rx_queues; - - for (i = 0; i < trans_pcie->allocated_vector; i++) { + for (i = 0; i < trans_pcie->alloc_vecs; i++) { int ret; ret = request_threaded_irq(trans_pcie->msix_entries[i].vector, iwl_pcie_msix_isr, - (i == last_vector) ? + (i == trans_pcie->def_irq) ? iwl_pcie_irq_msix_handler : iwl_pcie_irq_rx_msix_handler, IRQF_SHARED, @@ -1712,7 +1760,7 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) iwl_pcie_rx_free(trans); if (trans_pcie->msix_enabled) { - for (i = 0; i < trans_pcie->allocated_vector; i++) + for (i = 0; i < trans_pcie->alloc_vecs; i++) free_irq(trans_pcie->msix_entries[i].vector, &trans_pcie->msix_entries[i]); From 7585c354637bb003ce612dd22f5047c015545ef4 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Thu, 7 Jul 2016 11:00:26 +0300 Subject: [PATCH 0243/1050] iwlwifi: mvm: fix pending frames tracking on tx resp In iwl_mvm_rx_tx_cmd_single(), when checking if a given TID is aggregated, the driver doesn't check whether or not the queue itself can be aggregated. For example, a management queue might be marked as aggregated if TID 0 is aggregated on a (different) data queue. Make sure that mgmt frames are sent with TID IWL_TID_NON_QOS, and in this way make sure no mixups of this sort happen. 
Fixes: commit 24afba7690e4 ("iwlwifi: mvm: support bss dynamic alloc/dealloc of queues") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 96482a5de38c..10517778faf4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -920,9 +920,13 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, tid = IWL_MAX_TID_COUNT; } - if (iwl_mvm_is_dqa_supported(mvm)) + if (iwl_mvm_is_dqa_supported(mvm)) { txq_id = mvmsta->tid_data[tid].txq_id; + if (ieee80211_is_mgmt(fc)) + tx_cmd->tid_tspec = IWL_TID_NON_QOS; + } + /* Copy MAC header from skb into command buffer */ memcpy(tx_cmd->hdr, hdr, hdrlen); From a0315dea9091d1ebc1534f6129b3fc9942b8ca99 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Thu, 7 Jul 2016 13:25:59 +0300 Subject: [PATCH 0244/1050] iwlwifi: mvm: free reserved queue on STA removal When a STA is removed in DQA mode, if no traffic went through its reserved queue, the txq continues to be marked as reserved and no STA can use it. Make sure that in such a case the reserved queue is marked as free when the STA is removed. Fixes: commit 24afba7690e4 ("iwlwifi: mvm: support bss dynamic alloc/dealloc of queues") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 24 +++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index a92e12edeb3a..11c7e1591b3b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1496,9 +1496,31 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm, ret = iwl_mvm_drain_sta(mvm, mvm_sta, false); /* If DQA is supported - the queues can be disabled now */ - if (iwl_mvm_is_dqa_supported(mvm)) + if (iwl_mvm_is_dqa_supported(mvm)) { + u8 reserved_txq = mvm_sta->reserved_queue; + enum iwl_mvm_queue_status *status; + iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta); + /* + * If no traffic has gone through the reserved TXQ - it + * is still marked as IWL_MVM_QUEUE_RESERVED, and + * should be manually marked as free again + */ + spin_lock_bh(&mvm->queue_info_lock); + status = &mvm->queue_info[reserved_txq].status; + if (WARN((*status != IWL_MVM_QUEUE_RESERVED) && + (*status != IWL_MVM_QUEUE_FREE), + "sta_id %d reserved txq %d status %d", + mvm_sta->sta_id, reserved_txq, *status)) { + spin_unlock_bh(&mvm->queue_info_lock); + return -EINVAL; + } + + *status = IWL_MVM_QUEUE_FREE; + spin_unlock_bh(&mvm->queue_info_lock); + } + if (vif->type == NL80211_IFTYPE_STATION && mvmvif->ap_sta_id == mvm_sta->sta_id) { /* if associated - we can't remove the AP STA now */ From 7c8d91eb312f30e96db04c710988394242a83565 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Sun, 13 Mar 2016 17:51:59 +0200 Subject: [PATCH 0245/1050] iwlwifi: pcie: Set affinity mask for rx interrupt vectors per cpu In order to utilize the host's CPUs in the most efficient way, we bind each rx interrupt vector to a different CPU on the host. Each rx interrupt is prioritized to execute only on its designated CPU rather than on any CPU.
Processor affinity takes advantage of the fact that some remnants of a process that was run on a given processor may remain in that processor's state (for example, data in the CPU cache) after another process is run on that CPU. Scheduling that process to execute on the same processor can improve performance by reducing performance-degrading situations such as cache misses. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 2 ++ .../net/wireless/intel/iwlwifi/pcie/trans.c | 32 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 8ad92fac8592..987a0770fb5b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "iwl-fh.h" #include "iwl-csr.h" @@ -426,6 +427,7 @@ struct iwl_trans_pcie { u32 hw_init_mask; u32 fh_mask; u32 hw_mask; + cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES]; }; static inline struct iwl_trans_pcie * diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index be32fe1683f5..d7521c1d90ec 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1573,6 +1573,30 @@ msi: } } +static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans) +{ + int iter_rx_q, i, ret, cpu, offset; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + i = trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 0 : 1; + iter_rx_q = trans_pcie->trans->num_rx_queues - 1 + i; + offset = 1 + i; + for (; i < iter_rx_q ; i++) { + /* + * Get the cpu prior to the place to search + * (i.e. return will be > i - 1). + */ + cpu = cpumask_next(i - offset, cpu_online_mask); + cpumask_set_cpu(cpu, &trans_pcie->affinity_mask[i]); + ret = irq_set_affinity_hint(trans_pcie->msix_entries[i].vector, + &trans_pcie->affinity_mask[i]); + if (ret) + IWL_ERR(trans_pcie->trans, + "Failed to set affinity mask for IRQ %d\n", + i); + } +} + static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie) { @@ -1601,6 +1625,7 @@ static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, return ret; } } + iwl_pcie_irq_set_affinity(trans_pcie->trans); return 0; } @@ -1760,9 +1785,14 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) iwl_pcie_rx_free(trans); if (trans_pcie->msix_enabled) { - for (i = 0; i < trans_pcie->alloc_vecs; i++) + for (i = 0; i < trans_pcie->alloc_vecs; i++) { + irq_set_affinity_hint( + trans_pcie->msix_entries[i].vector, + NULL); + free_irq(trans_pcie->msix_entries[i].vector, &trans_pcie->msix_entries[i]); + } pci_disable_msix(trans_pcie->pci_dev); trans_pcie->msix_enabled = false; From 89e4ad53ae53e97a81c0bb1feeb40bc69d87d13c Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 10:40:27 +0300 Subject: [PATCH 0246/1050] iwlwifi: add the new 9560 series Add a new config struct for the new 9560 series and add the 4 new PCI IDs for it.
Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index 1fec6afbe041..4d6898515046 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -187,6 +187,17 @@ const struct iwl_cfg iwl9460_2ac_cfg = { .integrated = true, }; +const struct iwl_cfg iwl9560_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9560", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, +}; + /* * TODO the struct below is for internal testing only this should be * removed by EO 2016~ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 7008319532ef..63d3f9b18bcb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -454,6 +454,7 @@ extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; +extern const struct iwl_cfg iwl9560_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index c6e24fb286be..74aa416831bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -523,6 +523,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, From a3e939dfe0a1e12a37ecd08ab89bb6115a490909 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 09:11:56 +0300 Subject: [PATCH 0247/1050] iwlwifi: add the new 8275 series Add a new config struct for the new 8275 series and add the first PCI ID for it. 
Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 3 files changed, 13 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index 6c6725e808d4..cde4f265e4e5 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -212,6 +212,17 @@ const struct iwl_cfg iwl8265_2ac_cfg = { .vht_mu_mimo_supported = true, }; +const struct iwl_cfg iwl8275_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 8275", + .fw_name_pre = IWL8265_FW_PRE, + IWL_DEVICE_8265, + .ht_params = &iwl8000_ht_params, + .nvm_ver = IWL8000_NVM_VERSION, + .nvm_calib_ver = IWL8000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .vht_mu_mimo_supported = true, +}; + const struct iwl_cfg iwl4165_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 4165", .fw_name_pre = IWL8000_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 63d3f9b18bcb..6cecb1adf0c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -445,6 +445,7 @@ extern const struct iwl_cfg iwl7265d_n_cfg; extern const struct iwl_cfg iwl8260_2n_cfg; extern const struct iwl_cfg iwl8260_2ac_cfg; extern const struct iwl_cfg iwl8265_2ac_cfg; +extern const struct iwl_cfg iwl8275_2ac_cfg; extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 74aa416831bf..14c6e94ecbf2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -500,6 +500,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x0930, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0950, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x0012, iwl8275_2ac_cfg)}, /* 9000 Series */ {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, From 72c240fed0d1334d6de5c22b0ef16bdf5b542447 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 Jul 2016 11:40:57 +0000 Subject: [PATCH 0248/1050] iwlwifi: mvm: use setup_timer instead of init_timer and data fields Use setup_timer function instead of initializing timer with the function and data fields Signed-off-by: Wei Yongjun Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 11c7e1591b3b..216aa54c8679 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2050,11 +2050,9 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, baid_data->baid = baid; baid_data->timeout = timeout; baid_data->last_rx = jiffies; - init_timer(&baid_data->session_timer); - baid_data->session_timer.function = - iwl_mvm_rx_agg_session_expired; - baid_data->session_timer.data = - (unsigned long)&mvm->baid_map[baid]; + setup_timer(&baid_data->session_timer, + iwl_mvm_rx_agg_session_expired, + (unsigned 
long)&mvm->baid_map[baid]); baid_data->mvm = mvm; baid_data->tid = tid; baid_data->sta_id = mvm_sta->sta_id; From 06f4b08179fa0590bd3a53e9af16b7197460947f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 21 Jul 2016 15:39:29 +0300 Subject: [PATCH 0249/1050] iwlwifi: pcie: change indentation of iwl_pcie_set_interrupt_capa() The function is deeply indented. Jump to the msi section when needed to avoid this, which makes the code more readable. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/pcie/trans.c | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index d7521c1d90ec..43c3915ed352 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1513,57 +1513,57 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int max_vector, nvec, i; + int max_irqs, num_irqs, i, ret; u16 pci_cmd; - if (trans->cfg->mq_rx_supported) { - max_vector = min_t(u32, (num_possible_cpus() + 2), - IWL_MAX_RX_HW_QUEUES); - for (i = 0; i < max_vector; i++) - trans_pcie->msix_entries[i].entry = i; + if (!trans->cfg->mq_rx_supported) + goto enable_msi; - nvec = pci_enable_msix_range(pdev, trans_pcie->msix_entries, - MSIX_MIN_INTERRUPT_VECTORS, - max_vector); - if (nvec < 0) { + max_irqs = min_t(u32, num_possible_cpus() + 2, IWL_MAX_RX_HW_QUEUES); + for (i = 0; i < max_irqs; i++) + trans_pcie->msix_entries[i].entry = i; + num_irqs = pci_enable_msix_range(pdev, trans_pcie->msix_entries, + MSIX_MIN_INTERRUPT_VECTORS, + max_irqs); + if (num_irqs < 0) { IWL_DEBUG_INFO(trans, - "ret = %d failed to enable msi-x mode move to msi mode\n", - nvec); - goto msi; - } + "Failed to enable msi-x mode (ret %d). Moving to msi mode.\n", + num_irqs); + goto enable_msi; } - IWL_DEBUG_INFO(trans, - "Enable MSI-X allocate %d interrupt vector\n", - nvec); - trans_pcie->def_irq = (nvec == max_vector) ? nvec - 1 : 0; - /* - * In case the OS provides fewer interrupts than requested, - * different causes will share the same interrupt vector - * as follow: - * One interrupt less: non rx causes shared with FBQ. - * Two interrupts less: non rx causes shared with FBQ and RSS. - * More than two interrupts: we will use fewer RSS queues. - */ - if (nvec <= num_online_cpus()) { - trans_pcie->trans->num_rx_queues = nvec + 1; - trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | - IWL_SHARED_IRQ_FIRST_RSS; - } else if (nvec == num_online_cpus() + 1) { - trans_pcie->trans->num_rx_queues = nvec; - trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; - } else { - trans_pcie->trans->num_rx_queues = nvec - 1; - } - - trans_pcie->alloc_vecs = nvec; - trans_pcie->msix_enabled = true; - return; + trans_pcie->def_irq = (num_irqs == max_irqs) ? num_irqs - 1 : 0; - } -msi: + IWL_DEBUG_INFO(trans, + "MSI-X enabled. %d interrupt vectors were allocated\n", + num_irqs); + + /* + * In case the OS provides fewer interrupts than requested, different + * causes will share the same interrupt vector as follows: + * One interrupt less: non rx causes shared with FBQ. + * Two interrupts less: non rx causes shared with FBQ and RSS. + * More than two interrupts: we will use fewer RSS queues.
+ */ + if (num_irqs <= num_online_cpus()) { + trans_pcie->trans->num_rx_queues = num_irqs + 1; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | + IWL_SHARED_IRQ_FIRST_RSS; + } else if (num_irqs == num_online_cpus() + 1) { + trans_pcie->trans->num_rx_queues = num_irqs; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; + } else { + trans_pcie->trans->num_rx_queues = num_irqs - 1; + } + + trans_pcie->alloc_vecs = num_irqs; + trans_pcie->msix_enabled = true; + return; + +enable_msi: + ret = pci_enable_msi(pdev); + if (ret) { + dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", ret); /* enable rfkill interrupt: hw bug w/a */ pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); if (pci_cmd & PCI_COMMAND_INTX_DISABLE) { From 5fe343978b663e347e2e478e9005ddbf228ca508 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 26 Jul 2016 23:22:01 +0300 Subject: [PATCH 0250/1050] iwlwifi: mvm: bump max API to 26 The driver now supports version 26 of the firmware APIs. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-7000.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 2 +- drivers/net/wireless/intel/iwlwifi/iwl-a000.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c index 64690c14ff4d..aa575fb9dea0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c @@ -73,8 +73,8 @@ /* Highest firmware API version supported */ #define IWL7260_UCODE_API_MAX 17 #define IWL7265_UCODE_API_MAX 17 -#define IWL7265D_UCODE_API_MAX 24 -#define IWL3168_UCODE_API_MAX 24 +#define IWL7265D_UCODE_API_MAX 26 +#define IWL3168_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL7260_UCODE_API_MIN 16 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index cde4f265e4e5..990cf2b17517 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -70,8 +70,8 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL8000_UCODE_API_MAX 24 -#define IWL8265_UCODE_API_MAX 24 +#define IWL8000_UCODE_API_MAX 26 +#define IWL8265_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL8000_UCODE_API_MIN 16 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index 4d6898515046..a2c7946c12c9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -55,7 +55,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL9000_UCODE_API_MAX 24 +#define IWL9000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL9000_UCODE_API_MIN 16 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c index 4d78232c8afe..ea1618525878 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c @@ -55,7 +55,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL_A000_UCODE_API_MAX 24 +#define IWL_A000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL_A000_UCODE_API_MIN 24 From 9fb064df6d57bf09ed3f8f964c8b0789b55206e7 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 26 Jul 2016 18:03:07
+0300 Subject: [PATCH 0251/1050] iwlwifi: pcie: replace possible_cpus() with online_cpus() in MSIX mode In MSIX mode the number of IRQs depends on the number of possible CPUs on the host. This causes a bug when there are offline cores. Take into account only the online CPUs instead, and save the count in a temporary variable. Fixes: commit 2e5d4a8f61dc ("iwlwifi: pcie: Add new configuration to enable MSIX") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 43c3915ed352..188b0dee542c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1513,13 +1513,14 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int max_irqs, num_irqs, i, ret; + int max_irqs, num_irqs, i, ret, nr_online_cpus; u16 pci_cmd; if (!trans->cfg->mq_rx_supported) goto enable_msi; - max_irqs = min_t(u32, num_possible_cpus() + 2, IWL_MAX_RX_HW_QUEUES); + nr_online_cpus = num_online_cpus(); + max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES); for (i = 0; i < max_irqs; i++) trans_pcie->msix_entries[i].entry = i; @@ -1545,11 +1546,11 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, * Two interrupts less: non rx causes shared with FBQ and RSS. * More than two interrupts: we will use fewer RSS queues. */ - if (num_irqs <= num_online_cpus()) { + if (num_irqs <= nr_online_cpus) { trans_pcie->trans->num_rx_queues = num_irqs + 1; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | IWL_SHARED_IRQ_FIRST_RSS; - } else if (num_irqs == num_online_cpus() + 1) { + } else if (num_irqs == nr_online_cpus + 1) { trans_pcie->trans->num_rx_queues = num_irqs; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; } else { From 612da1efc07f28ea9e64402820dd25287ff5233b Mon Sep 17 00:00:00 2001 From: Sharon Dvir Date: Wed, 3 Aug 2016 10:55:45 +0300 Subject: [PATCH 0252/1050] iwlwifi: unify iwl_get_ucode_image() implementations Avoid multiple implementations.
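The consolidated helper keeps the range check in one place; the body below matches the iwl-fw.h hunk in this patch, and the call site shown is representative of how both the dvm and mvm callers look after the change:

	static inline const struct fw_img *
	iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type)
	{
		/* one shared bounds check instead of a copy per caller */
		if (ucode_type >= IWL_UCODE_TYPE_MAX)
			return NULL;

		return &fw->img[ucode_type];
	}

	/* representative call site after the change: */
	fw = iwl_get_ucode_image(priv->fw, ucode_type);
	if (WARN_ON(!fw))
		return -EINVAL;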
Signed-off-by: Sharon Dvir Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/dvm/ucode.c | 11 +---------- drivers/net/wireless/intel/iwlwifi/iwl-fw.h | 9 +++++++++ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 13 ++----------- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c index b662cf35b033..c7509c51e9d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c @@ -46,15 +46,6 @@ * ******************************************************************************/ -static inline const struct fw_img * -iwl_get_ucode_image(struct iwl_priv *priv, enum iwl_ucode_type ucode_type) -{ - if (ucode_type >= IWL_UCODE_TYPE_MAX) - return NULL; - - return &priv->fw->img[ucode_type]; -} - /* * Calibration */ @@ -330,7 +321,7 @@ int iwl_load_ucode_wait_alive(struct iwl_priv *priv, enum iwl_ucode_type old_type; static const u16 alive_cmd[] = { REPLY_ALIVE }; - fw = iwl_get_ucode_image(priv, ucode_type); + fw = iwl_get_ucode_image(priv->fw, ucode_type); if (WARN_ON(!fw)) return -EINVAL; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h index 74ea68d1063c..5f229556339a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h @@ -329,4 +329,13 @@ iwl_fw_dbg_conf_usniffer(const struct iwl_fw *fw, u8 id) return conf_tlv->usniffer; } +static inline const struct fw_img * +iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type) +{ + if (ucode_type >= IWL_UCODE_TYPE_MAX) + return NULL; + + return &fw->img[ucode_type]; +} + #endif /* __iwl_fw_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index b951a7f06060..7322c4394a9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -90,15 +90,6 @@ struct iwl_mvm_alive_data { u32 scd_base_addr; }; -static inline const struct fw_img * -iwl_get_ucode_image(struct iwl_mvm *mvm, enum iwl_ucode_type ucode_type) -{ - if (ucode_type >= IWL_UCODE_TYPE_MAX) - return NULL; - - return &mvm->fw->img[ucode_type]; -} - static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant) { struct iwl_tx_ant_cfg_cmd tx_ant_cmd = { @@ -592,9 +583,9 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, iwl_fw_dbg_conf_usniffer(mvm->fw, FW_DBG_START_FROM_ALIVE) && !(fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_USNIFFER_UNIFIED))) - fw = iwl_get_ucode_image(mvm, IWL_UCODE_REGULAR_USNIFFER); + fw = iwl_get_ucode_image(mvm->fw, IWL_UCODE_REGULAR_USNIFFER); else - fw = iwl_get_ucode_image(mvm, ucode_type); + fw = iwl_get_ucode_image(mvm->fw, ucode_type); if (WARN_ON(!fw)) return -EINVAL; mvm->cur_ucode = ucode_type; From 723d11a4cdf63544a01ae7a474c202a914b67a3b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Aug 2016 09:34:39 +0200 Subject: [PATCH 0253/1050] iwlwifi: mvm: make RSS RX more robust If the firmware ever decides to send any new/more notifications to the RSS queues, the driver would currently try to interpret those as REPLY_RX_MPDU_CMD and, if the notification was small, access invalid memory. Prevent that by checking for REPLY_RX_MPDU_CMD explicitly which allows ignoring unexpected notifications. 
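The shape of the fixed dispatch, as an illustrative stand-alone sketch; the struct, enum values, and handler names below are hypothetical stand-ins, and the actual one-line change is in the ops.c hunk that follows:

	/* Illustrative only: RSS-queue dispatch after the fix. */
	enum { RX_QUEUES_NOTIFICATION = 0xf1, DATA_PATH_GROUP = 0x5,
	       REPLY_RX_MPDU_CMD = 0xc1 };	/* stand-in values */

	struct pkt_hdr { unsigned char cmd, group_id; };

	static void handle_queue_notif(void);
	static void handle_mpdu(void);

	static void rx_rss_dispatch(const struct pkt_hdr *hdr)
	{
		if (hdr->cmd == RX_QUEUES_NOTIFICATION &&
		    hdr->group_id == DATA_PATH_GROUP)
			handle_queue_notif();
		else if (hdr->cmd == REPLY_RX_MPDU_CMD)	/* was a bare "else" */
			handle_mpdu();
		/* any other (possibly short) notification is now ignored
		 * rather than parsed as an MPDU */
	}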
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 55d9096da68c..43ea1e5fdfc6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1672,7 +1672,7 @@ static void iwl_mvm_rx_mq_rss(struct iwl_op_mode *op_mode, else if (unlikely(pkt->hdr.cmd == RX_QUEUES_NOTIFICATION && pkt->hdr.group_id == DATA_PATH_GROUP)) iwl_mvm_rx_queue_notif(mvm, rxb, queue); - else + else if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, queue); } From 9b8568360585854c4fe99572e4fbffc736706cfd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Aug 2016 13:38:59 +0200 Subject: [PATCH 0254/1050] iwlwifi: mvm: remove pointless _bh from spinlock in timer Inside the reorder timer expire function, there's no point in disabling BHs since it is in BH context. Remove that. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 08d8a8abb918..7845dbefb67a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -452,10 +452,10 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) u16 sn = 0, index = 0; bool expired = false; - spin_lock_bh(&buf->lock); + spin_lock(&buf->lock); if (!buf->num_stored || buf->removed) { - spin_unlock_bh(&buf->lock); + spin_unlock(&buf->lock); return; } @@ -492,7 +492,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) buf->reorder_time[index] + 1 + RX_REORDER_BUF_TIMEOUT_MQ); } - spin_unlock_bh(&buf->lock); + spin_unlock(&buf->lock); } static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue, From fd659f8e75b7f8214b4f323d46248205b4566e10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Aug 2016 13:52:56 +0200 Subject: [PATCH 0255/1050] iwlwifi: mvm: tighten BAID range check As pointed out by smatch, checking the BAID for just >= INVALID is a bad idea since only 32 (IWL_MAX_BAID) actually exist. Check the range for that and print invalid ones in the warning. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 7845dbefb67a..d6d9ec401b44 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -503,7 +503,7 @@ static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue, struct iwl_mvm_reorder_buffer *reorder_buf; u8 baid = data->baid; - if (WARN_ON_ONCE(baid >= IWL_RX_REORDER_DATA_INVALID_BAID)) + if (WARN_ONCE(baid >= IWL_MAX_BAID, "invalid BAID: %x\n", baid)) return; rcu_read_lock(); From 2a292822f00f7409fc0bd6b2d09efc5b8e6c9c5d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 14 Sep 2016 13:09:24 +0200 Subject: [PATCH 0256/1050] net/mlx4_en: fix off by one in error handling If an error occurs in mlx4_init_eq_table the index used in the err_out_unmap label is one too big which results in a panic in mlx4_free_eq. This patch fixes the index in the error path. 
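The unwind idiom at stake here is worth seeing in isolation. The following is an illustrative, self-contained model in plain C (free_eq(), N, and the simulated failure index are made up; it is not the mlx4 code). After setup fails at index i, exactly entries 0..i-1 were initialized, so cleanup must decrement before freeing:

	#include <stdio.h>

	#define N 4

	static void free_eq(int idx)
	{
		printf("freeing eq %d\n", idx);
	}

	int main(void)
	{
		int i;

		for (i = 0; i < N; i++) {
			if (i == 2)	/* pretend setup of eq[2] failed */
				goto err_out_unmap;
		}
		return 0;

	err_out_unmap:
		/*
		 * Entries 0..i-1 were set up. The old form,
		 * "while (i >= 0) free_eq(i--)", would also free eq[i],
		 * which was never initialized.
		 */
		while (i > 0)
			free_eq(--i);
		return 1;
	}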
Signed-off-by: Sebastian Ott Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f613977455e0..cf8f8a72a801 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1305,8 +1305,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) return 0; err_out_unmap: - while (i >= 0) - mlx4_free_eq(dev, &priv->eq_table.eq[i--]); + while (i > 0) + mlx4_free_eq(dev, &priv->eq_table.eq[--i]); #ifdef CONFIG_RFS_ACCEL for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_priv(dev)->port[i].rmap) { From 7077dc415b113ac17a6696c432bad2d66574e4fb Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 21:29:34 +0800 Subject: [PATCH 0257/1050] net: ethernet: mediatek: fix module loading automatically based on MODULE_DEVICE_TABLE The device table is required to load modules based on modaliases. After adding MODULE_DEVICE_TABLE, entries like the one below will be added to modules.alias: alias of:N*T*Cmediatek,mt7623-ethC* mtk_eth_soc Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d9199151a83e..3743af8f1ded 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1923,6 +1923,7 @@ const struct of_device_id of_mtk_match[] = { { .compatible = "mediatek,mt7623-eth" }, {}, }; +MODULE_DEVICE_TABLE(of, of_mtk_match); static struct platform_driver mtk_driver = { .probe = mtk_probe, From 01afd972a737879c1466a12f696601a2ce91ea84 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:44 +0300 Subject: [PATCH 0258/1050] net/ibm/emac: add set mac addr callback Add an implementation of the set-MAC-address callback and remove the dummy callback. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S.
Miller --- drivers/net/ethernet/ibm/emac/core.c | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 4c9771d57d6e..2dfc60308648 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -980,6 +980,33 @@ static void emac_set_multicast_list(struct net_device *ndev) __emac_set_multicast_list(dev); } +static int emac_set_mac_address(struct net_device *ndev, void *sa) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct sockaddr *addr = sa; + struct emac_regs __iomem *p = dev->emacp; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mutex_lock(&dev->link_lock); + + memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + + emac_rx_disable(dev); + emac_tx_disable(dev); + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + emac_tx_enable(dev); + emac_rx_enable(dev); + + mutex_unlock(&dev->link_lock); + + return 0; +} + static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) { int rx_sync_size = emac_rx_sync_size(new_mtu); @@ -2686,7 +2713,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit, .ndo_change_mtu = eth_change_mtu, }; @@ -2699,7 +2726,7 @@ static const struct net_device_ops emac_gige_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit_sg, .ndo_change_mtu = emac_change_mtu, }; From 7106a069f45b15e63d14484e72969e64798e641c Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:45 +0300 Subject: [PATCH 0259/1050] net/ibm/emac: add mutex to 'set multicast list' for preventing race conditions within ioctl calls. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2dfc60308648..7af09cbc53f0 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -977,7 +977,10 @@ static void emac_set_multicast_list(struct net_device *ndev) dev->mcast_pending = 1; return; } + + mutex_lock(&dev->link_lock); __emac_set_multicast_list(dev); + mutex_unlock(&dev->link_lock); } static int emac_set_mac_address(struct net_device *ndev, void *sa) From d6f64d725bac20df66b2eacd847fc41d7a1905e0 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Thu, 15 Sep 2016 11:40:05 +1200 Subject: [PATCH 0260/1050] net: VRF: Pass original iif to ip_route_input() The function ip_rcv_finish() calls l3mdev_ip_rcv(). On any VRF except the global VRF, this replaces skb->dev with the VRF master interface. When calling ip_route_input_noref() from here, the checks for forwarding look at this master device instead of the initial ingress interface. This will allow packets to be routed which normally would be dropped. 
For example, an interface that is not assigned an IP address should drop packets, but because the checking is against the master device, the packet will be forwarded. The fix here is to still call l3mdev_ip_rcv(), but remember the initial net_device. This is passed to the other functions within ip_rcv_finish, so they still see the original interface. Signed-off-by: Mark Tomlinson Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 4b351af3e67b..d6feabb03516 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -312,6 +312,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + struct net_device *dev = skb->dev; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -341,7 +342,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) */ if (!skb_valid_dst(skb)) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + iph->tos, dev); if (unlikely(err)) { if (err == -EXDEV) __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); @@ -370,7 +371,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); /* RFC 1122 3.3.6: * From bc6c03fa3cacd31b873e36ca16ef9678269deae6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 03:45:07 +0000 Subject: [PATCH 0261/1050] nfp: fix error return code in nfp_net_netdev_open() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 73725d9dfd99 ("nfp: allocate ring SW structs dynamically") Signed-off-by: Wei Yongjun Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e4924de0f..39dadfca84ef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2044,12 +2044,16 @@ static int nfp_net_netdev_open(struct net_device *netdev) nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), GFP_KERNEL); - if (!nn->rx_rings) + if (!nn->rx_rings) { + err = -ENOMEM; goto err_free_lsc; + } nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings), GFP_KERNEL); - if (!nn->tx_rings) + if (!nn->tx_rings) { + err = -ENOMEM; goto err_free_rx_rings; + } for (r = 0; r < nn->num_r_vecs; r++) { err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); From e830baa9c3f0023769ba9aab19eb44c892769d87 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 15 Sep 2016 14:39:21 +0200 Subject: [PATCH 0262/1050] qeth: restore device features after recovery After device recovery, only a basic set of network device features is enabled on the device. 
If features like checksum offloading or TSO were enabled by the user before the recovery, this results in a mismatch between the network device features, that the kernel assumes to be enabled on the device, and the features actually enabled on the device. This patch tries to restore previously set features, that require changes on the device, after the recovery of a device. In case of an error, the network device's features are changed to contain only the features that are actually turned on. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 29 +++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 3 +++ drivers/s390/net/qeth_l3_main.c | 1 + 4 files changed, 34 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index bf40063de202..6d4b68c483f3 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -999,6 +999,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, __u16, __u16, enum qeth_prot_versions); int qeth_set_features(struct net_device *, netdev_features_t); +int qeth_recover_features(struct net_device *); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); /* exports for OSN */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 7dba6c8537a1..6ad5a14669e7 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6131,6 +6131,35 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on) return rc; } +/* try to restore device features on a device after recovery */ +int qeth_recover_features(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + netdev_features_t recover = dev->features; + + if (recover & NETIF_F_IP_CSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM)) + recover ^= NETIF_F_IP_CSUM; + } + if (recover & NETIF_F_RXCSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM)) + recover ^= NETIF_F_RXCSUM; + } + if (recover & NETIF_F_TSO) { + if (qeth_set_ipa_tso(card, 1)) + recover ^= NETIF_F_TSO; + } + + if (recover == dev->features) + return 0; + + dev_warn(&card->gdev->dev, + "Device recovery failed to restore all offload features\n"); + dev->features = recover; + return -EIO; +} +EXPORT_SYMBOL_GPL(qeth_recover_features); + int qeth_set_features(struct net_device *dev, netdev_features_t features) { struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7bc20c5188bc..54fd89110ac7 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1246,6 +1246,9 @@ contin: } /* this also sets saved unicast addresses */ qeth_l2_set_rx_mode(card->dev); + rtnl_lock(); + qeth_recover_features(card->dev); + rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 72934666fedf..2f512715403b 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3269,6 +3269,7 @@ contin: else dev_open(card->dev); qeth_l3_set_multicast_list(card->dev); + qeth_recover_features(card->dev); rtnl_unlock(); } qeth_trace_features(card); From 016930b88a1d6eb6e6b3287d593e13ca06986acc Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:22 +0200 Subject: [PATCH 0263/1050] s390/qeth: use ip_lock for hsuid 
configuration qeth_l3_dev_hsuid_store() changes the ip hash table, which requires the ip_lock. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_sys.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 65645b11fc19..0e00a5ce0f00 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -297,7 +297,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->u.a6.pfxlen = 0; addr->type = QETH_IP_TYPE_NORMAL; + spin_lock_bh(&card->ip_lock); qeth_l3_delete_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); } @@ -329,7 +331,10 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->type = QETH_IP_TYPE_NORMAL; } else return -ENOMEM; + + spin_lock_bh(&card->ip_lock); qeth_l3_add_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); return count; From a7531c1cc09855df5e33ceefe4fdfc2d74ccab19 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:23 +0200 Subject: [PATCH 0264/1050] s390/qeth: allow hsuid configuration in DOWN state The qeth IP address mapping logic has been reworked recently. It causes now problems to specify qeth sysfs attribute "hsuid" in DOWN state, which is allowed. Postpone registering or deregistering of IP-addresses in this case. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 2f512715403b..4ba82e12d6f9 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -257,6 +257,11 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) if (addr->in_progress) return -EINPROGRESS; + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_DELETE; + return 0; + } + rc = qeth_l3_deregister_addr_entry(card, addr); hash_del(&addr->hnode); @@ -296,6 +301,11 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) hash_add(card->ip_htable, &addr->hnode, qeth_l3_ipaddr_hash(addr)); + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_ADD; + return 0; + } + /* qeth_l3_register_addr_entry can go to sleep * if we add a IPV4 addr. It is caused by the reason * that SETIP ipa cmd starts ARP staff for IPV4 addr. 
@@ -390,12 +400,16 @@ static void qeth_l3_recover_ip(struct qeth_card *card) int i; int rc; - QETH_CARD_TEXT(card, 4, "recoverip"); + QETH_CARD_TEXT(card, 4, "recovrip"); spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { - if (addr->disp_flag == QETH_DISP_ADDR_ADD) { + if (addr->disp_flag == QETH_DISP_ADDR_DELETE) { + qeth_l3_deregister_addr_entry(card, addr); + hash_del(&addr->hnode); + kfree(addr); + } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) { if (addr->proto == QETH_PROT_IPV4) { addr->in_progress = 1; spin_unlock_bh(&card->ip_lock); @@ -407,10 +421,8 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (!rc) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) { + if (addr->ref_counter < 1) qeth_l3_delete_ip(card, addr); - kfree(addr); - } } else { hash_del(&addr->hnode); kfree(addr); From 903e48531e8b5d414c8f1960eacac24c31f60344 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:24 +0200 Subject: [PATCH 0265/1050] qeth: check not more than 16 SBALEs on the completion queue af_iucv socket programs with HiperSockets as transport make use of the qdio completion queue. Running such an af_iucv socket program may result in a crash: [90341.677709] Oops: 0038 ilc:2 [#1] SMP [90341.677743] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.6.0-20160720.0.0e86ec7.5e62689.fc23.s390xperformance #1 [90341.677744] Hardware name: IBM 2964 N96 703 (LPAR) [90341.677746] task: 00000000edb79f00 ti: 00000000edb84000 task.ti: 00000000edb84000 [90341.677748] Krnl PSW : 0704d00180000000 000000000075bc50 (qeth_qdio_input_handler+0x258/0x4e0) [90341.677756] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 000003d10391e900 0000000000000001 00000000e61e6000 0000000000000005 [90341.677759] 0000000000a9e6ec 5420040001a77400 0000000000000001 000000000000006f [90341.677761] 00000000e0d83f00 0000000000000003 0000000000000010 5420040001a77400 [90341.677784] 000000007ba8b000 0000000000943fd0 000000000075bc4e 00000000ed3b3c10 [90341.677793] Krnl Code: 000000000075bc42: e320cc180004 lg %r2,3096(%r12) 000000000075bc48: c0e5ffffc5cc brasl %r14,7547e0 #000000000075bc4e: 1816 lr %r1,%r6 >000000000075bc50: ba19b008 cs %r1,%r9,8(%r11) 000000000075bc54: ec180041017e cij %r1,1,8,75bcd6 000000000075bc5a: 5810b008 l %r1,8(%r11) 000000000075bc5e: ec16005c027e cij %r1,2,6,75bd16 000000000075bc64: 5090b008 st %r9,8(%r11) [90341.677807] Call Trace: [90341.677810] ([<000000000075bbc0>] qeth_qdio_input_handler+0x1c8/0x4e0) [90341.677812] ([<000000000070efbc>] qdio_kick_handler+0x124/0x2a8) [90341.677814] ([<0000000000713570>] __tiqdio_inbound_processing+0xf0/0xcd0) [90341.677818] ([<0000000000143312>] tasklet_action+0x92/0x120) [90341.677823] ([<00000000008b6e72>] __do_softirq+0x112/0x308) [90341.677824] ([<0000000000142bce>] irq_exit+0xd6/0xf8) [90341.677829] ([<000000000010b1d2>] do_IRQ+0x6a/0x88) [90341.677830] ([<00000000008b6322>] io_int_handler+0x112/0x220) [90341.677832] ([<0000000000102b2e>] enabled_wait+0x56/0xa8) [90341.677833] ([<0000000000000000>] (null)) [90341.677835] ([<0000000000102e32>] arch_cpu_idle+0x32/0x48) [90341.677838] ([<000000000018a126>] cpu_startup_entry+0x266/0x2b0) [90341.677841] ([<0000000000113b38>] smp_start_secondary+0x100/0x110) [90341.677843] ([<00000000008b68a6>] restart_int_handler+0x62/0x78) [90341.677845] ([<00000000008b6588>] psw_idle+0x3c/0x40) [90341.677846] Last Breaking-Event-Address: [90341.677848] [<00000000007547ec>] qeth_dbf_longtext+0xc/0xc0 [90341.677849] [90341.677850] 
Kernel panic - not syncing: Fatal exception in interrupt qeth_qdio_cq_handler() analyzes SBALs on this completion queue, but does not observe the limit of 16 SBAL elements per SBAL. This patch adds the additional check to process not more than 16 SBAL elements. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6ad5a14669e7..20cf29613043 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3619,7 +3619,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, int e; e = 0; - while (buffer->element[e].addr) { + while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) && + buffer->element[e].addr) { unsigned long phys_aob_addr; phys_aob_addr = (unsigned long) buffer->element[e].addr; From 243f750fc6f5d8e4dec984a9a785941c67452b8f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:25 +0200 Subject: [PATCH 0266/1050] qeth: do not limit number of gso segments To reduce the need of skb_linearize() calls, gso_max_segs of qeth net_devices had been limited according to the maximum number of qdio SBAL elements. But a gso segment cannot be larger than the mtu-size, while an SBAL element can contain up to 4096 bytes. The gso_max_segs limitation limits the maximum packet size given to the qeth driver. Performance measurements with tso-enabled qeth network interfaces and mtu-size 1500 showed, that the disadvantage of smaller packets is much more severe than the advantage of fewer skb_linearize() calls. This patch gets rid of the gso_max_segs limitations in the qeth driver. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 1 - drivers/s390/net/qeth_l3_main.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 54fd89110ac7..2081c1895638 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1131,7 +1131,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) qeth_l2_request_initial_mac(card); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4ba82e12d6f9..0cbbc803310f 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3148,7 +3148,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) netif_keep_dst(card->dev); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT); From 5722963a8e83309dad831cf6968c4c805aa342c0 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:26 +0200 Subject: [PATCH 0267/1050] qeth: do not turn on SG per default According to recent performance measurements, turning on net_device feature NETIF_F_SG only behaves well, but turning on feature NETIF_F_GSO shows bad results. 
Since the kernel activates NETIF_F_GSO automatically as soon as the driver configures feature NETIF_F_SG, qeth should not activate feature NETIF_F_SG by default, until the qeth problems with NETIF_F_GSO are solved. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 2 -- drivers/s390/net/qeth_l3_main.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2081c1895638..bb27058fa9f0 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1124,8 +1124,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->hw_features |= NETIF_F_RXCSUM; card->dev->vlan_features |= NETIF_F_RXCSUM; } - /* Turn on SG per default */ - card->dev->features |= NETIF_F_SG; } card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0cbbc803310f..c00f6db812ac 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3120,7 +3120,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; - card->dev->features = NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { From 732a59cb6e7faed7a40da6665a517945c95fc895 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 15 Sep 2016 14:39:27 +0200 Subject: [PATCH 0268/1050] s390/qeth: fix setting VIPA address commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") restructured the internal address handling. This work broke setting a virtual IP address. The command echo 10.1.1.1 > /sys/bus/ccwgroup/devices//vipa/add4 fails with a 'file exists' error even if the IP address has not been set before. It turned out that the result of the IP address search is handled incorrectly in the VIPA case. This patch fixes the setting of a virtual IP address. Signed-off-by: Thomas Richter Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c00f6db812ac..272d9e7419be 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -701,7 +701,7 @@ int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); @@ -769,7 +769,7 @@ int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); From 95f60084acbcee6c466256cf26eb52191fad9edc Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:50 +0300 Subject: [PATCH 0269/1050] perf/x86/intel/pt: Fix an off-by-one in address filter configuration PT address filter configuration requires that a range is specified by its first and last address, but at the moment we're obtaining the end of the range by adding the user-specified size to its start, which is off by one from what it actually needs to be. Fix this and make sure that zero-sized filters don't pass the filter validation.
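The arithmetic is easy to restate in isolation. The helper below is a hypothetical sketch written for this description, not the perf code; it assumes the hardware compares against an inclusive end address, which is why the last byte of the range is start + size - 1 rather than start + size.

    static int filter_to_range(u64 start, u64 size, u64 *first, u64 *last)
    {
            if (!size)                /* a zero-sized filter matches nothing */
                    return -EOPNOTSUPP;

            *first = start;
            *last = start + size - 1; /* inclusive last byte of the range */
            return 0;
    }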
Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 04bb5fb5a8d7..5ec0100e3fc6 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1081,7 +1081,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) list_for_each_entry(filter, filters, entry) { /* PT doesn't support single address triggers */ - if (!filter->range) + if (!filter->range || !filter->size) return -EOPNOTSUPP; if (!filter->inode && !kernel_ip(filter->offset)) @@ -1111,7 +1111,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event) } else { /* apply the offset */ msr_a = filter->offset + offs[range]; - msr_b = filter->size + msr_a; + msr_b = filter->size + msr_a - 1; } filters->filter[range].msr_a = msr_a; From ddfdad991e55b65c1cc4ee29502f6dceee04455a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:51 +0300 Subject: [PATCH 0270/1050] perf/x86/intel/pt: Fix kernel address filter's offset validation The kernel_ip() filter is used mostly by the DS/LBR code to look at the branch addresses, but Intel PT also uses it to validate the address filter offsets for kernel addresses, for which it is not sufficient: supplying something in bits 64:48 that's not a sign extension of the lower address bits (like 0xf00d000000000000) throws a #GP. This patch adds address validation for the user supplied kernel filters. 
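For background, this is what "sign extension of the lower address bits" means on x86-64 with 48-bit virtual addresses. The check below is illustrative only and is not what the patch uses (the patch relies on virt_addr_valid() and kernel_ip()); it mirrors the kernel's sign_extend64() idiom.

    /*
     * A canonical 48-bit virtual address has bits 63:47 equal to a sign
     * extension of bit 47; a value like 0xf00d000000000000 violates this
     * and would throw a #GP when written into an address-filter MSR.
     */
    static inline bool canonical48(u64 addr)
    {
            return (u64)((s64)(addr << 16) >> 16) == addr;
    }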
Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 5ec0100e3fc6..1f94963a283b 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1074,6 +1074,11 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } +static inline bool valid_kernel_ip(unsigned long ip) +{ + return virt_addr_valid(ip) && kernel_ip(ip); +} + static int pt_event_addr_filters_validate(struct list_head *filters) { struct perf_addr_filter *filter; @@ -1084,7 +1089,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode && !kernel_ip(filter->offset)) + if (!filter->inode && !valid_kernel_ip(filter->offset)) return -EINVAL; if (++range > pt_cap_get(PT_CAP_num_address_ranges)) From 1155bafcb79208abc6ae234c6e135ac70607755c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:52 +0300 Subject: [PATCH 0271/1050] perf/x86/intel/pt: Do validate the size of a kernel address filter Right now, the kernel address filters in PT are prone to integer overflow that may happen in adding filter's size to its offset to obtain the end of the range. Such an overflow would also throw a #GP in the PT event configuration path. Fix this by explicitly validating the result of this calculation. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 1f94963a283b..861a7d9cb60f 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1089,8 +1089,13 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode && !valid_kernel_ip(filter->offset)) - return -EINVAL; + if (!filter->inode) { + if (!valid_kernel_ip(filter->offset)) + return -EINVAL; + + if (!valid_kernel_ip(filter->offset + filter->size)) + return -EINVAL; + } if (++range > pt_cap_get(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; From 42857cf512cb34c2c8cb50f1e766689d979d64e0 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Thu, 15 Sep 2016 12:20:12 -0400 Subject: [PATCH 0272/1050] configfs: Return -EFBIG from configfs_write_bin_file. The check for writing more than cb_max_size bytes does not 'goto out' so it is a no-op which allows users to vmalloc an arbitrary amount. 
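Reduced to a hedged sketch with simplified names (the real code is in configfs_write_bin_file()), the shape of the bug is an error assignment that falls through to the very allocation the check was meant to prevent:

    if (max_size && *ppos + count > max_size) {
            len = -EFBIG;
            goto out;       /* without this goto, execution falls through
                             * and vmalloc() below still runs with the
                             * oversized, user-chosen length */
    }

    tbuf = vmalloc(*ppos + count);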
Fixes: 03607ace807b ("configfs: implement binary attributes") Cc: stable@kernel.org Signed-off-by: Phil Turnbull Signed-off-by: Christoph Hellwig --- fs/configfs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index c30cf49b69d2..2c6312db8516 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -333,6 +333,7 @@ configfs_write_bin_file(struct file *file, const char __user *buf, if (bin_attr->cb_max_size && *ppos + count > bin_attr->cb_max_size) { len = -EFBIG; + goto out; } tbuf = vmalloc(*ppos + count); From 080fe0b790ad438fc1b61621dac37c1964ce7f35 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 24 Aug 2016 14:12:08 +0100 Subject: [PATCH 0273/1050] perf/x86/amd: Make HW_CACHE_REFERENCES and HW_CACHE_MISSES measure L2 While the Intel PMU monitors the LLC when perf enables the HW_CACHE_REFERENCES and HW_CACHE_MISSES events, these events monitor L1 instruction cache fetches (0x0080) and instruction cache misses (0x0081) on the AMD PMU. This is extremely confusing when monitoring the same workload across Intel and AMD machines, since parameters like, $ perf stat -e cache-references,cache-misses measure completely different things. Instead, make the AMD PMU measure instruction/data cache and TLB fill requests to the L2 and instruction/data cache and TLB misses in the L2 when HW_CACHE_REFERENCES and HW_CACHE_MISSES are enabled, respectively. That way the events measure unified caches on both platforms. Signed-off-by: Matt Fleming Acked-by: Peter Zijlstra Cc: Cc: Borislav Petkov Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1472044328-21302-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 4 ++-- arch/x86/kvm/pmu_amd.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e07a22bb9308..f5f4b3fbbbc2 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -119,8 +119,8 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 39b91127ef07..cd944435dfbd 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -23,8 +23,8 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, - [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES }, - [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES }, + [2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }, + [3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }, [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, From ed3d6d0ac036bcff6e4649a6f8fb987bb4e34444 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 16 Sep 2016 09:24:44 -0500 Subject: [PATCH 0274/1050] usb: musb: Fix tusb6010 compile error on blackfin We have CONFIG_BLACKFIN ifdef redefining all musb registers in musb_regs.h and tusb6010.h is never included causing a 
build error with blackfin-allmodconfig and COMPILE_TEST. Let's fix the issue by not building tusb6010 if CONFIG_BLACKFIN is selected. Reported-by: kbuild test robot Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 886526b5fcdd..73cfa13fc0dc 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -87,7 +87,7 @@ config USB_MUSB_DA8XX config USB_MUSB_TUSB6010 tristate "TUSB6010" depends on HAS_IOMEM - depends on ARCH_OMAP2PLUS || COMPILE_TEST + depends on (ARCH_OMAP2PLUS || COMPILE_TEST) && !BLACKFIN depends on NOP_USB_XCEIV = USB_MUSB_HDRC # both built-in or both modules config USB_MUSB_OMAP2PLUS From 08c5cd37480f59ea39682f4585d92269be6b1424 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 16 Sep 2016 10:24:26 -0400 Subject: [PATCH 0275/1050] USB: change bInterval default to 10 ms Some full-speed mceusb infrared transceivers contain invalid endpoint descriptors for their interrupt endpoints, with bInterval set to 0. In the past they have worked out okay with the mceusb driver, because the driver sets the bInterval field in the descriptor to 1, overwriting whatever value may have been there before. However, this approach was never sanctioned by the USB core, and in fact it does not work with xHCI controllers, because they use the bInterval value that was present when the configuration was installed. Currently usbcore uses 32 ms as the default interval if the value in the endpoint descriptor is invalid. It turns out that these IR transceivers don't work properly unless the interval is set to 10 ms or below. To work around this mceusb problem, this patch changes the endpoint-descriptor parsing routine, making the default interval value be 10 ms rather than 32 ms. Signed-off-by: Alan Stern Tested-by: Wade Berrier CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 15ce4ab11688..a2d90aca779f 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -240,8 +240,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, memcpy(&endpoint->desc, d, n); INIT_LIST_HEAD(&endpoint->urb_list); - /* Fix up bInterval values outside the legal range. Use 32 ms if no - * proper value can be guessed. */ + /* + * Fix up bInterval values outside the legal range. + * Use 10 or 8 ms if no proper value can be guessed. + */ i = 0; /* i = min, j = max, n = default */ j = 255; if (usb_endpoint_xfer_int(d)) { @@ -250,13 +252,15 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: case USB_SPEED_HIGH: - /* Many device manufacturers are using full-speed + /* + * Many device manufacturers are using full-speed * bInterval values in high-speed interrupt endpoint - * descriptors. Try to fix those and fall back to a - * 32 ms default value otherwise. */ + * descriptors. Try to fix those and fall back to an + * 8-ms default value otherwise. 
+ */ n = fls(d->bInterval*8); if (n == 0) - n = 9; /* 32 ms = 2^(9-1) uframes */ + n = 7; /* 8 ms = 2^(7-1) uframes */ j = 16; /* @@ -271,10 +275,12 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, } break; default: /* USB_SPEED_FULL or _LOW */ - /* For low-speed, 10 ms is the official minimum. + /* + * For low-speed, 10 ms is the official minimum. * But some "overclocked" devices might want faster - * polling so we'll allow it. */ - n = 32; + * polling so we'll allow it. + */ + n = 10; break; } } else if (usb_endpoint_xfer_isoc(d)) { @@ -282,10 +288,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, j = 16; switch (to_usb_device(ddev)->speed) { case USB_SPEED_HIGH: - n = 9; /* 32 ms = 2^(9-1) uframes */ + n = 7; /* 8 ms = 2^(7-1) uframes */ break; default: /* USB_SPEED_FULL */ - n = 6; /* 32 ms = 2^(6-1) frames */ + n = 4; /* 8 ms = 2^(4-1) frames */ break; } } From 8a15b81741879fa89601b03c0e50b0d780d65bc0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 16 Sep 2016 13:02:37 +0000 Subject: [PATCH 0276/1050] cpuset: fix non static symbol warning Fixes the following sparse warning: kernel/cpuset.c:2088:6: warning: symbol 'cpuset_fork' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Tejun Heo --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c08585ad996b..2b4c20ab5bbe 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2085,7 +2085,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) * which could have been changed by cpuset just after it inherits the * state from the parent and before it sits on the cgroup's task list. */ -void cpuset_fork(struct task_struct *task) +static void cpuset_fork(struct task_struct *task) { if (task_css_is_root(task, cpuset_cgrp_id)) return; From 5e102b3b4fa944815aead89e63eec2a35069a07b Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 2 Sep 2016 23:46:53 +0300 Subject: [PATCH 0277/1050] IB/rxe: fix GFP_KERNEL in spinlock context There is skb_clone(skb, GFP_KERNEL) in spinlock context in rxe_rcv_mcast_pkt(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_recv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 3d464c23e08b..144d2f129fcd 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * make a copy of the skb to post to the next qp */ skb_copy = (mce->qp_list.next != &mcg->qp_list) ? - skb_clone(skb, GFP_KERNEL) : NULL; + skb_clone(skb, GFP_ATOMIC) : NULL; pkt->qp = qp; rxe_add_ref(qp); From 344bacca8cd811809fc33a249f2738ab757d327f Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 12 Sep 2016 09:55:28 +0300 Subject: [PATCH 0278/1050] IB/ipoib: Don't allow MC joins during light MC flush This fix solves a race between light flush and on the fly joins. Light flush doesn't set the device to down and unset IPOIB_OPER_UP flag, this means that if while flushing we have a MC join in progress and the QP was attached to BC MGID we can have a mismatches when re-attaching a QP to the BC MGID. 
The light flush would set the broadcast group to NULL causing an on the fly join to rejoin and reattach to the BC MCG as well as adding the BC MGID to the multicast list. The flush process would later on remove the BC MGID and detach it from the QP. On the next flush the BC MGID is present in the multicast list but not found when trying to detach it because of the previous double attach and single detach. [18332.714265] ------------[ cut here ]------------ [18332.717775] WARNING: CPU: 6 PID: 3767 at drivers/infiniband/core/verbs.c:280 ib_dealloc_pd+0xff/0x120 [ib_core] ... [18332.775198] Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 [18332.779411] 0000000000000000 ffff8800b50dfbb0 ffffffff813fed47 0000000000000000 [18332.784960] 0000000000000000 ffff8800b50dfbf0 ffffffff8109add1 0000011832f58300 [18332.790547] ffff880226a596c0 ffff880032482000 ffff880032482830 ffff880226a59280 [18332.796199] Call Trace: [18332.798015] [] dump_stack+0x63/0x8c [18332.801831] [] __warn+0xd1/0xf0 [18332.805403] [] warn_slowpath_null+0x1d/0x20 [18332.809706] [] ib_dealloc_pd+0xff/0x120 [ib_core] [18332.814384] [] ipoib_transport_dev_cleanup+0xfc/0x1d0 [ib_ipoib] [18332.820031] [] ipoib_ib_dev_cleanup+0x98/0x110 [ib_ipoib] [18332.825220] [] ipoib_dev_cleanup+0x2d8/0x550 [ib_ipoib] [18332.830290] [] ipoib_uninit+0x2f/0x40 [ib_ipoib] [18332.834911] [] rollback_registered_many+0x1aa/0x2c0 [18332.839741] [] rollback_registered+0x31/0x40 [18332.844091] [] unregister_netdevice_queue+0x48/0x80 [18332.848880] [] ipoib_vlan_delete+0x1fb/0x290 [ib_ipoib] [18332.853848] [] delete_child+0x7d/0xf0 [ib_ipoib] [18332.858474] [] dev_attr_store+0x18/0x30 [18332.862510] [] sysfs_kf_write+0x3a/0x50 [18332.866349] [] kernfs_fop_write+0x120/0x170 [18332.870471] [] __vfs_write+0x28/0xe0 [18332.874152] [] ? percpu_down_read+0x1f/0x50 [18332.878274] [] vfs_write+0xa2/0x1a0 [18332.881896] [] SyS_write+0x46/0xa0 [18332.885632] [] do_syscall_64+0x57/0xb0 [18332.889709] [] entry_SYSCALL64_slow_path+0x25/0x25 [18332.894727] ---[ end trace 09ebbe31f831ef17 ]--- Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on SM change events") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index dc6d241b9406..be11d5d5b8c1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, } if (level == IPOIB_FLUSH_LIGHT) { + int oper_up; ipoib_mark_paths_invalid(dev); + /* Set IPoIB operation as down to prevent races between: + * the flush flow which leaves MCG and on the fly joins + * which can happen during that time. mcast restart task + * should deal with join requests we missed. + */ + oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_mcast_dev_flush(dev); + if (oper_up) + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_flush_ah(dev); } From e5ac40cd66c2f3cd11bc5edc658f012661b16347 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 12 Sep 2016 19:16:18 +0300 Subject: [PATCH 0279/1050] IB/mlx4: Fix incorrect MC join state bit-masking on SR-IOV Because of an incorrect bit-masking done on the join state bits, when handling a join request we failed to detect a difference between the group join state and the request join state when joining as send only full member (0x8). 
This caused the MC join request not to be sent. This issue is relevant only when SRIOV is enabled and SM supports send only full member. This fix separates scope bits and join states bits a nibble each. Fixes: b9c5d6a64358 ('IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV') Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mcg.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 8f7ad07915b0..097bfcc4ee99 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group) if (!group->members[i]) leave_state |= (1 << i); - return leave_state & (group->rec.scope_join_state & 7); + return leave_state & (group->rec.scope_join_state & 0xf); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) @@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; + if (group->rec.scope_join_state & 0xf) + group->rec.scope_join_state &= 0xf0; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) @@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask, static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { - u8 group_join_state = group->rec.scope_join_state & 7; + u8 group_join_state = group->rec.scope_join_state & 0xf; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; @@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) - group->response_sa_mad.data)->scope_join_state & 7; - cur_join_state = group->rec.scope_join_state & 7; + group->response_sa_mad.data)->scope_join_state & 0xf; + cur_join_state = group->rec.scope_join_state & 0xf; if (method == IB_MGMT_METHOD_GET_RESP) { /* successfull join */ @@ -710,7 +710,7 @@ process_requests: req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - req_join_state = sa_data->scope_join_state & 0x7; + req_join_state = sa_data->scope_join_state & 0xf; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent From baa0be7026e2f7d1d40bfd45909044169e9e3c68 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:19 +0300 Subject: [PATCH 0280/1050] IB/mlx4: Fix code indentation in QP1 MAD flow The indentation in the QP1 GRH flow in procedure build_mlx_header is really confusing. Fix it, in preparation for a commit which touches this code. 
Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 36 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 768085f59566..e398c04903fa 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2493,24 +2493,26 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (is_eth) + if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); - else { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + } else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache + */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + subnet_prefix; + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid, NULL); + } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); From 8ec07bf8a8b57d6c58927a16a0a22c0115cf2855 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:20 +0300 Subject: [PATCH 0281/1050] IB/mlx4: Use correct subnet-prefix in QP1 mads under SR-IOV When sending QP1 MAD packets which use a GRH, the source GID (which consists of the 64-bit subnet prefix, and the 64 bit port GUID) must be included in the packet GRH. For SR-IOV, a GID cache is used, since the source GID needs to be the slave's source GID, and not the Hypervisor's GID. This cache also included a subnet_prefix. Unfortunately, the subnet_prefix field in the cache was never initialized (to the default subnet prefix 0xfe80::0). As a result, this field remained all zeroes. Therefore, when SR-IOV was active, all QP1 packets which included a GRH had a source GID subnet prefix of all-zeroes. However, the subnet-prefix should initially be 0xfe80::0 (the default subnet prefix). In addition, if OpenSM modifies a port's subnet prefix, the new subnet prefix must be used in the GRH when sending QP1 packets. To fix this we now initialize the subnet prefix in the SR-IOV GID cache to the default subnet prefix. We update the cached value if/when OpenSM modifies the port's subnet prefix. We take this cached value when sending QP1 packets when SR-IOV is active. Note that the value is stored as an atomic64. This eliminates any need for locking when the subnet prefix is being updated. 
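In sketch form, with the struct handling elided (demux below is shorthand for the per-port demux context the patch touches), the lock-free pattern is a single atomic store paired with a single atomic load, so a reader can never observe a torn 64-bit prefix:

    /* updater, in the port-management-change event handler */
    atomic64_set(&demux->subnet_prefix, be64_to_cpu(new_prefix));

    /* reader, when building the GRH of a QP1 packet */
    grh.source_gid.global.subnet_prefix =
            cpu_to_be64(atomic64_read(&demux->subnet_prefix));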
Note also that we depend on the FW generating the "port management change" event for tracking subnet-prefix changes performed by OpenSM. If running early FW (before 2.9.4630), subnet prefix changes will not be tracked (but the default subnet prefix still will be stored in the cache; therefore users who do not modify the subnet prefix will not have a problem). IF there is a need for such tracking also for early FW, we will add that capability in a subsequent patch. Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 23 +++++++++++++++++++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/qp.c | 5 +++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2e53d28f98..0f21c3a25552 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7c5832ede4bd..686ab48ff644 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx { struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index e398c04903fa..7fb9629bd12b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2502,8 +2502,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, * we must use our own cache */ sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. 
+ subnet_prefix))); sqp->ud_header.grh.source_gid.global.interface_id = to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; From 69d269d38910e697e461ec5677368f57d2046cbe Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 12 Sep 2016 19:16:22 +0300 Subject: [PATCH 0282/1050] IB/mlx4: Diagnostic HW counters are not supported in slave mode Modify the mlx4_ib_diag_counters() to avoid the following error in the hypervisor when the slave tries to query the hardware counters in SR-IOV mode. mlx4_core 0000:81:00.0: Unknown command:0x30 accepted from slave:1 Fixes: 3f85f2aaabf7 ("IB/mlx4: Add diagnostic hardware counters") Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2af44c2de262..87ba9bca4181 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) bool per_port = !!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + if (mlx4_is_slave(ibdev->dev)) + return 0; + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { /* i == 1 means we are building port counters */ if (i && !per_port) From 7fae6655a0c897875bd34501ec092232b526d3e4 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 12 Sep 2016 19:16:23 +0300 Subject: [PATCH 0283/1050] IB/mlx5: Enable MAD_IFC commands for IB ports only MAD_IFC command is supported only for physical functions (PF) and when physical port is IB. The proposed fix enforces it. Fixes: d603c809ef91 ("IB/mlx5: Fix decision on using MAD_IFC") Reported-by: David Chang Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8150ea372c53..0480b643fb9c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -288,7 +288,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !MLX5_CAP_GEN(dev->mdev, ib_virt); + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) + return !MLX5_CAP_GEN(dev->mdev, ib_virt); + return 0; } enum { From ee3da804ad1b1dd4c766199a6e8443542b0aaaef Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 12 Sep 2016 19:16:24 +0300 Subject: [PATCH 0284/1050] IB/mlx5: Set source mac address in FTE Set the source mac address in the FTE when L2 specification is provided. 
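As context for the diff that follows: flow-table entries match each header field through a (mask, value) pair, and a field whose mask stays zero is treated as don't-care, which is how the source MAC was silently being ignored. A condensed sketch of the pairing (variable names shortened from the patch):

    /* mask: which source-MAC bits the hardware must compare */
    ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c, smac_47_16),
                    spec->eth.mask.src_mac);
    /* value: the bit pattern those masked bits must equal */
    ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v, smac_47_16),
                    spec->eth.val.src_mac);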
Fixes: 038d2ef87572 ('IB/mlx5: Add flow steering support') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0480b643fb9c..e19537cf44ab 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1430,6 +1430,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dmac_47_16), ib_spec->eth.val.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, vlan_tag, 1); From dfdd6158ca2c8838313470c5ab79243d04da70b2 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:04 +0300 Subject: [PATCH 0285/1050] IB/rxe: Fix kernel panic in udp_setup_tunnel Disable creation of a UDP socket for ipv6 when CONFIG_IPV6 is not enabeld. Since udp_sock_create6() returns 0 when CONFIG_IPV6 is not set [ 46.888632] IP: [] setup_udp_tunnel_sock+0x6/0x4f [ 46.891355] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 [ 46.893918] Oops: 0002 [#1] PREEMPT [ 46.896014] CPU: 0 PID: 1 Comm: swapper Not tainted 4.7.0-rc4-00001-g8700e3e #1 [ 46.900280] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 46.904905] task: cf06c040 ti: cf05e000 task.ti: cf05e000 [ 46.907854] EIP: 0060:[] EFLAGS: 00210246 CPU: 0 [ 46.911137] EIP is at setup_udp_tunnel_sock+0x6/0x4f [ 46.914070] EAX: 00000044 EBX: 00000001 ECX: cf05fef0 EDX: ca8142e0 [ 46.917236] ESI: c2c4505b EDI: cf05fef0 EBP: cf05fed0 ESP: cf05fed0 [ 46.919836] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 [ 46.922046] CR0: 80050033 CR2: 000001fc CR3: 02cec000 CR4: 000006b0 [ 46.924550] Stack: [ 46.926014] cf05ff10 c1fd4657 ca8142e0 0000000a 00000000 00000000 0000b712 00000008 [ 46.931274] 00000000 6bb5bd01 c1fd48de 00000000 00000000 cf05ff1c 00000000 00000000 [ 46.936122] cf05ff1c c1fd4bdf 00000000 cf05ff28 c2c4507b ffffffff cf05ff88 c2bf1c74 [ 46.942350] Call Trace: [ 46.944403] [] rxe_setup_udp_tunnel+0x8f/0x99 [ 46.947689] [] ? net_to_rxe+0x4e/0x4e [ 46.950567] [] rxe_net_init+0xe/0xa4 [ 46.953147] [] rxe_module_init+0x20/0x4c [ 46.955448] [] do_one_initcall+0x89/0x113 [ 46.957797] [] ? set_debug_rodata+0xf/0xf [ 46.959966] [] ? kernel_init_freeable+0xbe/0x15b [ 46.962262] [] kernel_init_freeable+0xde/0x15b [ 46.964418] [] kernel_init+0x8/0xd0 [ 46.966618] [] ret_from_kernel_thread+0xe/0x24 [ 46.969592] [] ? 
rest_init+0x6f/0x6f Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe.c | 25 +++++++++++-- drivers/infiniband/sw/rxe/rxe_net.c | 57 +++++++++++++---------------- drivers/infiniband/sw/rxe/rxe_net.h | 5 ++- 3 files changed, 52 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 55f0e8f0ca79..ddd59270ff6d 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -362,15 +362,34 @@ static int __init rxe_module_init(void) return err; } - err = rxe_net_init(); + err = rxe_net_ipv4_init(); if (err) { - pr_err("rxe: unable to init\n"); + pr_err("rxe: unable to init ipv4 tunnel\n"); rxe_cache_exit(); - return err; + goto exit; } + + err = rxe_net_ipv6_init(); + if (err) { + pr_err("rxe: unable to init ipv6 tunnel\n"); + rxe_cache_exit(); + goto exit; + } + + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("rxe: Failed to rigister netdev notifier\n"); + goto exit; + } + pr_info("rxe: loaded\n"); return 0; + +exit: + rxe_release_udp_tunnel(recv_sockets.sk4); + rxe_release_udp_tunnel(recv_sockets.sk6); + return err; } static void __exit rxe_module_exit(void) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0b8d2ea8b41d..eedf2f1cafdf 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return sock; } -static void rxe_release_udp_tunnel(struct socket *sk) +void rxe_release_udp_tunnel(struct socket *sk) { - udp_tunnel_sock_release(sk); + if (sk) + udp_tunnel_sock_release(sk); } static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, @@ -658,51 +659,45 @@ out: return NOTIFY_OK; } -static struct notifier_block rxe_net_notifier = { +struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -int rxe_net_init(void) +int rxe_net_ipv4_init(void) { - int err; + spin_lock_init(&dev_list_lock); + + recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), false); + if (IS_ERR(recv_sockets.sk4)) { + recv_sockets.sk4 = NULL; + pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); + return -1; + } + + return 0; +} + +int rxe_net_ipv6_init(void) +{ +#if IS_ENABLED(CONFIG_IPV6) spin_lock_init(&dev_list_lock); recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); + htons(ROCE_V2_UDP_DPORT), true); if (IS_ERR(recv_sockets.sk6)) { recv_sockets.sk6 = NULL; pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); return -1; } - - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); - if (IS_ERR(recv_sockets.sk4)) { - rxe_release_udp_tunnel(recv_sockets.sk6); - recv_sockets.sk4 = NULL; - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); - return -1; - } - - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); - pr_err("rxe: Failed to rigister netdev notifier\n"); - } - - return err; +#endif + return 0; } void rxe_net_exit(void) { - if (recv_sockets.sk6) - rxe_release_udp_tunnel(recv_sockets.sk6); - - if (recv_sockets.sk4) - rxe_release_udp_tunnel(recv_sockets.sk4); - + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); 
unregister_netdevice_notifier(&rxe_net_notifier); }
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 7b06f76d16cc..0daf7f09e5b5 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -44,10 +44,13 @@ struct rxe_recv_sockets { }; extern struct rxe_recv_sockets recv_sockets; +extern struct notifier_block rxe_net_notifier; +void rxe_release_udp_tunnel(struct socket *sk); struct rxe_dev *rxe_net_add(struct net_device *ndev); -int rxe_net_init(void); +int rxe_net_ipv4_init(void); +int rxe_net_ipv6_init(void); void rxe_net_exit(void); #endif /* RXE_NET_H */
From 908948877bbbb240f67d7eb27d3a392beb394a07 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:05 +0300 Subject: [PATCH 0286/1050] IB/rxe: Fix duplicate atomic request handling
When handling an ack for atomic opcodes like "fetch&add" or "cmp&swp", the method send_atomic_ack() saves the ack before sending it, in case it gets lost and never reaches the requester, in which case the method duplicate_request() will need to find it using the duplicated request.psn. But send_atomic_ack() used a wrong psn value and thus the saved ack was never found. This fix uses the ack.psn to locate the ack in case it's needed. This fix also copies the ack packet to the skb's control buffer, since duplicate_request() will need it when calling rxe_xmit_packet().
Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ebb03b46e2ad..3e0f0f2baace 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -972,11 +972,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; - res->first_psn = qp->resp.psn; - res->last_psn = qp->resp.psn; - res->cur_psn = qp->resp.psn; + res->first_psn = ack_pkt.psn; + res->last_psn = ack_pkt.psn; + res->cur_psn = ack_pkt.psn; rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); if (rc) { @@ -1116,8 +1118,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, rc = RESPST_CLEANUP; goto out; } - bth_set_psn(SKB_TO_PKT(skb_copy), - qp->resp.psn - 1); + /* Resend the result. */ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, pkt, skb_copy);
From 3050b99850247695cb07a5c15265afcc08bcf400 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:06 +0300 Subject: [PATCH 0287/1050] IB/rxe: Fix race condition between requester and completer
rxe_requester() sends a pkt with rxe_xmit_packet() and then calls rxe_update() to update the wqe and qp's psn values. But sometimes the response is received before the requester has had time to update the wqe, in which case the completer acts on erroneous wqe values. This fix updates the wqe and qp before actually sending the request, and rolls back when xmit fails.
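In outline, the reordered requester path looks like the following simplified sketch (the helper names are the ones introduced by this patch):

	save_state(wqe, qp, &rollback_wqe, &rollback_qp);  /* snapshot */
	update_wqe_state(qp, wqe, &pkt);                   /* publish wqe state */
	update_wqe_psn(qp, wqe, &pkt, payload);            /* and psn values first */

	ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
	if (ret)
		/* xmit failed: undo, so the completer never sees the update */
		rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);

With this ordering, a response that arrives immediately always finds fully updated wqe and qp values.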
Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 57 ++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 33b2d9d77021..13a848a518e8 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } static void update_wqe_state(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, - enum wqe_state *prev_state) + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt) { - enum wqe_state prev_state_ = wqe->state; - if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; } else { wqe->state = wqe_state_processing; } - - *prev_state = prev_state_; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_wqe_psn(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + int payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -546,10 +543,35 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; else qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} +static void save_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_qp->req.psn = qp->req.psn; +} + +static void rollback_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + qp->req.psn = rollback_qp->req.psn; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ qp->req.opcode = pkt->opcode; - if (pkt->mask & RXE_END_MASK) qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); @@ -571,7 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - enum wqe_state prev_state; + struct rxe_qp rollback_qp; + struct rxe_send_wqe rollback_wqe; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -688,13 +711,21 @@ next_wqe: goto err; } - update_wqe_state(qp, wqe, &pkt, &prev_state); + /* + * To prevent a race on wqe access between requester and completer, + * wqe members state and psn need to be set before calling + * rxe_xmit_packet(). + * Otherwise, completer might initiate an unjustified retry flow. 
+ */ + save_state(wqe, qp, &rollback_wqe, &rollback_qp); + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; kfree_skb(skb); - wqe->state = prev_state; + rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); From c1cc72cb6ff95cbe02ed747866576dccab4a5911 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:07 +0300 Subject: [PATCH 0288/1050] IB/rxe: Fix kmem_cache leak Decrement qp reference when handling error path in completer to prevent kmem_cache leak. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 36f67de44095..1c59ef2c67aa 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -689,7 +689,14 @@ int rxe_completer(void *arg) qp->req.need_retry = 1; rxe_run_task(&qp->req.task, 1); } + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; + } else { wqe->status = IB_WC_RETRY_EXC_ERR; state = COMPST_ERROR; @@ -716,6 +723,12 @@ int rxe_completer(void *arg) case COMPST_ERROR: do_complete(qp, wqe); rxe_qp_error(qp); + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; } } From e4618d40eb3dc1a6d1f55f7150ea25bb23ab410a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Sep 2016 08:15:37 +0100 Subject: [PATCH 0289/1050] IB/rdmavt: Don't vfree a kzalloc'ed memory region The userspace memory region 'mr' is allocated with kzalloc in __rvt_alloc_mr however it is incorrectly being freed with vfree in __rvt_free_mr. Fix this by using kfree to free it. Signed-off-by: Colin Ian King Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 80c4b6b401b8..46b64970058e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) { rvt_deinit_mregion(&mr->mr); rvt_free_lkey(&mr->mr); - vfree(mr); + kfree(mr); } /** From fabf9201806255d70386d8bc9f6a2942c0940da2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 0290/1050] rxrpc: Remove some whitespace. Remove a tab that's on a line that should otherwise be blank. 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 61432049869b..9367c3be31eb 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -31,7 +31,7 @@ static void rxrpc_set_timer(struct rxrpc_call *call) _enter("{%ld,%ld,%ld:%ld}", call->ack_at - now, call->resend_at - now, call->expire_at - now, call->timer.expires - now); - + read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { From 4b22457c06a3a950e14938c486283ad0f308c13d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 0291/1050] rxrpc: Move the check of rx_pkt_offset from rxrpc_locate_data() to caller Move the check of rx_pkt_offset from rxrpc_locate_data() to the caller, rxrpc_recvmsg_data(), so that it's more clear what's going on there. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a284205b8ecf..0d085f5cf1bf 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -240,9 +240,6 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, int ret; u8 annotation = *_annotation; - if (offset > 0) - return 0; - /* Locate the subpacket */ offset = sp->offset; len = skb->len - sp->offset; @@ -303,8 +300,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len); + if (rx_pkt_offset == 0) + ret = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); From 2e2ea51dec2ab6a81950d4b436eb66ebf45dd507 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 0292/1050] rxrpc: Check the return value of rxrpc_locate_data() Check the return value of rxrpc_locate_data() in rxrpc_recvmsg_data(). Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 0d085f5cf1bf..1edf2cf62cc5 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -300,10 +300,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - if (rx_pkt_offset == 0) + if (rx_pkt_offset == 0) { ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], &rx_pkt_offset, &rx_pkt_len); + if (ret < 0) + goto out; + } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); From 816c9fce12f3745abc959c0fca8ace1c2c51421c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 0293/1050] rxrpc: Fix handling of the last packet in rxrpc_recvmsg_data() The code for determining the last packet in rxrpc_recvmsg_data() has been using the RXRPC_CALL_RX_LAST flag to determine if the rx_top pointer points to the last packet or not. This isn't a good idea, however, as the input code may be running simultaneously on another CPU and that sets the flag *before* updating the top pointer. Fix this by the following means: (1) Restrict the use of RXRPC_CALL_RX_LAST to the input routines only. 
There's otherwise a synchronisation problem between detecting the flag and checking tx_top. This could probably be dealt with by appropriate application of memory barriers, but there's a simpler way. (2) Set RXRPC_CALL_RX_LAST after setting rx_top. (3) Make rxrpc_rotate_rx_window() consult the flags header field of the DATA packet it's about to discard to see if that was the last packet. Use this as the basis for ending the Rx phase. This shouldn't be a problem because the recvmsg side of things is guaranteed to see the packets in order. (4) Make rxrpc_recvmsg_data() return 1 to indicate the end of the data if: (a) the packet it has just processed is marked as RXRPC_LAST_PACKET (b) the call's Rx phase has been ended. Signed-off-by: David Howells --- net/rxrpc/input.c | 4 +++- net/rxrpc/recvmsg.c | 49 ++++++++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 75af0bd316c7..f0d9115b9b7e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -238,7 +238,7 @@ next_subpacket: len = RXRPC_JUMBO_DATALEN; if (flags & RXRPC_LAST_PACKET) { - if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && seq != call->rx_top) return rxrpc_proto_abort("LSN", call, seq); } else { @@ -282,6 +282,8 @@ next_subpacket: call->rxtx_buffer[ix] = skb; if (after(seq, call->rx_top)) smp_store_release(&call->rx_top, seq); + if (flags & RXRPC_LAST_PACKET) + set_bit(RXRPC_CALL_RX_LAST, &call->flags); queued = true; if (after_eq(seq, call->rx_expect_next)) { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 1edf2cf62cc5..8b8d7e14f800 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -134,6 +134,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); @@ -163,8 +165,10 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) */ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) { + struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t hard_ack, top; + u8 flags; int ix; _enter("%d", call->debug_id); @@ -177,6 +181,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); + flags = sp->hdr.flags; call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ @@ -184,8 +190,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_free_skb(skb); - _debug("%u,%u,%lx", hard_ack, top, call->flags); - if (hard_ack == top && test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + _debug("%u,%u,%02x", hard_ack, top, flags); + if (flags & RXRPC_LAST_PACKET) rxrpc_end_rx_phase(call); } @@ -278,13 +284,19 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, size_t remain; bool last; unsigned int rx_pkt_offset, rx_pkt_len; - int ix, copy, ret = 0; + int ix, copy, ret = -EAGAIN, ret2; _enter(""); rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; + if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + seq = call->rx_hard_ack; + ret = 1; + goto done; + } + /* Barriers against rxrpc_input_data(). 
*/ hard_ack = call->rx_hard_ack; top = smp_load_acquire(&call->rx_top); @@ -301,11 +313,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, sock_recv_timestamp(msg, sock->sk, skb); if (rx_pkt_offset == 0) { - ret = rxrpc_locate_data(call, skb, - &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len); - if (ret < 0) + ret2 = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + if (ret2 < 0) { + ret = ret2; goto out; + } } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); @@ -316,10 +330,12 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (copy > remain) copy = remain; if (copy > 0) { - ret = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, - copy); - if (ret < 0) + ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret2 < 0) { + ret = ret2; goto out; + } /* handle piecemeal consumption of data packets */ _debug("copied %d @%zu", copy, *_offset); @@ -332,6 +348,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_len > 0) { _debug("buffer full"); ASSERTCMP(*_offset, ==, len); + ret = 0; break; } @@ -342,19 +359,19 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = 0; rx_pkt_len = 0; - ASSERTIFCMP(last, seq, ==, top); + if (last) { + ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); + ret = 1; + goto out; + } } - if (after(seq, top)) { - ret = -EAGAIN; - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags)) - ret = 1; - } out: if (!(flags & MSG_PEEK)) { call->rx_pkt_offset = rx_pkt_offset; call->rx_pkt_len = rx_pkt_len; } +done: _leave(" = %d [%u/%u]", ret, seq, top); return ret; } From e6f3afb3fc058e17b407b6f7cac08058b19e641c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 0294/1050] rxrpc: Record calls that need to be accepted Record calls that need to be accepted using sk_acceptq_added() otherwise the backlog counter goes negative because sk_acceptq_removed() is called. This causes the preallocator to malfunction. Calls that are preaccepted by AFS within the kernel aren't affected by this. Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 26c293ef98eb..323b8da50163 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -369,6 +369,8 @@ found_service: if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); + else + sk_acceptq_added(&rx->sk); spin_lock(&conn->state_lock); switch (conn->state) { From 0360da6db7d6390e7bd2f6c93b01af29bcd36ad5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: [PATCH 0295/1050] rxrpc: Purge the to_be_accepted queue on socket release Purge the queue of to_be_accepted calls on socket release. Note that purging sock_calls doesn't release the ref owned by to_be_accepted. Probably the sock_calls list is redundant given a purges of the recvmsg_q, the to_be_accepted queue and the calls tree. 
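In outline, the purge walks the list, aborts each pending call and drops the reference that the to_be_accepted list itself holds (an abridged sketch of the loop below; the patch also transmits the abort and releases the call):

	while (!list_empty(&rx->to_be_accepted)) {
		call = list_entry(rx->to_be_accepted.next,
				  struct rxrpc_call, accept_link);
		list_del(&call->accept_link);
		rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET);
		rxrpc_put_call(call, rxrpc_call_put);  /* the list's own ref */
	}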
Signed-off-by: David Howells --- net/rxrpc/call_object.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 22f9b0d1a138..b0ffbd9664e6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -476,6 +476,16 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) _enter("%p", rx); + while (!list_empty(&rx->to_be_accepted)) { + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); + list_del(&call->accept_link); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + while (!list_empty(&rx->sock_calls)) { call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); From 66d58af7f4af53e8318e852efa31a7cb0e31bfb6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 0296/1050] rxrpc: Fix the putting of client connections In rxrpc_put_one_client_conn(), if a connection has RXRPC_CONN_COUNTED set on it, then it's accounted for in rxrpc_nr_client_conns and may be on various lists - and this is cleaned up correctly. However, if the connection doesn't have RXRPC_CONN_COUNTED set on it, then the put routine returns rather than just skipping the extra bit of cleanup. Fix this by making the extra bit of clean up conditional instead and always killing off the connection. This manifests itself as connections with a zero usage count hanging around in /proc/net/rxrpc_conns because the connection allocated, but discarded, due to a race with another process that set up a parallel connection, which was then shared instead. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 9344a8416ceb..5a675c43cace 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -818,7 +818,7 @@ idle_connection: static struct rxrpc_connection * rxrpc_put_one_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *next; + struct rxrpc_connection *next = NULL; struct rxrpc_local *local = conn->params.local; unsigned int nr_conns; @@ -834,24 +834,22 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); - if (!test_bit(RXRPC_CONN_COUNTED, &conn->flags)) - return NULL; + if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + spin_lock(&rxrpc_client_conn_cache_lock); + nr_conns = --rxrpc_nr_client_conns; - spin_lock(&rxrpc_client_conn_cache_lock); - nr_conns = --rxrpc_nr_client_conns; + if (nr_conns < rxrpc_max_client_connections && + !list_empty(&rxrpc_waiting_client_conns)) { + next = list_entry(rxrpc_waiting_client_conns.next, + struct rxrpc_connection, cache_link); + rxrpc_get_connection(next); + rxrpc_activate_conn(next); + } - next = NULL; - if (nr_conns < rxrpc_max_client_connections && - !list_empty(&rxrpc_waiting_client_conns)) { - next = list_entry(rxrpc_waiting_client_conns.next, - struct rxrpc_connection, cache_link); - rxrpc_get_connection(next); - rxrpc_activate_conn(next); + spin_unlock(&rxrpc_client_conn_cache_lock); } - spin_unlock(&rxrpc_client_conn_cache_lock); rxrpc_kill_connection(conn); - if (next) rxrpc_activate_channels(next); From 357f5ef64628c2d6c532e7a6bfc0bc3830b4c221 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 
0297/1050] rxrpc: Call rxrpc_release_call() on error in rxrpc_new_client_call() Call rxrpc_release_call() on getting an error in rxrpc_new_client_call() rather than trying to do the cleanup ourselves. This isn't a problem, provided we set RXRPC_CALL_HAS_USERID only if we actually add the call to the calls tree as cleanup code fragments that would otherwise cause problems are conditional. Without this, we miss some of the cleanup. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index b0ffbd9664e6..23f5a5f58282 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -226,9 +226,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ - call->user_call_ID = user_call_ID; - __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); - write_lock(&rx->call_lock); pp = &rx->calls.rb_node; @@ -242,10 +239,12 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else - goto found_user_ID_now_present; + goto error_dup_user_ID; } rcu_assign_pointer(call->socket, rx); + call->user_call_ID = user_call_ID; + __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); @@ -276,33 +275,22 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _leave(" = %p [new]", call); return call; -error: - write_lock(&rx->call_lock); - rb_erase(&call->sock_node, &rx->calls); - write_unlock(&rx->call_lock); - rxrpc_put_call(call, rxrpc_call_put_userid); - - write_lock(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock(&rxrpc_call_lock); - -error_out: - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = %d", ret); - return ERR_PTR(ret); - /* We unexpectedly found the user ID in the list after taking * the call_lock. This shouldn't happen unless the user races * with itself and tries to add the same user ID twice at the * same time in different threads. */ -found_user_ID_now_present: +error_dup_user_ID: write_unlock(&rx->call_lock); ret = -EEXIST; - goto error_out; + +error: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ERR_PTR(ret); } /* From 78883793f8ac4bb3f97d48db7a8c71d8476bcf98 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 0298/1050] rxrpc: Fix unexposed client conn release If the last call on a client connection is release after the connection has had a bunch of calls allocated but before any DATA packets are sent (so that it's not yet marked RXRPC_CONN_EXPOSED), an assertion will happen in rxrpc_disconnect_client_call(). af_rxrpc: Assertion failed - 1(0x1) >= 2(0x2) is false ------------[ cut here ]------------ kernel BUG at ../net/rxrpc/conn_client.c:753! This is because it's expecting the conn to have been exposed and to have 2 or more refs - but this isn't necessarily the case. Simply remove the assertion. 
This allows the conn to be moved into the inactive state and deleted if it isn't resurrected before the final put is called.
Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5a675c43cace..226bc910e556 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -721,7 +721,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) } ASSERTCMP(rcu_access_pointer(chan->call), ==, call); - ASSERTCMP(atomic_read(&conn->usage), >=, 2); /* If a client call was exposed to the world, we save the result for * retransmission.
From d01dc4c3c1209e865368d5f8d3b5e08f97326ca9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 0299/1050] rxrpc: Fix the parsing of soft-ACKs
The soft-ACK parser doesn't increment the pointer into the soft-ACK list, resulting in the first ACK/NACK value being applied to all the relevant packets in the Tx queue. This has the potential to miss retransmissions and cause excessive retransmissions. Fix this by incrementing the pointer.
Signed-off-by: David Howells --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index f0d9115b9b7e..c1f83d22f9b7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -384,7 +384,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, for (; nr_acks > 0; nr_acks--, seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; - switch (*acks) { + switch (*acks++) { case RXRPC_ACK_TYPE_ACK: call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; break;
From dfa7d9204054b091949d87270e55e0fd5800c3ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 0300/1050] rxrpc: Fix retransmission algorithm
Make the retransmission algorithm use for-loops instead of do-loops and move the counter increments into the for-statement increment slots. Though the do-loops are slightly more efficient since there will be at least one pass through each loop, the counter increments are harder to get right as the continue-statements skip them. Without this, if there are any positive acks within the loop, the do-loop will cycle forever because the counter increment is never done.
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 9367c3be31eb..f0cabc48a1b7 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -163,8 +163,7 @@ static void rxrpc_resend(struct rxrpc_call *call) */ now = jiffies; resend_at = now + rxrpc_resend_timeout; - seq = cursor + 1; - do { + for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; if (annotation == RXRPC_TX_ANNO_ACK) @@ -184,8 +183,7 @@ static void rxrpc_resend(struct rxrpc_call *call) /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; - seq++; - } while (before_eq(seq, top)); + } call->resend_at = resend_at; @@ -194,8 +192,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * lock is dropped, it may clear some of the retransmission markers for * packets that it soft-ACKs.
*/ - seq = cursor + 1; - do { + for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; if (annotation != RXRPC_TX_ANNO_RETRANS) @@ -237,8 +234,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; - seq++; - } while (before_eq(seq, top)); + } out_unlock: spin_unlock_bh(&call->lock);
From 27d0fc431c6b4847231c1490fa541bc3f5a7a351 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 0301/1050] rxrpc: Don't transmit an ACK if there's no reason set
Don't transmit an ACK if call->ackr_reason is unset. There's the possibility of a race between recvmsg() sending an ACK and the background processing thread trying to send the same one.
Signed-off-by: David Howells --- net/rxrpc/output.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 06a9aca739d1..aa0507214b31 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -137,6 +137,11 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) switch (type) { case RXRPC_PACKET_TYPE_ACK: spin_lock_bh(&call->lock); + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } n = rxrpc_fill_out_ack(call, pkt); call->ackr_reason = 0;
From 2311e327cda015a24a201efc7655a9a983679e55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 0302/1050] rxrpc: Be consistent about switch value in rxrpc_send_call_packet()
rxrpc_send_call_packet() should use type in both its switch-statements rather than using pkt->whdr.type. This might give the compiler an easier job of uninitialised variable checking.
Signed-off-by: David Howells --- net/rxrpc/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index aa0507214b31..0b21ed859de7 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -182,7 +182,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) &msg, iov, ioc, len); if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { - switch (pkt->whdr.type) { + switch (type) { case RXRPC_PACKET_TYPE_ACK: rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew),
From 182f50562490e5861afaa7a2e42dcc0dd9dcfcca Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: [PATCH 0303/1050] rxrpc: Fix the basic transmit DATA packet content size at 1412 bytes
Fix the basic transmit DATA packet content size at 1412 bytes so that DATA packets can be arbitrarily assembled into jumbo packets. In the future, I'm thinking of moving to keeping a jumbo packet header at the beginning of each packet in the Tx queue and creating the packet header on the spot when kernel_sendmsg() is invoked. That way, jumbo packets can be assembled on the spur of the moment for (re-)transmission.
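As a worked example of why a fixed content size makes assembly trivial: in the RX wire format an n-segment jumbo packet is one 28-byte wire header, n segments of RXRPC_JUMBO_DATALEN (1412) bytes each, and a 4-byte jumbo secondary header between consecutive segments (header sizes as defined in the rxrpc protocol headers; treat the arithmetic below as a sketch):

	/* UDP payload length of an n-segment jumbo DATA packet */
	size_t len = sizeof(struct rxrpc_wire_header)             /* 28 */
		   + n * RXRPC_JUMBO_DATALEN                      /* n * 1412 */
		   + (n - 1) * sizeof(struct rxrpc_jumbo_header); /* (n - 1) * 4 */

Because every queued packet carries exactly 1412 bytes of data, any consecutive run of them can be glued into a jumbo packet without re-slicing the payload.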
Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index cba236575073..8bfddf4e338c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -214,7 +214,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, goto maybe_error; } - max = call->conn->params.peer->maxdata; + max = RXRPC_JUMBO_DATALEN; max -= call->conn->security_size; max &= ~(call->conn->size_align - 1UL); From a3868bfc8d5b0f36c784deab644ee1d2b0e6974b Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 0304/1050] rxrpc: Print the packet type name in the Rx packet trace Print a symbolic packet type name for each valid received packet in the trace output, not just a number. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 5 +++-- net/rxrpc/ar-internal.h | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ea3b10ed91a8..0a30c673509c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -93,11 +93,12 @@ TRACE_EVENT(rxrpc_rx_packet, memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); ), - TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x", + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s", __entry->hdr.epoch, __entry->hdr.cid, __entry->hdr.callNumber, __entry->hdr.serviceId, __entry->hdr.serial, __entry->hdr.seq, - __entry->hdr.type, __entry->hdr.flags) + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") ); TRACE_EVENT(rxrpc_rx_done, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e78c40b37db5..0f6fafa2c271 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -551,6 +551,9 @@ enum rxrpc_call_trace { extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; +extern const char *const rxrpc_pkts[]; +extern const char *rxrpc_acks(u8 reason); + #include /* @@ -851,11 +854,8 @@ extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; extern unsigned int rxrpc_resend_timeout; -extern const char *const rxrpc_pkts[]; extern const s8 rxrpc_ack_priority[]; -extern const char *rxrpc_acks(u8 reason); - /* * output.c */ From a84a46d73050f70fd8820c74840e2815c78d8690 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 0305/1050] rxrpc: Add some additional call tracing Add additional call tracepoint points for noting call-connected, call-released and connection-failed events. Also fix one tracepoint that was using an integer instead of the corresponding enum value as the point type. 
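The point-type fix in miniature (the call site is in rxrpc_new_client_call(); shown here only for illustration):

	/* before: a bare 0, which merely happened to equal rxrpc_call_new_client */
	trace_rxrpc_call(call, 0, atomic_read(&call->usage), here,
			 (const void *)user_call_ID);

	/* after: the enum value survives any reordering of rxrpc_call_trace */
	trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
			 here, (const void *)user_call_ID);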
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/call_object.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0f6fafa2c271..4a73c20d9436 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -539,6 +539,8 @@ enum rxrpc_call_trace { rxrpc_call_queued, rxrpc_call_queued_ref, rxrpc_call_seen, + rxrpc_call_connected, + rxrpc_call_release, rxrpc_call_got, rxrpc_call_got_userid, rxrpc_call_got_kernel, @@ -546,6 +548,7 @@ enum rxrpc_call_trace { rxrpc_call_put_userid, rxrpc_call_put_kernel, rxrpc_call_put_noqueue, + rxrpc_call_error, rxrpc_call__nr_trace }; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 23f5a5f58282..0df9d1af8edb 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -53,6 +53,8 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_new_service] = "NWs", [rxrpc_call_queued] = "QUE", [rxrpc_call_queued_ref] = "QUR", + [rxrpc_call_connected] = "CON", + [rxrpc_call_release] = "RLS", [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", [rxrpc_call_got_userid] = "Gus", @@ -61,6 +63,7 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_put_userid] = "Pus", [rxrpc_call_put_kernel] = "Pke", [rxrpc_call_put_noqueue] = "PNQ", + [rxrpc_call_error] = "*E*", }; struct kmem_cache *rxrpc_call_jar; @@ -222,8 +225,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - trace_rxrpc_call(call, 0, atomic_read(&call->usage), here, - (const void *)user_call_ID); + trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), + here, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); @@ -263,6 +266,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error; + trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), + here, ERR_PTR(ret)); + spin_lock_bh(&call->conn->params.peer->lock); hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); @@ -287,6 +293,8 @@ error_dup_user_ID: error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); + trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), + here, ERR_PTR(ret)); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); @@ -396,15 +404,17 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { + const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; int i; _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + here, (const void *)call->flags); - rxrpc_see_call(call); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); spin_lock_bh(&call->lock); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) From 363deeab6d0f308d33d011323661ae9cf5f9f8d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 0306/1050] rxrpc: Add connection tracepoint and client conn state tracepoint Add a pair of tracepoints, one to track rxrpc_connection struct ref counting and the other to track the client connection cache state. 
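The ref-counting helpers being traced all follow the same shape; the conditional get, for instance, only takes a reference while the count is still non-zero, so a connection already on its way to destruction is never revived. A sketch of that logic, minus the tracepoint plumbing (the helper name here is hypothetical):

	static struct rxrpc_connection *conn_get_maybe(struct rxrpc_connection *conn)
	{
		/* bump usage unless it has already dropped to zero */
		if (conn && __atomic_add_unless(&conn->usage, 1, 0) > 0)
			return conn;
		return NULL;
	}

Each trace_rxrpc_conn() event records the post-operation count together with __builtin_return_address(0), which is what makes a leaked reference attributable to its call site.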
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 60 +++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 76 +++++++++++++++++++++------------- net/rxrpc/call_accept.c | 4 ++ net/rxrpc/call_object.c | 2 - net/rxrpc/conn_client.c | 80 +++++++++++++++++++++++++----------- net/rxrpc/conn_event.c | 2 +- net/rxrpc/conn_object.c | 72 ++++++++++++++++++++++++++++++-- net/rxrpc/conn_service.c | 4 ++ net/rxrpc/misc.c | 31 ++++++++++++++ 9 files changed, 273 insertions(+), 58 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 0a30c673509c..c0c496c83f31 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,6 +16,66 @@ #include +TRACE_EVENT(rxrpc_conn, + TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op, + int usage, const void *where), + + TP_ARGS(conn, op, usage, where), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(int, op ) + __field(int, usage ) + __field(const void *, where ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->op = op; + __entry->usage = usage; + __entry->where = where; + ), + + TP_printk("C=%p %s u=%d sp=%pSR", + __entry->conn, + rxrpc_conn_traces[__entry->op], + __entry->usage, + __entry->where) + ); + +TRACE_EVENT(rxrpc_client, + TP_PROTO(struct rxrpc_connection *conn, int channel, + enum rxrpc_client_trace op), + + TP_ARGS(conn, channel, op), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(u32, cid ) + __field(int, channel ) + __field(int, usage ) + __field(enum rxrpc_client_trace, op ) + __field(enum rxrpc_conn_cache_state, cs ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->channel = channel; + __entry->usage = atomic_read(&conn->usage); + __entry->op = op; + __entry->cid = conn->proto.cid; + __entry->cs = conn->cache_state; + ), + + TP_printk("C=%p h=%2d %s %s i=%08x u=%d", + __entry->conn, + __entry->channel, + rxrpc_client_traces[__entry->op], + rxrpc_conn_cache_states[__entry->cs], + __entry->cid, + __entry->usage) + ); + TRACE_EVENT(rxrpc_call, TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, int usage, const void *where, const void *aux), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4a73c20d9436..6ca40eea3022 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -314,6 +314,7 @@ enum rxrpc_conn_cache_state { RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ + RXRPC_CONN__NR_CACHE_STATES }; /* @@ -533,6 +534,44 @@ struct rxrpc_call { rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; +enum rxrpc_conn_trace { + rxrpc_conn_new_client, + rxrpc_conn_new_service, + rxrpc_conn_queued, + rxrpc_conn_seen, + rxrpc_conn_got, + rxrpc_conn_put_client, + rxrpc_conn_put_service, + rxrpc_conn__nr_trace +}; + +extern const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4]; + +enum rxrpc_client_trace { + rxrpc_client_activate_chans, + rxrpc_client_alloc, + rxrpc_client_chan_activate, + rxrpc_client_chan_disconnect, + rxrpc_client_chan_pass, + rxrpc_client_chan_unstarted, + rxrpc_client_cleanup, + rxrpc_client_count, + rxrpc_client_discard, + rxrpc_client_duplicate, + rxrpc_client_exposed, + rxrpc_client_replace, + rxrpc_client_to_active, + rxrpc_client_to_culled, + rxrpc_client_to_idle, + rxrpc_client_to_inactive, + rxrpc_client_to_waiting, + rxrpc_client_uncount, + 
rxrpc_client__nr_trace +}; + +extern const char rxrpc_client_traces[rxrpc_client__nr_trace][7]; +extern const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5]; + enum rxrpc_call_trace { rxrpc_call_new_client, rxrpc_call_new_service, @@ -734,7 +773,11 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); -void __rxrpc_put_connection(struct rxrpc_connection *); +bool rxrpc_queue_conn(struct rxrpc_connection *); +void rxrpc_see_connection(struct rxrpc_connection *); +void rxrpc_get_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); +void rxrpc_put_service_conn(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) @@ -747,38 +790,15 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) return !rxrpc_conn_is_client(conn); } -static inline void rxrpc_get_connection(struct rxrpc_connection *conn) -{ - atomic_inc(&conn->usage); -} - -static inline -struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn) -{ - return atomic_inc_not_zero(&conn->usage) ? conn : NULL; -} - static inline void rxrpc_put_connection(struct rxrpc_connection *conn) { if (!conn) return; - if (rxrpc_conn_is_client(conn)) { - if (atomic_dec_and_test(&conn->usage)) - rxrpc_put_client_conn(conn); - } else { - if (atomic_dec_return(&conn->usage) == 1) - __rxrpc_put_connection(conn); - } -} - -static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) -{ - if (!rxrpc_get_connection_maybe(conn)) - return false; - if (!rxrpc_queue_work(&conn->processor)) - rxrpc_put_connection(conn); - return true; + if (rxrpc_conn_is_client(conn)) + rxrpc_put_client_conn(conn); + else + rxrpc_put_service_conn(conn); } /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 323b8da50163..3e474508ba75 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -85,6 +85,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), here); } /* Now it gets complicated, because calls get registered with the @@ -290,6 +293,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_get_local(local); conn->params.local = local; conn->params.peer = peer; + rxrpc_see_connection(conn); rxrpc_new_incoming_connection(conn, skb); } else { rxrpc_get_connection(conn); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0df9d1af8edb..54f30482a7fd 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -479,8 +479,6 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) struct rxrpc_call, accept_link); list_del(&call->accept_link); rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); - rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 226bc910e556..c76a125df891 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -105,6 +105,14 @@ static void rxrpc_discard_expired_client_conns(struct work_struct 
*); static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, rxrpc_discard_expired_client_conns); +const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5] = { + [RXRPC_CONN_CLIENT_INACTIVE] = "Inac", + [RXRPC_CONN_CLIENT_WAITING] = "Wait", + [RXRPC_CONN_CLIENT_ACTIVE] = "Actv", + [RXRPC_CONN_CLIENT_CULLED] = "Cull", + [RXRPC_CONN_CLIENT_IDLE] = "Idle", +}; + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The @@ -220,6 +228,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) rxrpc_get_local(conn->params.local); key_get(conn->params.key); + trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage), + __builtin_return_address(0)); + trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); return conn; @@ -385,6 +396,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, rb_replace_node(&conn->client_node, &candidate->client_node, &local->client_conns); + trace_rxrpc_client(conn, -1, rxrpc_client_replace); goto candidate_published; } } @@ -409,8 +421,11 @@ found_extant_conn: _debug("found conn"); spin_unlock(&local->client_conns_lock); - rxrpc_put_connection(candidate); - candidate = NULL; + if (candidate) { + trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); + rxrpc_put_connection(candidate); + candidate = NULL; + } spin_lock(&conn->channel_lock); call->conn = conn; @@ -433,6 +448,7 @@ error: */ static void rxrpc_activate_conn(struct rxrpc_connection *conn) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_active); conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; rxrpc_nr_active_client_conns++; list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); @@ -462,8 +478,10 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) spin_lock(&rxrpc_client_conn_cache_lock); nr_conns = rxrpc_nr_client_conns; - if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) + if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_count); rxrpc_nr_client_conns = nr_conns + 1; + } switch (conn->cache_state) { case RXRPC_CONN_CLIENT_ACTIVE: @@ -494,6 +512,7 @@ activate_conn: wait_for_capacity: _debug("wait"); + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); goto out_unlock; @@ -524,6 +543,8 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_call, chan_wait_link); u32 call_id = chan->call_counter + 1; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + write_lock_bh(&call->state_lock); call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; write_unlock_bh(&call->state_lock); @@ -563,6 +584,8 @@ static void rxrpc_activate_channels(struct rxrpc_connection *conn) _enter("%d", conn->debug_id); + trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); + if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) return; @@ -657,10 +680,13 @@ int rxrpc_connect_call(struct rxrpc_call *call, * had a chance at re-use (the per-connection security negotiation is * expensive). 
*/ -static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) +static void rxrpc_expose_client_conn(struct rxrpc_connection *conn, + unsigned int channel) { - if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_exposed); rxrpc_get_connection(conn); + } } /* @@ -669,9 +695,9 @@ static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) */ void rxrpc_expose_client_call(struct rxrpc_call *call) { + unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = - &conn->channels[call->cid & RXRPC_CHANNELMASK]; + struct rxrpc_channel *chan = &conn->channels[channel]; if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { /* Mark the call ID as being used. If the callNumber counter @@ -682,7 +708,7 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) chan->call_counter++; if (chan->call_counter >= INT_MAX) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_expose_client_conn(conn); + rxrpc_expose_client_conn(conn, channel); } } @@ -695,6 +721,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) struct rxrpc_connection *conn = call->conn; struct rxrpc_channel *chan = &conn->channels[channel]; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); call->conn = NULL; spin_lock(&conn->channel_lock); @@ -709,6 +736,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); list_del_init(&call->chan_wait_link); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted); + /* We must deactivate or idle the connection if it's now * waiting for nothing. */ @@ -739,7 +768,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) /* See if we can pass the channel directly to another call. */ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && !list_empty(&conn->waiting_calls)) { - _debug("pass chan"); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); goto out_2; } @@ -762,7 +791,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out; } - _debug("pass chan 2"); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); goto out; @@ -794,7 +823,7 @@ idle_connection: * immediately or moved to the idle list for a short while. 
*/ if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { - _debug("make idle"); + trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; conn->cache_state = RXRPC_CONN_CLIENT_IDLE; list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); @@ -804,7 +833,7 @@ idle_connection: &rxrpc_client_conn_reap, rxrpc_conn_idle_client_expiry); } else { - _debug("make inactive"); + trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; list_del_init(&conn->cache_link); } @@ -821,6 +850,8 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) struct rxrpc_local *local = conn->params.local; unsigned int nr_conns; + trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); + if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { spin_lock(&local->client_conns_lock); if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, @@ -834,6 +865,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_uncount); spin_lock(&rxrpc_client_conn_cache_lock); nr_conns = --rxrpc_nr_client_conns; @@ -863,20 +895,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) */ void rxrpc_put_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *next; + const void *here = __builtin_return_address(0); + int n; do { - _enter("%p{u=%d,d=%d}", - conn, atomic_read(&conn->usage), conn->debug_id); + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here); + if (n > 0) + return; + ASSERTCMP(n, >=, 0); - next = rxrpc_put_one_client_conn(conn); - - if (!next) - break; - conn = next; - } while (atomic_dec_and_test(&conn->usage)); - - _leave(""); + conn = rxrpc_put_one_client_conn(conn); + } while (conn); } /* @@ -907,9 +937,11 @@ static void rxrpc_cull_active_client_conns(void) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); if (list_empty(&conn->waiting_calls)) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); conn->cache_state = RXRPC_CONN_CLIENT_CULLED; list_del_init(&conn->cache_link); } else { + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); @@ -983,7 +1015,7 @@ next: goto not_yet_expired; } - _debug("discard conn %d", conn->debug_id); + trace_rxrpc_client(conn, -1, rxrpc_client_discard); if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) BUG(); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 0691007cfc02..a43f4c94a88d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -377,7 +377,7 @@ void rxrpc_process_connection(struct work_struct *work) u32 abort_code = RX_PROTOCOL_ERROR; int ret; - _enter("{%d}", conn->debug_id); + rxrpc_see_connection(conn); if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index bb1f29280aea..3b55aee0c436 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -246,11 +246,77 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) } /* - * release a virtual connection + * Queue a connection's work processor, getting a ref to pass to the work + * queue. 
*/ -void __rxrpc_put_connection(struct rxrpc_connection *conn) +bool rxrpc_queue_conn(struct rxrpc_connection *conn) { - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n == 0) + return false; + if (rxrpc_queue_work(&conn->processor)) + trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here); + else + rxrpc_put_connection(conn); + return true; +} + +/* + * Note the re-emergence of a connection. + */ +void rxrpc_see_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + if (conn) { + int n = atomic_read(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here); + } +} + +/* + * Get a ref on a connection. + */ +void rxrpc_get_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_got, n, here); +} + +/* + * Try to get a ref on a connection. + */ +struct rxrpc_connection * +rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + + if (conn) { + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n > 0) + trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); + else + conn = NULL; + } + return conn; +} + +/* + * Release a service connection + */ +void rxrpc_put_service_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n; + + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); + ASSERTCMP(n, >=, 0); + if (n == 0) + rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); } /* diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 83d54da4ce8b..eef551f40dc2 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -136,6 +136,10 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) list_add_tail(&conn->link, &rxrpc_connections); list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), + __builtin_return_address(0)); } return conn; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 8b910780f1ac..598064d3bdd2 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -101,3 +101,34 @@ const char *rxrpc_acks(u8 reason) reason = ARRAY_SIZE(str) - 1; return str[reason]; } + +const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { + [rxrpc_conn_new_client] = "NWc", + [rxrpc_conn_new_service] = "NWs", + [rxrpc_conn_queued] = "QUE", + [rxrpc_conn_seen] = "SEE", + [rxrpc_conn_got] = "GOT", + [rxrpc_conn_put_client] = "PTc", + [rxrpc_conn_put_service] = "PTs", +}; + +const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { + [rxrpc_client_activate_chans] = "Activa", + [rxrpc_client_alloc] = "Alloc ", + [rxrpc_client_chan_activate] = "ChActv", + [rxrpc_client_chan_disconnect] = "ChDisc", + [rxrpc_client_chan_pass] = "ChPass", + [rxrpc_client_chan_unstarted] = "ChUnst", + [rxrpc_client_cleanup] = "Clean ", + [rxrpc_client_count] = "Count ", + [rxrpc_client_discard] = "Discar", + [rxrpc_client_duplicate] = "Duplic", + [rxrpc_client_exposed] = "Expose", + [rxrpc_client_replace] = "Replac", + [rxrpc_client_to_active] = "->Actv", + [rxrpc_client_to_culled] = "->Cull", + [rxrpc_client_to_idle] = "->Idle", + [rxrpc_client_to_inactive] = "->Inac", + [rxrpc_client_to_waiting] = "->Wait", + [rxrpc_client_uncount] = 
"Uncoun", +}; From a124fe3ee5d82f2c9a9b8818ed5cb9f61685f1d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 0307/1050] rxrpc: Add a tracepoint to follow the life of a packet in the Tx buffer Add a tracepoint to follow the insertion of a packet into the transmit buffer, its transmission and its rotation out of the buffer. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 26 ++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 12 ++++++++++++ net/rxrpc/input.c | 2 ++ net/rxrpc/misc.c | 9 +++++++++ net/rxrpc/sendmsg.c | 9 ++++++++- 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c0c496c83f31..ffc74b3e5b76 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -208,6 +208,32 @@ TRACE_EVENT(rxrpc_abort, __entry->abort_code, __entry->error, __entry->why) ); +TRACE_EVENT(rxrpc_transmit, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why), + + TP_ARGS(call, why), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_transmit_trace, why ) + __field(rxrpc_seq_t, tx_hard_ack ) + __field(rxrpc_seq_t, tx_top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->tx_hard_ack = call->tx_hard_ack; + __entry->tx_top = call->tx_top; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_transmit_traces[__entry->why], + __entry->tx_hard_ack + 1, + __entry->tx_top - __entry->tx_hard_ack) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6ca40eea3022..afa5dcc05fe0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -593,6 +593,18 @@ enum rxrpc_call_trace { extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; +enum rxrpc_transmit_trace { + rxrpc_transmit_wait, + rxrpc_transmit_queue, + rxrpc_transmit_queue_reqack, + rxrpc_transmit_queue_last, + rxrpc_transmit_rotate, + rxrpc_transmit_end, + rxrpc_transmit__nr_trace +}; + +extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c1f83d22f9b7..c7eb5104e91a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -59,6 +59,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) spin_unlock(&call->lock); + trace_rxrpc_transmit(call, rxrpc_transmit_rotate); wake_up(&call->waitq); while (list) { @@ -107,6 +108,7 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) } write_unlock(&call->state_lock); + trace_rxrpc_transmit(call, rxrpc_transmit_end); _leave(" = ok"); return true; } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 598064d3bdd2..dca89995f03e 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -132,3 +132,12 @@ const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { [rxrpc_client_to_waiting] = "->Wait", [rxrpc_client_uncount] = "Uncoun", }; + +const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { + [rxrpc_transmit_wait] = "WAI", + [rxrpc_transmit_queue] = "QUE", + [rxrpc_transmit_queue_reqack] = "QRA", + [rxrpc_transmit_queue_last] = "QLS", + [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_end] = "END", +}; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 8bfddf4e338c..28d8f73cf11d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c 
@@ -56,6 +56,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, break; } + trace_rxrpc_transmit(call, rxrpc_transmit_wait); release_sock(&rx->sk); *timeo = schedule_timeout(*timeo); lock_sock(&rx->sk); @@ -104,8 +105,14 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, smp_wmb(); call->rxtx_buffer[ix] = skb; call->tx_top = seq; - if (last) + if (last) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { + trace_rxrpc_transmit(call, rxrpc_transmit_queue_reqack); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_queue); + } if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); From ec71eb9ada34f8d1a58b7c35d906c59411295445 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 0308/1050] rxrpc: Add a tracepoint to log received ACK packets Add a tracepoint to log information from received ACK packets. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 26 ++++++++++++++++++++++++++ net/rxrpc/input.c | 2 ++ 2 files changed, 28 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ffc74b3e5b76..2b19f3fa5174 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -234,6 +234,32 @@ TRACE_EVENT(rxrpc_transmit, __entry->tx_top - __entry->tx_hard_ack) ); +TRACE_EVENT(rxrpc_rx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, u8 reason, u8 n_acks), + + TP_ARGS(call, first, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, first ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_acks(__entry->reason), + __entry->first, + __entry->n_acks) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c7eb5104e91a..7b18ca124978 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -440,6 +440,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; + trace_rxrpc_rx_ack(call, first_soft_ack, buf.ack.reason, nr_acks); + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, ntohs(buf.ack.maxSkew), From f3639df2d90bc919328c459b3c7c49ed5667a52f Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 0309/1050] rxrpc: Add a tracepoint to log ACK transmission Add a tracepoint to log information about ACK transmission. 
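The tracepoint is emitted from two places in the hunks below: rxrpc_fill_out_ack() traces a call-level ACK with the receive window state to hand, while rxrpc_conn_retransmit_call() answers at the connection level with no call available, and so traces with a NULL call pointer. A sketch of the two invocations as they appear in the diff:

    trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason,
                       top - hard_ack);

    trace_rxrpc_tx_ack(NULL, chan->last_seq, 0, RXRPC_ACK_DUPLICATE, 0);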
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 30 ++++++++++++++++++++++++++ net/rxrpc/conn_event.c | 3 +++ net/rxrpc/output.c | 7 ++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2b19f3fa5174..d545d692ae22 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -260,6 +260,36 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_tx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, + rxrpc_serial_t serial, u8 reason, u8 n_acks), + + TP_ARGS(call, first, serial, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, first ) + __field(rxrpc_serial_t, serial ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->serial = serial; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk("c=%p %s f=%08x r=%08x n=%u", + __entry->call, + rxrpc_acks(__entry->reason), + __entry->first, + __entry->serial, + __entry->n_acks) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a43f4c94a88d..9b19c51831aa 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -98,6 +98,9 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); len += sizeof(pkt.ack) + sizeof(pkt.info); + + trace_rxrpc_tx_ack(NULL, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); break; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0b21ed859de7..2c9daeadce87 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -38,12 +38,14 @@ struct rxrpc_pkt_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_pkt_buffer *pkt) { + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; /* Barrier against rxrpc_input_data(). */ + serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); top = smp_load_acquire(&call->rx_top); @@ -51,7 +53,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.maxSkew = htons(call->ackr_skew); pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); - pkt->ack.serial = htonl(call->ackr_serial); + pkt->ack.serial = htonl(serial); pkt->ack.reason = call->ackr_reason; pkt->ack.nAcks = top - hard_ack; @@ -75,6 +77,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); + trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason, + top - hard_ack); + *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; From 58dc63c998ea3c5a27e2bf9251eddbf0977056a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: [PATCH 0310/1050] rxrpc: Add a tracepoint to follow packets in the Rx buffer Add a tracepoint to follow the life of packets that get added to a call's receive buffer.
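Each event snapshots the receive window as well as the packet's serial and sequence numbers, so a trace line places the packet relative to what has been consumed and what has been queued. An annotated sketch of the snapshot taken in TP_fast_assign() below:

    __entry->hard_ack = call->rx_hard_ack;  /* highest sequence consumed so far */
    __entry->top      = call->rx_top;       /* highest sequence received so far */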
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 33 +++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 12 ++++++++++++ net/rxrpc/call_accept.c | 3 +++ net/rxrpc/input.c | 6 +++++- net/rxrpc/misc.c | 9 +++++++++ net/rxrpc/recvmsg.c | 11 +++++++++++ 6 files changed, 73 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d545d692ae22..7dd5f0188681 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -290,6 +290,39 @@ TRACE_EVENT(rxrpc_tx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_receive, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_receive_trace why, + rxrpc_serial_t serial, rxrpc_seq_t seq), + + TP_ARGS(call, why, serial, seq), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_receive_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, seq ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->seq = seq; + __entry->hard_ack = call->rx_hard_ack; + __entry->top = call->rx_top; + ), + + TP_printk("c=%p %s r=%08x q=%08x w=%08x-%08x", + __entry->call, + rxrpc_receive_traces[__entry->why], + __entry->serial, + __entry->seq, + __entry->hard_ack, + __entry->top) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index afa5dcc05fe0..e5d2f2fb8e41 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -605,6 +605,18 @@ enum rxrpc_transmit_trace { extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; +enum rxrpc_receive_trace { + rxrpc_receive_incoming, + rxrpc_receive_queue, + rxrpc_receive_queue_last, + rxrpc_receive_front, + rxrpc_receive_rotate, + rxrpc_receive_end, + rxrpc_receive__nr_trace +}; + +extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3e474508ba75..a8d39d7cf42c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -367,6 +367,9 @@ found_service: goto out; } + trace_rxrpc_receive(call, rxrpc_receive_incoming, + sp->hdr.serial, sp->hdr.seq); + /* Make the call live. 
*/ rxrpc_incoming_call(rx, call, skb); conn = call->conn; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7b18ca124978..b690220533c6 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -284,8 +284,12 @@ next_subpacket: call->rxtx_buffer[ix] = skb; if (after(seq, call->rx_top)) smp_store_release(&call->rx_top, seq); - if (flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } queued = true; if (after_eq(seq, call->rx_expect_next)) { diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index dca89995f03e..db5f1d54fc90 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -141,3 +141,12 @@ const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { [rxrpc_transmit_rotate] = "ROT", [rxrpc_transmit_end] = "END", }; + +const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { + [rxrpc_receive_incoming] = "INC", + [rxrpc_receive_queue] = "QUE", + [rxrpc_receive_queue_last] = "QLS", + [rxrpc_receive_front] = "FRN", + [rxrpc_receive_rotate] = "ROT", + [rxrpc_receive_end] = "END", +}; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8b8d7e14f800..22d51087c580 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -134,6 +134,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { @@ -167,6 +168,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; u8 flags; int ix; @@ -183,6 +185,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_see_skb(skb); sp = rxrpc_skb(skb); flags = sp->hdr.flags; + serial = sp->hdr.serial; + if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) + serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; + call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ @@ -191,6 +197,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_free_skb(skb); _debug("%u,%u,%02x", hard_ack, top, flags); + trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); if (flags & RXRPC_LAST_PACKET) rxrpc_end_rx_phase(call); } @@ -309,6 +316,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rxrpc_see_skb(skb); sp = rxrpc_skb(skb); + if (!(flags & MSG_PEEK)) + trace_rxrpc_receive(call, rxrpc_receive_front, + sp->hdr.serial, seq); + if (msg) sock_recv_timestamp(msg, sock->sk, skb); From 849979051cbc9352857d8bb31895ae55afe19d96 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 11:13:31 +0100 Subject: [PATCH 0311/1050] rxrpc: Add a tracepoint to follow what recvmsg does Add a tracepoint to follow what recvmsg does within AF_RXRPC. 
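For orientation, a blocking read that has to wait for data and then drains a single packet would emit roughly the following sequence (illustrative ordering; the names are the trace strings added to net/rxrpc/misc.c below):

    ENTR -> WAIT -> DEQU -> NEXT -> DATA -> RETN

with HOLE, CONT and FULL appearing when a packet is missing from the buffer, a packet is consumed piecemeal, or the user buffer fills up.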
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 34 ++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 17 +++++++++++++++++ net/rxrpc/misc.c | 14 ++++++++++++++ net/rxrpc/recvmsg.c | 34 ++++++++++++++++++++++++++-------- 4 files changed, 91 insertions(+), 8 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 7dd5f0188681..58732202e9f0 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -323,6 +323,40 @@ TRACE_EVENT(rxrpc_receive, __entry->top) ); +TRACE_EVENT(rxrpc_recvmsg, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why, + rxrpc_seq_t seq, unsigned int offset, unsigned int len, + int ret), + + TP_ARGS(call, why, seq, offset, len, ret), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_recvmsg_trace, why ) + __field(rxrpc_seq_t, seq ) + __field(unsigned int, offset ) + __field(unsigned int, len ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->seq = seq; + __entry->offset = offset; + __entry->len = len; + __entry->ret = ret; + ), + + TP_printk("c=%p %s q=%08x o=%u l=%u ret=%d", + __entry->call, + rxrpc_recvmsg_traces[__entry->why], + __entry->seq, + __entry->offset, + __entry->len, + __entry->ret) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e5d2f2fb8e41..a17341d2df3d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -617,6 +617,23 @@ enum rxrpc_receive_trace { extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; +enum rxrpc_recvmsg_trace { + rxrpc_recvmsg_enter, + rxrpc_recvmsg_wait, + rxrpc_recvmsg_dequeue, + rxrpc_recvmsg_hole, + rxrpc_recvmsg_next, + rxrpc_recvmsg_cont, + rxrpc_recvmsg_full, + rxrpc_recvmsg_data_return, + rxrpc_recvmsg_terminal, + rxrpc_recvmsg_to_be_accepted, + rxrpc_recvmsg_return, + rxrpc_recvmsg__nr_trace +}; + +extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index db5f1d54fc90..c7065d893d1e 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -150,3 +150,17 @@ const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { [rxrpc_receive_rotate] = "ROT", [rxrpc_receive_end] = "END", }; + +const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { + [rxrpc_recvmsg_enter] = "ENTR", + [rxrpc_recvmsg_wait] = "WAIT", + [rxrpc_recvmsg_dequeue] = "DEQU", + [rxrpc_recvmsg_hole] = "HOLE", + [rxrpc_recvmsg_next] = "NEXT", + [rxrpc_recvmsg_cont] = "CONT", + [rxrpc_recvmsg_full] = "FULL", + [rxrpc_recvmsg_data_return] = "DATA", + [rxrpc_recvmsg_terminal] = "TERM", + [rxrpc_recvmsg_to_be_accepted] = "TBAC", + [rxrpc_recvmsg_return] = "RETN", +}; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 22d51087c580..b62a08151895 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -94,6 +94,8 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) break; } + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, + call->rx_pkt_offset, call->rx_pkt_len, ret); return ret; } @@ -124,6 +126,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); } + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); return ret; } @@ -310,8 +313,11 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct 
rxrpc_call *call, for (seq = hard_ack + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - if (!skb) + if (!skb) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, + rx_pkt_offset, rx_pkt_len, 0); break; + } smp_rmb(); rxrpc_see_skb(skb); sp = rxrpc_skb(skb); @@ -327,10 +333,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, ret2 = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], &rx_pkt_offset, &rx_pkt_len); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); if (ret2 < 0) { ret = ret2; goto out; } + } else { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); @@ -357,6 +368,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } if (rx_pkt_len > 0) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, + rx_pkt_offset, rx_pkt_len, 0); _debug("buffer full"); ASSERTCMP(*_offset, ==, len); ret = 0; @@ -383,6 +396,8 @@ out: call->rx_pkt_len = rx_pkt_len; } done: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, + rx_pkt_offset, rx_pkt_len, ret); _leave(" = %d [%u/%u]", ret, seq, top); return ret; } @@ -404,7 +419,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, DEFINE_WAIT(wait); - _enter(",,,%zu,%d", len, flags); + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; @@ -424,8 +439,10 @@ try_again: if (list_empty(&rx->recvmsg_q)) { ret = -EWOULDBLOCK; - if (timeo == 0) + if (timeo == 0) { + call = NULL; goto error_no_call; + } release_sock(&rx->sk); @@ -439,6 +456,8 @@ try_again: if (list_empty(&rx->recvmsg_q)) { if (signal_pending(current)) goto wait_interrupted; + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, + 0, 0, 0, 0); timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(&rx->sk), &wait); @@ -457,7 +476,7 @@ try_again: rxrpc_get_call(call, rxrpc_call_got); write_unlock_bh(&rx->recvmsg_lock); - _debug("recvmsg call %p", call); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); @@ -527,16 +546,15 @@ error: rxrpc_put_call(call, rxrpc_call_put); error_no_call: release_sock(&rx->sk); - _leave(" = %d", ret); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - release_sock(&rx->sk); - _leave(" = %d [wait]", ret); - return ret; + call = NULL; + goto error_no_call; } /** From ba39f3a0ed756ccd882adf4a77916ec863db3ce4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 0312/1050] rxrpc: Remove printks from rxrpc_recvmsg_data() to fix uninit var Remove _enter/_debug/_leave calls from rxrpc_recvmsg_data() of which one uses an uninitialised variable. 
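The hazard is the usual one with a debug print at a shared exit label: it reads a variable that is only assigned on some of the paths that reach the label. A reduced illustration of the shape of the problem (hypothetical code, not the driver source):

    static int demo(int early)
    {
        unsigned int seq;
        int ret = -EAGAIN;

        if (early)
            goto done;
        for (seq = 1; seq <= 3; seq++)
            ret = 0;
    done:
        pr_debug("= %d [%u]\n", ret, seq); /* seq is uninitialised if 'early' */
        return ret;
    }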
Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index b62a08151895..79e65668bc58 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -296,8 +296,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, unsigned int rx_pkt_offset, rx_pkt_len; int ix, copy, ret = -EAGAIN, ret2; - _enter(""); - rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; @@ -343,8 +341,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); } - _debug("recvmsg %x DATA #%u { %d, %d }", - sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); /* We have to handle short, empty and used-up DATA packets. */ remain = len - *_offset; @@ -360,8 +356,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } /* handle piecemeal consumption of data packets */ - _debug("copied %d @%zu", copy, *_offset); - rx_pkt_offset += copy; rx_pkt_len -= copy; *_offset += copy; @@ -370,7 +364,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_len > 0) { trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, rx_pkt_offset, rx_pkt_len, 0); - _debug("buffer full"); ASSERTCMP(*_offset, ==, len); ret = 0; break; @@ -398,7 +391,6 @@ out: done: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, rx_pkt_offset, rx_pkt_len, ret); - _leave(" = %d [%u/%u]", ret, seq, top); return ret; } From 71f3ca408fd43b586c02480768a503af075b247e Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: [PATCH 0313/1050] rxrpc: Improve skb tracing Improve sk_buff tracing within AF_RXRPC by the following means: (1) Use an enum to note the event type rather than plain integers and use an array of event names rather than a big multi ?: list. (2) Distinguish Rx from Tx packets and account them separately. This requires the call phase to be tracked so that we know what we might find in rxtx_buffer[]. (3) Add a parameter to rxrpc_{new,see,get,free}_skb() to indicate the event type. (4) A pair of 'rotate' events are added to indicate packets that are about to be rotated out of the Rx and Tx windows. (5) A pair of 'lost' events are added, along with rxrpc_lose_skb() for packet loss injection recording. 
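The pattern that replaces the '?:' chain is an enum-indexed table of fixed-width names; the real definitions are in ar-internal.h and net/rxrpc/misc.c below, but a minimal sketch with hypothetical names looks like this:

    enum rxrpc_demo_trace {
        rxrpc_demo_new,
        rxrpc_demo_seen,
        rxrpc_demo__nr_trace
    };

    /* Width 4 holds three characters plus the NUL terminator. */
    static const char rxrpc_demo_traces[rxrpc_demo__nr_trace][4] = {
        [rxrpc_demo_new]  = "NEW",
        [rxrpc_demo_seen] = "SEE",
    };

A TP_printk() format string can then index the table directly as rxrpc_demo_traces[__entry->op], and the __nr_trace sentinel keeps the table size in step with the enum.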
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 12 +++----- net/rxrpc/af_rxrpc.c | 5 ++-- net/rxrpc/ar-internal.h | 33 ++++++++++++++++++---- net/rxrpc/call_event.c | 8 +++--- net/rxrpc/call_object.c | 11 ++++++-- net/rxrpc/conn_event.c | 6 ++-- net/rxrpc/input.c | 13 +++++---- net/rxrpc/local_event.c | 4 +-- net/rxrpc/misc.c | 18 ++++++++++++ net/rxrpc/output.c | 4 +-- net/rxrpc/peer_event.c | 10 +++---- net/rxrpc/recvmsg.c | 7 +++-- net/rxrpc/sendmsg.c | 10 +++---- net/rxrpc/skbuff.c | 53 ++++++++++++++++++++++++++---------- 14 files changed, 131 insertions(+), 63 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 58732202e9f0..75a5d8bf50e1 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -107,14 +107,14 @@ TRACE_EVENT(rxrpc_call, ); TRACE_EVENT(rxrpc_skb, - TP_PROTO(struct sk_buff *skb, int op, int usage, int mod_count, - const void *where), + TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op, + int usage, int mod_count, const void *where), TP_ARGS(skb, op, usage, mod_count, where), TP_STRUCT__entry( __field(struct sk_buff *, skb ) - __field(int, op ) + __field(enum rxrpc_skb_trace, op ) __field(int, usage ) __field(int, mod_count ) __field(const void *, where ) @@ -130,11 +130,7 @@ TRACE_EVENT(rxrpc_skb, TP_printk("s=%p %s u=%d m=%d p=%pSR", __entry->skb, - (__entry->op == 0 ? "NEW" : - __entry->op == 1 ? "SEE" : - __entry->op == 2 ? "GET" : - __entry->op == 3 ? "FRE" : - "PUR"), + rxrpc_skb_traces[__entry->op], __entry->usage, __entry->mod_count, __entry->where) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 09f81befc705..8dbf7bed2cc4 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -45,7 +45,7 @@ u32 rxrpc_epoch; atomic_t rxrpc_debug_id; /* count of skbs currently in use */ -atomic_t rxrpc_n_skbs; +atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; struct workqueue_struct *rxrpc_workqueue; @@ -867,7 +867,8 @@ static void __exit af_rxrpc_exit(void) proto_unregister(&rxrpc_proto); rxrpc_destroy_all_calls(); rxrpc_destroy_all_connections(); - ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); rxrpc_destroy_all_locals(); remove_proc_entry("rxrpc_conns", init_net.proc_net); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a17341d2df3d..034f525f2235 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -520,6 +520,7 @@ struct rxrpc_call { rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ u8 rx_winsize; /* Size of Rx window */ u8 tx_winsize; /* Maximum size of Tx window */ + bool tx_phase; /* T if transmission phase, F if receive phase */ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ /* receive-phase ACK management */ @@ -534,6 +535,27 @@ struct rxrpc_call { rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; +enum rxrpc_skb_trace { + rxrpc_skb_rx_cleaned, + rxrpc_skb_rx_freed, + rxrpc_skb_rx_got, + rxrpc_skb_rx_lost, + rxrpc_skb_rx_received, + rxrpc_skb_rx_rotated, + rxrpc_skb_rx_purged, + rxrpc_skb_rx_seen, + rxrpc_skb_tx_cleaned, + rxrpc_skb_tx_freed, + rxrpc_skb_tx_got, + rxrpc_skb_tx_lost, + rxrpc_skb_tx_new, + rxrpc_skb_tx_rotated, + rxrpc_skb_tx_seen, + rxrpc_skb__nr_trace +}; + +extern const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7]; + enum rxrpc_conn_trace { rxrpc_conn_new_client, rxrpc_conn_new_service, @@ -642,7 +664,7 @@ extern const char *rxrpc_acks(u8 reason); /* * 
af_rxrpc.c */ -extern atomic_t rxrpc_n_skbs; +extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; extern u32 rxrpc_epoch; extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; @@ -1000,10 +1022,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); */ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); -void rxrpc_new_skb(struct sk_buff *); -void rxrpc_see_skb(struct sk_buff *); -void rxrpc_get_skb(struct sk_buff *); -void rxrpc_free_skb(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index f0cabc48a1b7..7d1b99824ed9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -170,7 +170,7 @@ static void rxrpc_resend(struct rxrpc_call *call) continue; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); sp = rxrpc_skb(skb); if (annotation == RXRPC_TX_ANNO_UNACK) { @@ -199,7 +199,7 @@ static void rxrpc_resend(struct rxrpc_call *call) continue; skb = call->rxtx_buffer[ix]; - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); sp = rxrpc_skb(skb); @@ -211,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (rxrpc_send_data_packet(call->conn, skb) < 0) { call->resend_at = now + 2; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } @@ -219,7 +219,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_expose_client_call(call); sp->resend_at = now + rxrpc_resend_timeout; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); spin_lock_bh(&call->lock); /* We need to clear the retransmit state, but there are two diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 54f30482a7fd..f50a6094e198 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -182,6 +182,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->service_id = srx->srx_service; + call->tx_phase = true; _leave(" = %p", call); return call; @@ -458,7 +459,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_disconnect_call(call); for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i]); + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); call->rxtx_buffer[i] = NULL; } @@ -552,9 +555,11 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) /* Clean up the Rx/Tx buffer */ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) - rxrpc_free_skb(call->rxtx_buffer[i]); + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? 
rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); - rxrpc_free_skb(call->tx_pending); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9b19c51831aa..75a15a4c74c3 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -388,7 +388,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -399,7 +399,7 @@ void rxrpc_process_connection(struct work_struct *work) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); break; } } @@ -416,7 +416,7 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [EPROTO]"); goto out; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index b690220533c6..84bb16d47b85 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -50,7 +50,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) call->tx_hard_ack++; ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; @@ -66,7 +66,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) skb = list; list = skb->next; skb->next = NULL; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } } @@ -99,6 +99,7 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) default: break; case RXRPC_CALL_CLIENT_AWAIT_REPLY: + call->tx_phase = false; call->state = RXRPC_CALL_CLIENT_RECV_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: @@ -278,7 +279,7 @@ next_subpacket: * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() * and also rxrpc_fill_out_ack(). 
*/ - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_rx_got); call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -691,13 +692,13 @@ void rxrpc_data_ready(struct sock *udp_sk) return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); /* we'll probably need to checksum it (didn't call sock_recvmsg) */ if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; @@ -821,7 +822,7 @@ void rxrpc_data_ready(struct sock *udp_sk) discard_unlock: rcu_read_unlock(); discard: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); return; diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index f073e932500e..190f68bd9e27 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -90,7 +90,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { @@ -107,7 +107,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) break; } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index c7065d893d1e..026e1f2e83ff 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -102,6 +102,24 @@ const char *rxrpc_acks(u8 reason) return str[reason]; } +const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { + [rxrpc_skb_rx_cleaned] = "Rx CLN", + [rxrpc_skb_rx_freed] = "Rx FRE", + [rxrpc_skb_rx_got] = "Rx GOT", + [rxrpc_skb_rx_lost] = "Rx *L*", + [rxrpc_skb_rx_received] = "Rx RCV", + [rxrpc_skb_rx_purged] = "Rx PUR", + [rxrpc_skb_rx_rotated] = "Rx ROT", + [rxrpc_skb_rx_seen] = "Rx SEE", + [rxrpc_skb_tx_cleaned] = "Tx CLN", + [rxrpc_skb_tx_freed] = "Tx FRE", + [rxrpc_skb_tx_got] = "Tx GOT", + [rxrpc_skb_tx_lost] = "Tx *L*", + [rxrpc_skb_tx_new] = "Tx NEW", + [rxrpc_skb_tx_rotated] = "Tx ROT", + [rxrpc_skb_tx_seen] = "Tx SEE", +}; + const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { [rxrpc_conn_new_client] = "NWc", [rxrpc_conn_new_service] = "NWs", diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2c9daeadce87..a2cad5ce7416 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -324,7 +324,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { @@ -343,7 +343,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) kernel_sendmsg(local->socket, &msg, iov, 2, size); } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 9e0725f5652b..18276e7cb9e0 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -155,11 +155,11 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); return; } @@ -169,7 +169,7 @@ void rxrpc_error_report(struct sock *sk) peer = 
NULL; if (!peer) { rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [no peer]"); return; } @@ -179,7 +179,7 @@ void rxrpc_error_report(struct sock *sk) serr->ee.ee_code == ICMP_FRAG_NEEDED)) { rxrpc_adjust_mtu(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); rxrpc_put_peer(peer); _leave(" [MTU update]"); return; @@ -187,7 +187,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); /* The ref we obtained is passed off to the work item */ rxrpc_queue_work(&peer->error_distributor); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 79e65668bc58..6ba4af5a8d95 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -155,6 +155,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) break; case RXRPC_CALL_SERVER_RECV_REQUEST: + call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; break; default: @@ -185,7 +186,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) hard_ack++; ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); sp = rxrpc_skb(skb); flags = sp->hdr.flags; serial = sp->hdr.serial; @@ -197,7 +198,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) /* Barrier against rxrpc_input_data(). */ smp_store_release(&call->rx_hard_ack, hard_ack); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); @@ -317,7 +318,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, break; } smp_rmb(); - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); if (!(flags & MSG_PEEK)) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 28d8f73cf11d..6a39ee97a0b7 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -100,7 +100,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ASSERTCMP(seq, ==, call->tx_top + 1); ix = seq & RXRPC_RXTX_BUFF_MASK; - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_tx_got); call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -146,7 +146,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_instant_resend(call, ix); } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); _leave(""); } @@ -201,7 +201,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb = call->tx_pending; call->tx_pending = NULL; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); copied = 0; do { @@ -242,7 +242,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (!skb) goto maybe_error; - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_tx_new); _debug("ALLOC SEND %p", skb); @@ -352,7 +352,7 @@ out: return ret; call_terminated: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); _leave(" = %d", -call->error); return -call->error; diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 620d9ccaf3c1..5154cbf7e540 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -18,54 +18,76 @@ #include #include "ar-internal.h" +#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) + /* - * Note the existence of a new-to-us socket buffer (allocated or dequeued). 
+ * Note the allocation or reception of a socket buffer. */ -void rxrpc_new_skb(struct sk_buff *skb) +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 0, atomic_read(&skb->users), n, here); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } /* * Note the re-emergence of a socket buffer from a queue or buffer. */ -void rxrpc_see_skb(struct sk_buff *skb) +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); if (skb) { - int n = atomic_read(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 1, atomic_read(&skb->users), n, here); + int n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } } /* * Note the addition of a ref on a socket buffer. */ -void rxrpc_get_skb(struct sk_buff *skb) +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 2, atomic_read(&skb->users), n, here); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); skb_get(skb); } /* * Note the destruction of a socket buffer. */ -void rxrpc_free_skb(struct sk_buff *skb) +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); if (skb) { int n; CHECK_SLAB_OKAY(&skb->users); - n = atomic_dec_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 3, atomic_read(&skb->users), n, here); + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); kfree_skb(skb); } } +/* + * Note the injected loss of a socket buffer. + */ +void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + if (op == rxrpc_skb_tx_lost) { + n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + } else { + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } + } +} + /* * Clear a queue of socket buffers. */ @@ -74,8 +96,9 @@ void rxrpc_purge_queue(struct sk_buff_head *list) const void *here = __builtin_return_address(0); struct sk_buff *skb; while ((skb = skb_dequeue((list))) != NULL) { - int n = atomic_dec_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 4, atomic_read(&skb->users), n, here); + int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); + trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, + atomic_read(&skb->users), n, here); kfree_skb(skb); } } From 8a681c360559f75a80b37e6a6a9590457361ccb0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:15 +0100 Subject: [PATCH 0314/1050] rxrpc: Add config to inject packet loss Add a configuration option to inject packet loss by discarding approximately every 8th packet received and approximately every 8th DATA packet transmitted. Note that no locking is used, but it shouldn't really matter. 
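The discard decision is a free-running static counter masked to select roughly one packet in eight; a sketch of the pattern added in both hunks below, where drop_packet() stands in for the Rx- and Tx-specific handling (the real code calls rxrpc_lose_skb()):

    if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
        static int lose;

        if ((lose++ & 7) == 7) {
            drop_packet(skb); /* hypothetical helper */
            return;
        }
    }

A racy increment on SMP only shifts which packet gets dropped, not the long-run rate, which is why the lack of locking shouldn't matter.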
Signed-off-by: David Howells --- net/rxrpc/Kconfig | 7 +++++++ net/rxrpc/input.c | 8 ++++++++ net/rxrpc/output.c | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 13396c74b5c1..86f8853a038c 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -26,6 +26,13 @@ config AF_RXRPC_IPV6 Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as its network transport. +config AF_RXRPC_INJECT_LOSS + bool "Inject packet loss into RxRPC packet stream" + depends on AF_RXRPC + help + Say Y here to inject packet loss by discarding some received and some + transmitted packets. + config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 84bb16d47b85..7ac1edf3aac7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -712,6 +712,14 @@ void rxrpc_data_ready(struct sock *udp_sk) skb_orphan(skb); sp = rxrpc_skb(skb); + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); + return; + } + } + _net("Rx UDP packet from %08x:%04hu", ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a2cad5ce7416..16e18a94ffa6 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -225,6 +225,15 @@ int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); + _leave(" = 0 [lose]"); + return 0; + } + } + /* send the packet with the don't fragment bit set if we currently * think it's small enough */ if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { From cce94483e47e8e3d74cf4475dea33f9fd4b6ad9f Mon Sep 17 00:00:00 2001 From: Filipe Manco Date: Thu, 15 Sep 2016 17:10:46 +0200 Subject: [PATCH 0315/1050] xen-netback: fix error handling on netback_probe() In case of error during netback_probe() (e.g. an entry missing on the xenstore) netback_remove() is called on the new device, which will set the device backend state to XenbusStateClosed by calling set_backend_state(). However, the backend state wasn't initialized by netback_probe() at this point, which will cause an invalid transition and set_backend_state() to BUG(). Initialize the backend state at the beginning of netback_probe() to XenbusStateInitialising, and create two new valid state transitions on set_backend_state(), from XenbusStateInitialising to XenbusStateClosed, and from XenbusStateInitialising to XenbusStateInitWait. Signed-off-by: Filipe Manco Acked-by: Wei Liu Signed-off-by: David S.
Miller --- drivers/net/xen-netback/xenbus.c | 46 +++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 6a31f2610c23..daf4c7867102 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -271,6 +271,11 @@ static int netback_probe(struct xenbus_device *dev, be->dev = dev; dev_set_drvdata(&dev->dev, be); + be->state = XenbusStateInitialising; + err = xenbus_switch_state(dev, XenbusStateInitialising); + if (err) + goto fail; + sg = 1; do { @@ -383,11 +388,6 @@ static int netback_probe(struct xenbus_device *dev, be->hotplug_script = script; - err = xenbus_switch_state(dev, XenbusStateInitWait); - if (err) - goto fail; - - be->state = XenbusStateInitWait; /* This kicks hotplug scripts, so do it immediately. */ err = backend_create_xenvif(be); @@ -492,20 +492,20 @@ static inline void backend_switch_state(struct backend_info *be, /* Handle backend state transitions: * - * The backend state starts in InitWait and the following transitions are + * The backend state starts in Initialising and the following transitions are * allowed. * - * InitWait -> Connected + * Initialising -> InitWait -> Connected + * \ + * \ ^ \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * V | V V * - * ^ \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | V V - * - * Closed <-> Closing + * Closed <-> Closing * * The state argument specifies the eventual state of the backend and the * function transitions to that state via the shortest path. @@ -515,6 +515,20 @@ static void set_backend_state(struct backend_info *be, { while (be->state != state) { switch (be->state) { + case XenbusStateInitialising: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + case XenbusStateClosing: + backend_switch_state(be, XenbusStateInitWait); + break; + case XenbusStateClosed: + backend_switch_state(be, XenbusStateClosed); + break; + default: + BUG(); + } + break; case XenbusStateClosed: switch (state) { case XenbusStateInitWait: From ffb4d6c8508657824bcef68a36b2a0f9d8c09d10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:12:33 -0700 Subject: [PATCH 0316/1050] tcp: fix overflow in __tcp_retransmit_skb() If a TCP socket gets a large write queue, an overflow can happen in a test in __tcp_retransmit_skb() preventing all retransmits. The flow then stalls and resets after timeouts. Tested: sysctl -w net.core.wmem_max=1000000000 netperf -H dest -- -s 1000000000 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdaef7fd6e47..f53d0cca5fa4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2605,7 +2605,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) * copying overhead: fragmentation, tunneling, mangling etc. 
*/ if (atomic_read(&sk->sk_wmem_alloc) > - min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf) + min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), + sk->sk_sndbuf) return -EAGAIN; if (skb_still_in_host_queue(sk, skb)) From 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:48:46 -0700 Subject: [PATCH 0317/1050] net: avoid sk_forward_alloc overflows A malicious TCP receiver, sending SACK, can force the sender to split skbs in the write queue and increase its memory usage. Then, when the socket is closed and its write queue purged, we might overflow sk_forward_alloc (it becomes negative). sk_mem_reclaim() does nothing in this case, and more than 2GB are leaked from a TCP perspective (tcp_memory_allocated is not changed). Then warnings trigger from inet_sock_destruct() and sk_stream_kill_queues() seeing a non-zero sk_forward_alloc. The whole TCP stack can be stuck because TCP is under memory pressure. A simple fix is to preemptively reclaim from sk_mem_uncharge(). This makes sure a socket won't have more than 2 MB forward allocated after a burst and an idle period. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index ff5be7e8ddea..8741988e6880 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1332,6 +1332,16 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; + + /* Avoid a possible overflow. + * TCP send queues can make this happen, if sk_mem_reclaim() + * is not called and more than 2 GBytes are released at once. + * + * If we reach 2 MBytes, reclaim 1 MBytes right now, there is + * no need to hold that much forward allocation anyway. + */ + if (unlikely(sk->sk_forward_alloc >= 1 << 21)) + __sk_mem_reclaim(sk, 1 << 20); } static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) From 8ab86c00e349cef9fb14719093a7f198bcc72629 Mon Sep 17 00:00:00 2001 From: "phil.turnbull@oracle.com" Date: Thu, 15 Sep 2016 12:41:44 -0400 Subject: [PATCH 0318/1050] irda: Free skb on irda_accept error path. skb is not freed if newsk is NULL. Rework the error path so kfree_skb() is unconditionally called on function exit. Fixes: c3ea9fa27413 ("[IrDA] af_irda: IRDA_ASSERT cleanups") Signed-off-by: Phil Turnbull Signed-off-by: David S. Miller --- net/irda/af_irda.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 8d2f7c9b491d..ccc244406fb9 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -832,7 +832,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) struct sock *sk = sock->sk; struct irda_sock *new, *self = irda_sk(sk); struct sock *newsk; - struct sk_buff *skb; + struct sk_buff *skb = NULL; int err; err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); @@ -900,7 +900,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) err = -EPERM; /* value does not seem to make sense.
-arnd */ if (!new->tsap) { pr_debug("%s(), dup failed!\n", __func__); - kfree_skb(skb); goto out; } @@ -919,7 +918,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Clean up the original one to keep it in listen state */ irttp_listen(self->tsap); - kfree_skb(skb); sk->sk_ack_backlog--; newsock->state = SS_CONNECTED; @@ -927,6 +925,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) irda_connect_response(new); err = 0; out: + kfree_skb(skb); release_sock(sk); return err; } From 4496195ddd75c4ad57b783739414e69b7d79843e Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 15 Sep 2016 15:02:38 -0300 Subject: [PATCH 0319/1050] sctp: fix SSN comparison This function actually operates on u32 yet its parameters were declared as u16, causing integer truncation upon calling. Note in patch context that ADDIP_SERIAL_SIGN_BIT is already 32 bits. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index efc01743b9d6..bafe2a0ab908 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -382,7 +382,7 @@ enum { ADDIP_SERIAL_SIGN_BIT = (1<<31) }; -static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) +static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) { return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } From 2835d2d9e366a2985b24051d228333bfba82f3a7 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:51 +0200 Subject: [PATCH 0320/1050] bna: add missing per queue ethtool stat Commit ba5ca784 "bna: check for dma mapping errors" added, among other things, a statistic that counts the number of DMA buffer mapping failures for each Rx queue. This counter is not included in the ethtool stats output. Fixes: ba5ca784 "bna: check for dma mapping errors" Signed-off-by: Ivan Vecera Signed-off-by: David S.
Miller --- drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 0e4fdc3dd729..5671353fc7bc 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -31,7 +31,7 @@ #define BNAD_NUM_TXF_COUNTERS 12 #define BNAD_NUM_RXF_COUNTERS 10 #define BNAD_NUM_CQ_COUNTERS (3 + 5) -#define BNAD_NUM_RXQ_COUNTERS 6 +#define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 #define BNAD_ETHTOOL_STATS_NUM \ @@ -658,6 +658,8 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_consumer_index", q_num); @@ -678,6 +680,9 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", + q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; From 37dd348270c1a48f0234354a06c0ce052b6c85b1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:52 +0200 Subject: [PATCH 0321/1050] bna: fix crash in bnad_get_strings() Commit 6e7333d "net: add rx_nohandler stat counter" added the new entry rx_nohandler to struct rtnl_link_stats64. Unfortunately the bna driver foolishly depends on the layout of this structure: it uses part of it for ethtool statistics, which is not bad in itself, but the driver assumes the structure's size is constant, as it defines a string for each existing entry. The problem occurs whenever the structure is extended, because the bna driver then needs to be modified as well; if it is not, any attempt to retrieve ethtool statistics results in a crash in bnad_get_strings(). The patch changes BNAD_ETHTOOL_STATS_NUM so that it counts the real number of strings in the array, and also removes the rtnl_link_stats64 entries that are not used in the output and are always zero. Fixes: 6e7333d "net: add rx_nohandler stat counter" Signed-off-by: Ivan Vecera Signed-off-by: David S.
Miller --- .../net/ethernet/brocade/bna/bnad_ethtool.c | 48 +++++++++---------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 5671353fc7bc..31f61a744d66 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -34,12 +34,7 @@ #define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 -#define BNAD_ETHTOOL_STATS_NUM \ - (sizeof(struct rtnl_link_stats64) / sizeof(u64) + \ - sizeof(struct bnad_drv_stats) / sizeof(u64) + \ - offsetof(struct bfi_enet_stats, rxf_stats[0]) / sizeof(u64)) - -static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { +static const char *bnad_net_stats_strings[] = { "rx_packets", "tx_packets", "rx_bytes", @@ -50,22 +45,10 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_dropped", "multicast", "collisions", - "rx_length_errors", - "rx_over_errors", "rx_crc_errors", "rx_frame_errors", - "rx_fifo_errors", - "rx_missed_errors", - - "tx_aborted_errors", - "tx_carrier_errors", "tx_fifo_errors", - "tx_heartbeat_errors", - "tx_window_errors", - - "rx_compressed", - "tx_compressed", "netif_queue_stop", "netif_queue_wakeup", @@ -254,6 +237,8 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "fc_tx_fid_parity_errors", }; +#define BNAD_ETHTOOL_STATS_NUM ARRAY_SIZE(bnad_net_stats_strings) + static int bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) { @@ -859,9 +844,9 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *buf) { struct bnad *bnad = netdev_priv(netdev); - int i, j, bi; + int i, j, bi = 0; unsigned long flags; - struct rtnl_link_stats64 *net_stats64; + struct rtnl_link_stats64 net_stats64; u64 *stats64; u32 bmap; @@ -876,14 +861,25 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, * under the same lock */ spin_lock_irqsave(&bnad->bna_lock, flags); - bi = 0; - memset(buf, 0, stats->n_stats * sizeof(u64)); - net_stats64 = (struct rtnl_link_stats64 *)buf; - bnad_netdev_qstats_fill(bnad, net_stats64); - bnad_netdev_hwstats_fill(bnad, net_stats64); + memset(&net_stats64, 0, sizeof(net_stats64)); + bnad_netdev_qstats_fill(bnad, &net_stats64); + bnad_netdev_hwstats_fill(bnad, &net_stats64); - bi = sizeof(*net_stats64) / sizeof(u64); + buf[bi++] = net_stats64.rx_packets; + buf[bi++] = net_stats64.tx_packets; + buf[bi++] = net_stats64.rx_bytes; + buf[bi++] = net_stats64.tx_bytes; + buf[bi++] = net_stats64.rx_errors; + buf[bi++] = net_stats64.tx_errors; + buf[bi++] = net_stats64.rx_dropped; + buf[bi++] = net_stats64.tx_dropped; + buf[bi++] = net_stats64.multicast; + buf[bi++] = net_stats64.collisions; + buf[bi++] = net_stats64.rx_length_errors; + buf[bi++] = net_stats64.rx_crc_errors; + buf[bi++] = net_stats64.rx_frame_errors; + buf[bi++] = net_stats64.tx_fifo_errors; /* Get netif_queue_stopped from stack */ bnad->stats.drv_stats.netif_queue_stopped = netif_queue_stopped(netdev); From f1785fbf7c0bc17211c299a647ebc38968a42181 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 13 Sep 2016 15:03:15 -0400 Subject: [PATCH 0322/1050] rtl8xxxu: Implement 8192e specific power down sequence This powers down the 8192e correctly, or at least to the point where the firmware will load again, when reloading the driver module. 
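The new power-down path hinges on a bounded poll: read REG_SCH_TX_CMD until it reads back zero (no TX pending) and give up with -EBUSY if it never does. A minimal sketch of that idiom, with a hypothetical read_reg() callback standing in for the driver's rtl8xxxu_read32() accessor:

#include <linux/types.h>
#include <linux/errno.h>

/* Illustrative sketch only; read_reg() is a stand-in for the driver's
 * register accessor, and the real code in the diff below polls
 * REG_SCH_TX_CMD before touching the BB/MAC reset bits.
 */
static int poll_until_idle(u32 (*read_reg)(void), int retries)
{
	while (retries-- > 0) {
		if (read_reg() == 0)	/* no TX pending */
			return 0;
	}
	return -EBUSY;			/* exhausted every attempt */
}

The important detail is that the loop must distinguish "condition met" from "retries exhausted"; the actual patch does this by initializing retval to -EBUSY and clearing it only on success.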
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../realtek/rtl8xxxu/rtl8xxxu_8192e.c | 144 +++++++++++++++++- .../wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h | 1 + 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 841522ee6379..df54d27e7851 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1396,6 +1396,114 @@ exit: return ret; } +static int rtl8192eu_active_to_lps(struct rtl8xxxu_priv *priv) +{ + struct device *dev = &priv->udev->dev; + u8 val8; + u16 val16; + u32 val32; + int retry, retval; + + rtl8xxxu_write8(priv, REG_TXPAUSE, 0xff); + + retry = 100; + retval = -EBUSY; + /* + * Poll 32 bit wide 0x05f8 for 0x00000000 to ensure no TX is pending. + */ + do { + val32 = rtl8xxxu_read32(priv, REG_SCH_TX_CMD); + if (!val32) { + retval = 0; + break; + } + } while (retry--); + + if (!retry) { + dev_warn(dev, "Failed to flush TX queue\n"); + retval = -EBUSY; + goto out; + } + + /* Disable CCK and OFDM, clock gated */ + val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC); + val8 &= ~SYS_FUNC_BBRSTB; + rtl8xxxu_write8(priv, REG_SYS_FUNC, val8); + + udelay(2); + + /* Reset whole BB */ + val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC); + val8 &= ~SYS_FUNC_BB_GLB_RSTN; + rtl8xxxu_write8(priv, REG_SYS_FUNC, val8); + + /* Reset MAC TRX */ + val16 = rtl8xxxu_read16(priv, REG_CR); + val16 &= 0xff00; + val16 |= (CR_HCI_TXDMA_ENABLE | CR_HCI_RXDMA_ENABLE); + rtl8xxxu_write16(priv, REG_CR, val16); + + val16 = rtl8xxxu_read16(priv, REG_CR); + val16 &= ~CR_SECURITY_ENABLE; + rtl8xxxu_write16(priv, REG_CR, val16); + + val8 = rtl8xxxu_read8(priv, REG_DUAL_TSF_RST); + val8 |= DUAL_TSF_TX_OK; + rtl8xxxu_write8(priv, REG_DUAL_TSF_RST, val8); + +out: + return retval; +} + +static int rtl8192eu_active_to_emu(struct rtl8xxxu_priv *priv) +{ + u8 val8; + int count, ret = 0; + + /* Turn off RF */ + rtl8xxxu_write8(priv, REG_RF_CTRL, 0); + + /* Switch DPDT_SEL_P output from register 0x65[2] */ + val8 = rtl8xxxu_read8(priv, REG_LEDCFG2); + val8 &= ~LEDCFG2_DPDT_SELECT; + rtl8xxxu_write8(priv, REG_LEDCFG2, val8); + + /* 0x0005[1] = 1 turn off MAC by HW state machine*/ + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + val8 |= BIT(1); + rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8); + + for (count = RTL8XXXU_MAX_REG_POLL; count; count--) { + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + if ((val8 & BIT(1)) == 0) + break; + udelay(10); + } + + if (!count) { + dev_warn(&priv->udev->dev, "%s: Disabling MAC timed out\n", + __func__); + ret = -EBUSY; + goto exit; + } + +exit: + return ret; +} + +static int rtl8192eu_emu_to_disabled(struct rtl8xxxu_priv *priv) +{ + u8 val8; + + /* 0x04[12:11] = 01 enable WL suspend */ + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + val8 &= ~(BIT(3) | BIT(4)); + val8 |= BIT(3); + rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8); + + return 0; +} + static int rtl8192eu_power_on(struct rtl8xxxu_priv *priv) { u16 val16; @@ -1446,6 +1554,40 @@ exit: return ret; } +void rtl8192eu_power_off(struct rtl8xxxu_priv *priv) +{ + u8 val8; + u16 val16; + + rtl8xxxu_flush_fifo(priv); + + val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); + val8 &= ~TX_REPORT_CTRL_TIMER_ENABLE; + rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); + + /* Turn off RF */ + rtl8xxxu_write8(priv, REG_RF_CTRL, 0x00); + + rtl8192eu_active_to_lps(priv); + + /* Reset Firmware if running in RAM */ + if (rtl8xxxu_read8(priv, 
REG_MCU_FW_DL) & MCU_FW_RAM_SEL) + rtl8xxxu_firmware_self_reset(priv); + + /* Reset MCU */ + val16 = rtl8xxxu_read16(priv, REG_SYS_FUNC); + val16 &= ~SYS_FUNC_CPU_ENABLE; + rtl8xxxu_write16(priv, REG_SYS_FUNC, val16); + + /* Reset MCU ready status */ + rtl8xxxu_write8(priv, REG_MCU_FW_DL, 0x00); + + rtl8xxxu_reset_8051(priv); + + rtl8192eu_active_to_emu(priv); + rtl8192eu_emu_to_disabled(priv); +} + static void rtl8192e_enable_rf(struct rtl8xxxu_priv *priv) { u32 val32; @@ -1487,7 +1629,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .parse_efuse = rtl8192eu_parse_efuse, .load_firmware = rtl8192eu_load_firmware, .power_on = rtl8192eu_power_on, - .power_off = rtl8xxxu_power_off, + .power_off = rtl8192eu_power_off, .reset_8051 = rtl8xxxu_reset_8051, .llt_init = rtl8xxxu_auto_llt_table, .init_phy_bb = rtl8192eu_init_phy_bb, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h index 3555a2f24285..315ccfb2dff5 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h @@ -676,6 +676,7 @@ #define REG_SCH_TXCMD 0x05d0 /* define REG_FW_TSF_SYNC_CNT 0x04a0 */ +#define REG_SCH_TX_CMD 0x05f8 #define REG_FW_RESET_TSF_CNT_1 0x05fc #define REG_FW_RESET_TSF_CNT_0 0x05fd #define REG_FW_BCN_DIS_CNT 0x05fe From 92ca4f92eca7aa362d51f7657d3fea47861600ee Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 14 Sep 2016 08:42:36 -0400 Subject: [PATCH 0323/1050] mwifiex: fix error handling in mwifiex_create_custom_regdomain smatch reports: sta_cmdresp.c:1053 mwifiex_create_custom_regdomain() warn: possible memory leak of 'regd' Indeed, mwifiex_create_custom_regdomain() returns NULL in the case that channel is missing in the TLV without freeing regd. Moreover, some other error paths in this function return ERR_PTR values which are assigned without checking to the regd field in the mwifiex_adapter struct. The latter is only null-checked where used. Fix by freeing regd in the error path, and only update priv->adapter->regd if the returned pointer is valid. Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Signed-off-by: Bob Copeland Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 3344a26bed03..8548027abf71 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -1049,8 +1049,10 @@ mwifiex_create_custom_regdomain(struct mwifiex_private *priv, enum nl80211_band band; chan = *buf++; - if (!chan) + if (!chan) { + kfree(regd); return NULL; + } chflags = *buf++; band = (chan <= 14) ? 
NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; freq = ieee80211_channel_to_frequency(chan, band); @@ -1116,6 +1118,7 @@ static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, u16 action = le16_to_cpu(reg->action); u16 tlv, tlv_buf_len, tlv_buf_left; struct mwifiex_ie_types_header *head; + struct ieee80211_regdomain *regd; u8 *tlv_buf; if (action != HostCmd_ACT_GEN_GET) @@ -1137,10 +1140,10 @@ static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:", (u8 *)head + sizeof(*head), tlv_buf_len); - priv->adapter->regd = - mwifiex_create_custom_regdomain(priv, - (u8 *)head + - sizeof(*head), tlv_buf_len); + regd = mwifiex_create_custom_regdomain(priv, + (u8 *)head + sizeof(*head), tlv_buf_len); + if (!IS_ERR(regd)) + priv->adapter->regd = regd; break; } From 80ba4f1d365af206b9e818d17d22fed02fe5def0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 16 Sep 2016 10:37:31 +0100 Subject: [PATCH 0324/1050] mwifiex: fix null pointer dereference when adapter is null If adapter is NULL, the error exit path in mwifiex_shutdown_sw is to down the semaphore sem and print some debug output via mwifiex_dbg. However, passing a NULL adapter to mwifiex_dbg causes a null pointer dereference when accessing adapter->dev. This fix checks for a NULL adapter at the start of the function and exits without needing to up the semaphore; it also skips the debug output to avoid the null pointer dereference. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 9b2e98cfe090..2478ccd6f2d9 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1369,12 +1369,12 @@ mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem) struct mwifiex_private *priv; int i; + if (!adapter) + goto exit_return; + if (down_interruptible(sem)) goto exit_sem_err; - if (!adapter) - goto exit_remove; - priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); mwifiex_deauthenticate(priv, NULL); @@ -1430,10 +1430,10 @@ mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem) rtnl_unlock(); } -exit_remove: up(sem); exit_sem_err: mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); +exit_return: return 0; } From d4690f1e1cdabb4d61207b6787b1605a0dc0aeab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Sep 2016 00:11:45 +0100 Subject: [PATCH 0325/1050] fix iov_iter_fault_in_readable() ...
by turning it into what used to be multipages counterpart Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/uio.h | 2 +- lib/iov_iter.c | 24 ++---------------------- 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 1b5d1cd796e2..75b4aaf31a9d 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,7 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes); +#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 9e8c7386b3a0..7e3138cfc8c9 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -290,26 +290,6 @@ done: return wanted - bytes; } -/* - * Fault in the first iovec of the given iov_iter, to a maximum length - * of bytes. Returns 0 on success, or non-zero if the memory could not be - * accessed (ie. because it is an invalid address). - * - * writev-intensive code may want this to prefault several iovecs -- that - * would be possible (callers must not rely on the fact that _only_ the - * first iovec will be faulted with the current implementation). - */ -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) -{ - if (!(i->type & (ITER_BVEC|ITER_KVEC))) { - char __user *buf = i->iov->iov_base + i->iov_offset; - bytes = min(bytes, i->iov->iov_len - i->iov_offset); - return fault_in_pages_readable(buf, bytes); - } - return 0; -} -EXPORT_SYMBOL(iov_iter_fault_in_readable); - /* * Fault in one or more iovecs of the given iov_iter, to a maximum length of * bytes. For each iovec, fault in each page that constitutes the iovec. @@ -317,7 +297,7 @@ EXPORT_SYMBOL(iov_iter_fault_in_readable); * Return 0 on success, or non-zero if the memory could not be accessed (i.e. * because it is an invalid address). */ -int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) { size_t skip = i->iov_offset; const struct iovec *iov; @@ -334,7 +314,7 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) } return 0; } -EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable); +EXPORT_SYMBOL(iov_iter_fault_in_readable); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, From 6244bd651236d86f59387d43c531b5f942a92b38 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 8 Aug 2016 17:48:20 -0600 Subject: [PATCH 0326/1050] exynos-drm: Fix unsupported GEM memory type error message to be clear Fix unsupported GEM memory type error message to include the memory type information. 
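One way to make such a rejection message more useful still is to print the offending value itself, so the log shows which memory type was refused. A hedged sketch of what that could look like (this is a hypothetical variant, not the actual patch, whose diff follows):

/* Hypothetical variant: also report which GEM flags were rejected. */
if (IS_NONCONTIG_BUFFER(flags)) {
	DRM_ERROR("Non-contiguous GEM memory (flags 0x%x) is not supported.\n",
		  flags);
	return -EINVAL;
}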
Signed-off-by: Shuah Khan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index e0166403b4bd..40ce841eb952 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -55,11 +55,11 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev, flags = exynos_gem->flags; /* - * without iommu support, not support physically non-continuous memory - * for framebuffer. + * Physically non-contiguous memory type for framebuffer is not + * supported without IOMMU. */ if (IS_NONCONTIG_BUFFER(flags)) { - DRM_ERROR("cannot use this gem memory type for fb.\n"); + DRM_ERROR("Non-contiguous GEM memory is not supported.\n"); return -EINVAL; } From 479f12545460809cfc9093d90d6ed82d76388e97 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:54 +0200 Subject: [PATCH 0327/1050] drm/exynos: fimc: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 29 ++---------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 0525c56145db..147ef0d298cb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1753,32 +1753,6 @@ static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) return 0; } -#ifdef CONFIG_PM_SLEEP -static int fimc_suspend(struct device *dev) -{ - struct fimc_context *ctx = get_fimc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (pm_runtime_suspended(dev)) - return 0; - - return fimc_clk_ctrl(ctx, false); -} - -static int fimc_resume(struct device *dev) -{ - struct fimc_context *ctx = get_fimc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (!pm_runtime_suspended(dev)) - return fimc_clk_ctrl(ctx, true); - - return 0; -} -#endif - static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); @@ -1799,7 +1773,8 @@ static int fimc_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops fimc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(fimc_suspend, fimc_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(fimc_runtime_suspend, fimc_runtime_resume, NULL) }; From 83bd7b20aaf499030bf857ef64de3c19309b107d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:55 +0200 Subject: [PATCH 0328/1050] drm/exynos: gsc: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. 
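The pattern introduced by this and the following patches is worth showing on its own: the system-sleep hooks simply reuse the runtime-PM callbacks through the generic force helpers, so clock handling lives in exactly one place. A minimal sketch, where foo_runtime_suspend/resume are hypothetical driver callbacks:

#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int foo_runtime_suspend(struct device *dev)
{
	return 0;	/* hypothetical: gate clocks here */
}

static int foo_runtime_resume(struct device *dev)
{
	return 0;	/* hypothetical: ungate clocks here */
}

static const struct dev_pm_ops foo_pm_ops = {
	/* System sleep delegates to the runtime-PM path below. */
	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
				pm_runtime_force_resume)
	SET_RUNTIME_PM_OPS(foo_runtime_suspend, foo_runtime_resume, NULL)
};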
Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 29 ++----------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 5d20da8f957e..b1894aa9286e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1760,32 +1760,6 @@ static int gsc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int gsc_suspend(struct device *dev) -{ - struct gsc_context *ctx = get_gsc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (pm_runtime_suspended(dev)) - return 0; - - return gsc_clk_ctrl(ctx, false); -} - -static int gsc_resume(struct device *dev) -{ - struct gsc_context *ctx = get_gsc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (!pm_runtime_suspended(dev)) - return gsc_clk_ctrl(ctx, true); - - return 0; -} -#endif - #ifdef CONFIG_PM static int gsc_runtime_suspend(struct device *dev) { @@ -1807,7 +1781,8 @@ static int gsc_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops gsc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(gsc_suspend, gsc_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL) }; From 5b67723e6096f5470f361656cd108430d3b12c67 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:56 +0200 Subject: [PATCH 0329/1050] drm/exynos: rotator: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 26 ++------------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 404367a430b5..6591e406084c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -794,29 +794,6 @@ static int rotator_clk_crtl(struct rot_context *rot, bool enable) return 0; } - -#ifdef CONFIG_PM_SLEEP -static int rotator_suspend(struct device *dev) -{ - struct rot_context *rot = dev_get_drvdata(dev); - - if (pm_runtime_suspended(dev)) - return 0; - - return rotator_clk_crtl(rot, false); -} - -static int rotator_resume(struct device *dev) -{ - struct rot_context *rot = dev_get_drvdata(dev); - - if (!pm_runtime_suspended(dev)) - return rotator_clk_crtl(rot, true); - - return 0; -} -#endif - static int rotator_runtime_suspend(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); @@ -833,7 +810,8 @@ static int rotator_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops rotator_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(rotator_suspend, rotator_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(rotator_runtime_suspend, rotator_runtime_resume, NULL) }; From b05984e21a7e000bf5074ace00d7a574944b2c16 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:57 +0200 Subject: [PATCH 0330/1050] drm/exynos: g2d: fix system and runtime pm integration Move code from system sleep pm to runtime pm callbacks to ensure proper driver state preservation when device is under power domain. Then, use generic helpers for using runtime pm for system sleep pm. 
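For readers unfamiliar with the force helpers used in these conversions, their behavior can be modeled roughly as follows; this is a simplified sketch, not the mainline implementation:

#include <linux/device.h>
#include <linux/pm_runtime.h>

/* Rough model of pm_runtime_force_suspend(): stop runtime PM, then run
 * the runtime-suspend callback unless the device is already suspended.
 * The real helper does more bookkeeping, but this is the essence.
 */
static int force_suspend_model(struct device *dev)
{
	pm_runtime_disable(dev);
	if (pm_runtime_status_suspended(dev))
		return 0;	/* hardware already powered down */
	return dev->driver->pm->runtime_suspend(dev);
}

This is why the open-coded pm_runtime_suspended() checks in the removed suspend/resume handlers become unnecessary: the helpers perform the equivalent check themselves.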
Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 29 ++++++------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 4bf00f57ffe8..6eca8bb88648 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1475,8 +1475,8 @@ static int g2d_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int g2d_suspend(struct device *dev) +#ifdef CONFIG_PM +static int g2d_runtime_suspend(struct device *dev) { struct g2d_data *g2d = dev_get_drvdata(dev); @@ -1490,25 +1490,6 @@ static int g2d_suspend(struct device *dev) flush_work(&g2d->runqueue_work); - return 0; -} - -static int g2d_resume(struct device *dev) -{ - struct g2d_data *g2d = dev_get_drvdata(dev); - - g2d->suspended = false; - g2d_exec_runqueue(g2d); - - return 0; -} -#endif - -#ifdef CONFIG_PM -static int g2d_runtime_suspend(struct device *dev) -{ - struct g2d_data *g2d = dev_get_drvdata(dev); - clk_disable_unprepare(g2d->gate_clk); return 0; @@ -1523,12 +1504,16 @@ static int g2d_runtime_resume(struct device *dev) if (ret < 0) dev_warn(dev, "failed to enable clock.\n"); + g2d->suspended = false; + g2d_exec_runqueue(g2d); + return ret; } #endif static const struct dev_pm_ops g2d_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(g2d_suspend, g2d_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(g2d_runtime_suspend, g2d_runtime_resume, NULL) }; From 65c0044ca8d7c7bbccae37f0ff2972f0210e9f41 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 17 Sep 2016 07:52:49 -0700 Subject: [PATCH 0331/1050] avr32: fix 'undefined reference to `___copy_from_user' avr32 builds fail with: arch/avr32/kernel/built-in.o: In function `arch_ptrace': (.text+0x650): undefined reference to `___copy_from_user' arch/avr32/kernel/built-in.o:(___ksymtab+___copy_from_user+0x0): undefined reference to `___copy_from_user' kernel/built-in.o: In function `proc_doulongvec_ms_jiffies_minmax': (.text+0x5dd8): undefined reference to `___copy_from_user' kernel/built-in.o: In function `proc_dointvec_minmax_sysadmin': sysctl.c:(.text+0x6174): undefined reference to `___copy_from_user' kernel/built-in.o: In function `ptrace_has_cap': ptrace.c:(.text+0x69c0): undefined reference to `___copy_from_user' kernel/built-in.o:ptrace.c:(.text+0x6b90): more undefined references to `___copy_from_user' follow Fixes: 8630c32275ba ("avr32: fix copy_from_user()") Cc: Al Viro Acked-by: Havard Skinnemoen Acked-by: Hans-Christian Noren Egtvedt Signed-off-by: Guenter Roeck --- arch/avr32/lib/copy_user.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S index 96a6de9d578f..075373471da1 100644 --- a/arch/avr32/lib/copy_user.S +++ b/arch/avr32/lib/copy_user.S @@ -23,8 +23,8 @@ */ .text .align 1 - .global copy_from_user - .type copy_from_user, @function + .global ___copy_from_user + .type ___copy_from_user, @function ___copy_from_user: branch_if_kernel r8, __copy_user ret_if_privileged r8, r11, r10, r10 From 8e4b72054f554967827e18be1de0e8122e6efc04 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 17 Sep 2016 12:57:24 -0700 Subject: [PATCH 0332/1050] openrisc: fix the fix of copy_from_user() Since commit acb2505d0119 ("openrisc: fix copy_from_user()"), copy_from_user() returns the number of bytes requested, not the number of bytes not copied. 
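The contract at stake in these two copy_from_user() fixes is easy to get wrong, so it bears spelling out: the function must return the number of bytes it could NOT copy (0 on success), and the hardened variants also zero the uncopied tail of the kernel buffer. A sketch of the canonical shape, with __arch_copy() as a hypothetical stand-in for the architecture's raw copy primitive, which likewise returns the number of uncopied bytes:

#include <linux/string.h>
#include <linux/uaccess.h>

static unsigned long copy_from_user_model(void *to,
					  const void __user *from,
					  unsigned long n)
{
	unsigned long res = n;	/* pessimistic: nothing copied yet */

	if (likely(access_ok(VERIFY_READ, from, n)))
		res = __arch_copy(to, from, n);
	if (unlikely(res))
		memset(to + (n - res), 0, res);	/* zero the uncopied tail */
	return res;
}

The openrisc bug fixed below assigned the raw copy's result to n instead of res, so res kept its initial value and the function always reported the full requested length as uncopied.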
Cc: Al Viro Fixes: acb2505d0119 ("openrisc: fix copy_from_user()") Signed-off-by: Guenter Roeck --- arch/openrisc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index cbad29b5a131..5cc6b4f1b795 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -276,7 +276,7 @@ copy_from_user(void *to, const void *from, unsigned long n) unsigned long res = n; if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_tofrom_user(to, from, n); + res = __copy_tofrom_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; From 3be7988674ab33565700a37b210f502563d932e6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Sep 2016 17:27:41 -0700 Subject: [PATCH 0333/1050] Linux 4.8-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a8c8ddcb22f..74e22c2f408b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From 4158dbe1be9b420e1fdd9ec5c033647a605ca485 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 18 Sep 2016 22:51:38 +0900 Subject: [PATCH 0334/1050] Subject: [PATCH, RESEND] drm: exynos: avoid unused function warning When CONFIG_PM is not set, we get a warning about an unused function: drivers/gpu/drm/exynos/exynos_drm_gsc.c:1219:12: error: 'gsc_clk_ctrl' defined but not used [-Werror=unused-function] static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) ^~~~~~~~~~~~ This removes the two #ifdef checks in this file and instead marks the functions as __maybe_unused, which is a more reliable way of doing the same, allowing better build coverage and avoiding the warning above. Signed-off-by: Arnd Bergmann Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index b1894aa9286e..52a9d269484e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1760,8 +1760,7 @@ static int gsc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int gsc_runtime_suspend(struct device *dev) +static int __maybe_unused gsc_runtime_suspend(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); @@ -1770,7 +1769,7 @@ static int gsc_runtime_suspend(struct device *dev) return gsc_clk_ctrl(ctx, false); } -static int gsc_runtime_resume(struct device *dev) +static int __maybe_unused gsc_runtime_resume(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); @@ -1778,7 +1777,6 @@ static int gsc_runtime_resume(struct device *dev) return gsc_clk_ctrl(ctx, true); } -#endif static const struct dev_pm_ops gsc_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, From 19cd120319ef5390404a5d9c829c3a7962f184a8 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 16 Sep 2016 10:50:13 +0200 Subject: [PATCH 0335/1050] stmmac: fix PWRDWN into the PMT register for global unicast. MAC devices use the RWKPKTEN and MGKPKTEN bits of the PMT Control/Status register to generate power management events. So this patch is to properly set the RWKPKTEN [BIT(2)] inside the PMT register (needed in case of global unicast). 
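Concretely, the unicast case previously set only the global_unicast match bit; the fix makes it also assert the power-down and wake-up-frame enables before the accumulated mask is written out. A sketch of the corrected logic (power_down, global_unicast and wake_up_frame_en are the driver's local bit definitions; the magic-packet branch and its magic_pkt_en name are shown for contrast and are assumptions):

/* Translate the ethtool WAKE_* mask into PMT register bits. */
u32 pmt = 0;

if (mode & WAKE_MAGIC)
	pmt |= power_down | magic_pkt_en;	/* assumed bit name */
if (mode & WAKE_UCAST)
	pmt |= power_down | global_unicast | wake_up_frame_en;

writel(pmt, ioaddr + GMAC_PMT);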
Reported-by: Aditi SHARMA Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index cbefe9e2207c..885a5e64519d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -261,7 +261,7 @@ static void dwmac1000_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index df5580dcdfed..51019b794be5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -102,7 +102,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); From 2c9d85d4d82d9e0a62aad08bf50650804e68ed30 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:36 +0200 Subject: [PATCH 0336/1050] netdevice: Add offload statistics ndo Add a new ndo to return statistics for offloaded operation. Since there can be many different offloaded operation with many stats types, the ndo gets an attribute id by which it knows which stats are wanted. The ndo also gets a void pointer to be cast according to the attribute id. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2095b6ab3661..a10d8d18ce19 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -924,6 +924,14 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * + * bool (*ndo_has_offload_stats)(int attr_id) + * Return true if this device supports offload stats of this attr_id. + * + * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, + * void *attr_data) + * Get statistics for offload operations by attr_id. Write it into the + * attr_data pointer. + * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. 
@@ -1155,6 +1163,10 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); + bool (*ndo_has_offload_stats)(int attr_id); + int (*ndo_get_offload_stats)(int attr_id, + const struct net_device *dev, + void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, From 69ae6ad2ff37911903a90256e216d7e7ae460002 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:37 +0200 Subject: [PATCH 0337/1050] net: core: Add offload stats to if_stats_msg Add a nested attribute of offload stats to if_stats_msg named IFLA_STATS_LINK_OFFLOAD_XSTATS. Under it, add SW stats, meaning stats only per packets that went via slowpath to the cpu, named IFLA_OFFLOAD_XSTATS_CPU_HIT. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++ net/core/rtnetlink.c | 111 +++++++++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9bf3aecfe05b..2351776a724f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -826,6 +826,7 @@ enum { IFLA_STATS_LINK_64, IFLA_STATS_LINK_XSTATS, IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, __IFLA_STATS_MAX, }; @@ -845,6 +846,14 @@ enum { }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + /* XDP section */ enum { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 937e459bdaa9..0dbae4244a89 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3577,6 +3577,91 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) (!idxattr || idxattr == attrid); } +#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1) +static int rtnl_get_offload_stats_attr_size(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return sizeof(struct rtnl_link_stats64); + } + + return 0; +} + +static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev, + int *prividx) +{ + struct nlattr *attr = NULL; + int attr_id, size; + void *attr_data; + int err; + + if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return -ENODATA; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (attr_id < *prividx) + continue; + + size = rtnl_get_offload_stats_attr_size(attr_id); + if (!size) + continue; + + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + continue; + + attr = nla_reserve_64bit(skb, attr_id, size, + IFLA_OFFLOAD_XSTATS_UNSPEC); + if (!attr) + goto nla_put_failure; + + attr_data = nla_data(attr); + memset(attr_data, 0, size); + err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, + attr_data); + if (err) + goto get_offload_stats_failure; + } + + if (!attr) + return -ENODATA; + + *prividx = 0; + return 0; + +nla_put_failure: + err = -EMSGSIZE; +get_offload_stats_failure: + *prividx = attr_id; + return err; +} + +static int rtnl_get_offload_stats_size(const struct net_device *dev) +{ + int nla_size = 0; + int attr_id; + int size; + + 
if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return 0; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + continue; + size = rtnl_get_offload_stats_attr_size(attr_id); + nla_size += nla_total_size_64bit(size); + } + + if (nla_size != 0) + nla_size += nla_total_size(0); + + return nla_size; +} + static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, unsigned int filter_mask, @@ -3586,6 +3671,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct nlattr *attr; int s_prividx = *prividx; + int err; ASSERT_RTNL(); @@ -3614,8 +3700,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_ops *ops = dev->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS; attr = nla_nest_start(skb, IFLA_STATS_LINK_XSTATS); @@ -3639,8 +3723,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (master) ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; attr = nla_nest_start(skb, IFLA_STATS_LINK_XSTATS_SLAVE); @@ -3655,6 +3737,24 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, + *idxattr)) { + *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; + attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); + if (!attr) + goto nla_put_failure; + + err = rtnl_get_offload_stats(skb, dev, prividx); + if (err == -ENODATA) + nla_nest_cancel(skb, attr); + else + nla_nest_end(skb, attr); + + if (err && err != -ENODATA) + goto nla_put_failure; + *idxattr = 0; + } + nlmsg_end(skb, nlh); return 0; @@ -3708,6 +3808,9 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) + size += rtnl_get_offload_stats_size(dev); + return size; } From fc1bbb0f1831cc22326c86fb21d88cca44999b3e Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:38 +0200 Subject: [PATCH 0338/1050] mlxsw: spectrum: Implement offload stats ndo and expose HW stats by default Change the default statistics ndo to return HW statistics (like the one returned by ethtool_ops). The HW stats are collected to a cache by delayed work every 1 sec. Implement the offload stat ndo. Add a function to get SW statistics, to be called from this function. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 129 +++++++++++++++++- .../net/ethernet/mellanox/mlxsw/spectrum.h | 5 + 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 27bbcaf9cfcd..171f8dd19efa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -819,9 +819,9 @@ err_span_port_mtu_update: return err; } -static struct rtnl_link_stats64 * -mlxsw_sp_port_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +int +mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port_pcpu_stats *p; @@ -848,6 +848,107 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, tx_dropped += p->tx_dropped; } stats->tx_dropped = tx_dropped; + return 0; +} + +bool mlxsw_sp_port_has_offload_stats(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlxsw_sp_port_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +static int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, + int prio, char *ppcnt_pl) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); +} + +static int mlxsw_sp_port_get_hw_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl); + if (err) + goto out; + + stats->tx_packets = + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + stats->rx_packets = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + stats->tx_bytes = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + stats->rx_bytes = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + stats->multicast = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + + stats->rx_crc_errors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + stats->rx_frame_errors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + + stats->rx_length_errors = ( + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl)); + + stats->rx_errors = (stats->rx_crc_errors + + stats->rx_frame_errors + stats->rx_length_errors); + +out: + return err; +} + +static void update_stats_cache(struct work_struct *work) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + container_of(work, struct mlxsw_sp_port, + hw_stats.update_dw.work); + + if (!netif_carrier_ok(mlxsw_sp_port->dev)) + goto out; + + mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, + mlxsw_sp_port->hw_stats.cache); + +out: + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, + MLXSW_HW_STATS_UPDATE_TIME); +} + +/* Return the stats from a cache that is updated periodically, + * as this function might get called in an atomic context. 
+ */ +static struct rtnl_link_stats64 * +mlxsw_sp_port_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); + return stats; } @@ -1209,6 +1310,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, + .ndo_has_offload_stats = mlxsw_sp_port_has_offload_stats, + .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, @@ -1547,8 +1650,6 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, enum mlxsw_reg_ppcnt_grp grp, int prio, u64 *data, int data_index) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; int i, len; @@ -1557,8 +1658,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); if (err) return; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); for (i = 0; i < len; i++) data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; } @@ -2145,6 +2245,16 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_stats; } + mlxsw_sp_port->hw_stats.cache = + kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL); + + if (!mlxsw_sp_port->hw_stats.cache) { + err = -ENOMEM; + goto err_alloc_hw_stats; + } + INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw, + &update_stats_cache); + dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; @@ -2245,6 +2355,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0); return 0; err_core_port_init: @@ -2265,6 +2376,8 @@ err_port_system_port_mapping_set: err_dev_addr_init: mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: + kfree(mlxsw_sp_port->hw_stats.cache); +err_alloc_hw_stats: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: kfree(mlxsw_sp_port->untagged_vlans); @@ -2281,6 +2394,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; + cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw); mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -2290,6 +2404,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); + kfree(mlxsw_sp_port->hw_stats.cache); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 969c250b3048..49f4cafce148 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -361,6 +361,11 @@ struct mlxsw_sp_port { struct list_head vports_list; /* TC handles */ struct list_head mall_tc_list; + struct { + #define MLXSW_HW_STATS_UPDATE_TIME HZ + struct rtnl_link_stats64 *cache; + struct delayed_work update_dw; + } hw_stats; }; struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); From d409b84768037ad03d1d73538d99fb902adf7365 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:08 -0700 Subject: [PATCH 0339/1050] ipv6: Export ip6_route_input_lookup symbol Make ip6_route_input_lookup available outside of the ipv6 module, similar to ip_route_input_noref in the IPv4 world. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 +++ net/ipv6/route.c | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d97305d0e71f..e0cd318d5103 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,6 +64,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) } void ip6_route_input(struct sk_buff *skb); +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad4a7ff301fc..4dab585f7642 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1147,15 +1147,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } -static struct dst_entry *ip6_route_input_lookup(struct net *net, - struct net_device *dev, - struct flowi6 *fl6, int flags) +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); } +EXPORT_SYMBOL_GPL(ip6_route_input_lookup); void ip6_route_input(struct sk_buff *skb) { From e8bffe0cf964f0330595bb376b74921cccdaac88 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:13 -0700 Subject: [PATCH 0340/1050] net: Add _nf_(un)register_hooks symbols Add _nf_register_hooks() and _nf_unregister_hooks() calls which allow the caller to hold the RTNL mutex. Signed-off-by: Mahesh Bandewar CC: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 ++ net/netfilter/core.c | 51 +++++++++++++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9aee896..e82b76781bf6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -133,6 +133,8 @@ int nf_register_hook(struct nf_hook_ops *reg); void nf_unregister_hook(struct nf_hook_ops *reg); int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself!
*/ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index f39276d1c2d7..2c5327e43a88 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -188,19 +188,17 @@ EXPORT_SYMBOL(nf_unregister_net_hooks); static LIST_HEAD(nf_hook_list); -int nf_register_hook(struct nf_hook_ops *reg) +static int _nf_register_hook(struct nf_hook_ops *reg) { struct net *net, *last; int ret; - rtnl_lock(); for_each_net(net) { ret = nf_register_net_hook(net, reg); if (ret && ret != -ENOENT) goto rollback; } list_add_tail(®->list, &nf_hook_list); - rtnl_unlock(); return 0; rollback: @@ -210,19 +208,34 @@ rollback: break; nf_unregister_net_hook(net, reg); } + return ret; +} + +int nf_register_hook(struct nf_hook_ops *reg) +{ + int ret; + + rtnl_lock(); + ret = _nf_register_hook(reg); rtnl_unlock(); + return ret; } EXPORT_SYMBOL(nf_register_hook); -void nf_unregister_hook(struct nf_hook_ops *reg) +static void _nf_unregister_hook(struct nf_hook_ops *reg) { struct net *net; - rtnl_lock(); list_del(®->list); for_each_net(net) nf_unregister_net_hook(net, reg); +} + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + rtnl_lock(); + _nf_unregister_hook(reg); rtnl_unlock(); } EXPORT_SYMBOL(nf_unregister_hook); @@ -246,6 +259,26 @@ err: } EXPORT_SYMBOL(nf_register_hooks); +/* Caller MUST take rtnl_lock() */ +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = _nf_register_hook(®[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + _nf_unregister_hooks(reg, i); + return err; +} +EXPORT_SYMBOL(_nf_register_hooks); + void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) { while (n-- > 0) @@ -253,6 +286,14 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(nf_unregister_hooks); +/* Caller MUST take rtnl_lock */ +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + while (n-- > 0) + _nf_unregister_hook(®[n]); +} +EXPORT_SYMBOL(_nf_unregister_hooks); + unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, struct nf_hook_state *state, From 4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:19 -0700 Subject: [PATCH 0341/1050] ipvlan: Introduce l3s mode In a typical IPvlan L3 setup where master is in default-ns and each slave is into different (slave) ns. In this setup egress packet processing for traffic originating from slave-ns will hit all NF_HOOKs in slave-ns as well as default-ns. However same is not true for ingress processing. All these NF_HOOKs are hit only in the slave-ns skipping them in the default-ns. IPvlan in L3 mode is restrictive and if admins want to deploy iptables rules in default-ns, this asymmetric data path makes it impossible to do so. This patch makes use of the l3_rcv() (added as part of l3mdev enhancements) to perform input route lookup on RX packets without changing the skb->dev and then uses nf_hook at NF_INET_LOCAL_IN to change the skb->dev just before handing over skb to L4. Signed-off-by: Mahesh Bandewar CC: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- Documentation/networking/ipvlan.txt | 7 ++- drivers/net/Kconfig | 1 + drivers/net/ipvlan/ipvlan.h | 6 ++ drivers/net/ipvlan/ipvlan_core.c | 94 +++++++++++++++++++++++++++++ drivers/net/ipvlan/ipvlan_main.c | 87 +++++++++++++++++++++++--- include/uapi/linux/if_link.h | 1 + 6 files changed, 188 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 14422f8fcdc4..24196cef7c91 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module There are no module parameters for this driver and it can be configured using IProute2/ip utility. - ip link add link type ipvlan mode { l2 | L3 } + ip link add link type ipvlan mode { l2 | l3 | l3s } e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 @@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be used before packets are queued on the outbound device. In this mode the slaves will not receive nor can send multicast / broadcast traffic. +4.3 L3S mode: + This is very similar to the L3 mode except that iptables (conn-tracking) +works in this mode and hence it is L3-symmetric (L3s). This will have slightly less +performance but that shouldn't matter since you are choosing this mode over plain-L3 +mode to make conn-tracking work. 5. What to choose (macvlan vs. ipvlan)? These two devices are very similar in many regards and the specific use diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0c5415b05ea9..8768a625350d 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 695a5dc9ace3..7e0732f5ea07 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include #define IPVLAN_DRV "ipvlan" #define IPV_DRV_VER "0.1" @@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto); +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b5f9511d819e..b4e990743e1d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: + case IPVLAN_MODE_L3S: return ipvlan_xmit_mode_l3(skb, dev); } @@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); + case IPVLAN_MODE_L3S: + return RX_HANDLER_PASS; } /* Should not reach here */ @@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return 
RX_HANDLER_CONSUMED; } + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + void *lyr3h; + int addr_type; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + int err; + struct iphdr *ip4h = ip_hdr(skb); + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + skb_dst_set(skb, dst); + break; + } + default: + break; + } + +out: + return skb; +} + +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 18b4e8c7f68a..f442eb366863 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,24 +9,87 @@ #include "ipvlan.h" +static u32 ipvl_nf_hook_refcnt = 0; + +static struct nf_hook_ops ipvl_nfops[] __read_mostly = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +}; + +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; } -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) +static int ipvlan_register_nf_hook(void) +{ + int err = 0; + + if (!ipvl_nf_hook_refcnt) { + err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); + if (!err) + ipvl_nf_hook_refcnt = 1; + } else { + ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(void) +{ + WARN_ON(!ipvl_nf_hook_refcnt); + + ipvl_nf_hook_refcnt--; + if (!ipvl_nf_hook_refcnt) + _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); +} + +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; + struct net_device *mdev = port->dev; + int err = 0; + ASSERT_RTNL(); if (port->mode != nval) { + if (nval == IPVLAN_MODE_L3S) { + /* New mode is L3S */ + err = ipvlan_register_nf_hook(); + if (!err) { + mdev->l3mdev_ops = &ipvl_l3mdev_ops; + mdev->priv_flags |= IFF_L3MDEV_MASTER; + } else + 
return err; + } else if (port->mode == IPVLAN_MODE_L3S) { + /* Old mode was L3S */ + mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + mdev->l3mdev_ops = NULL; + } list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3) + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) ipvlan->dev->flags |= IFF_NOARP; else ipvlan->dev->flags &= ~IFF_NOARP; } port->mode = nval; } + return err; } static int ipvlan_port_create(struct net_device *dev) @@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); dev->priv_flags &= ~IFF_IPVLAN_MASTER; + if (port->mode == IPVLAN_MODE_L3S) { + dev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + dev->l3mdev_ops = NULL; + } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); __skb_queue_purge(&port->backlog); @@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev) struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; - if (ipvlan->port->mode == IPVLAN_MODE_L3) + if (ipvlan->port->mode == IPVLAN_MODE_L3 || + ipvlan->port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; @@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev, { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int err = 0; if (data && data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); - ipvlan_set_port_mode(port, nmode); + err = ipvlan_set_port_mode(port, nmode); } - return 0; + return err; } static size_t ipvlan_nl_getsize(const struct net_device *dev) @@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, unregister_netdevice(dev); return err; } + err = ipvlan_set_port_mode(port, mode); + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); - ipvlan_set_port_mode(port, mode); - netif_stacked_transfer_operstate(phy_dev, dev); return 0; } diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2351776a724f..7ec9e99d5491 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -464,6 +464,7 @@ enum { enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, IPVLAN_MODE_MAX }; From 95357907ae73a8039c2106897ee2694f26ac3caf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 16 Sep 2016 22:36:12 +0200 Subject: [PATCH 0342/1050] mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full The XDP_TX action can fail to transmit the frame when the TX ring is full or the port is down. On TX failure it should drop the frame, and not, as now, fall through via 'break', which is the same as XDP_PASS. Fixes: 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support") Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Brenden Blanco Signed-off-by: David S. Miller
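The essence of the fix, restated as a hedged sketch: a failed XDP_TX transmit must take the drop path and must never fall through to the stack the way XDP_PASS does. The upstream change is only the one-line 'goto next' in the diff below; try_xmit_frame(), deliver_to_stack() and drop_frame() here are hypothetical stand-ins for the mlx4 internals.

#include <linux/filter.h>

static bool try_xmit_frame(void *frame);	/* can fail: ring full or port down */
static void deliver_to_stack(void *frame);	/* the XDP_PASS path */
static void drop_frame(void *frame);		/* recycle the RX buffer */

static void handle_xdp_verdict(u32 act, void *frame)
{
	switch (act) {
	case XDP_PASS:
		deliver_to_stack(frame);
		return;
	case XDP_TX:
		if (try_xmit_frame(frame))
			return;		/* frame consumed by the TX ring */
		break;			/* xmit failed: drop, do not pass */
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
	case XDP_DROP:
		break;
	}
	drop_frame(frame);
}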
--- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6758292311f4..c80073e4947f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -906,7 +906,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, tx_index, &doorbell_pending)) goto consumed; - break; + goto next; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: From e8bc8f9a670e26e91562e724a2114243898bd616 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 16 Sep 2016 23:05:35 +0200 Subject: [PATCH 0343/1050] sctp: Remove some redundant code In commit 311b21774f13 ("sctp: simplify sk_receive_queue locking"), a call to 'skb_queue_splice_tail_init()' has been made explicit. Previously it was hidden in 'sctp_skb_list_tail()'. Now, the code around it looks redundant. The '_init()' part of 'skb_queue_splice_tail_init()' should already do the same. Signed-off-by: Christophe JAILLET Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/ulpqueue.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 877e55066f89..84d0fdaf7de9 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -140,11 +140,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) * we can go ahead and clear out the lobby in one shot */ if (!skb_queue_empty(&sp->pd_lobby)) { - struct list_head *list; skb_queue_splice_tail_init(&sp->pd_lobby, &sk->sk_receive_queue); - list = (struct list_head *)&sctp_sk(sk)->pd_lobby; - INIT_LIST_HEAD(list); return 1; } } else { From 0fbc81b3ad513fecaaf62b48f42b89fcd57f7682 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Sat, 17 Sep 2016 08:12:39 +0530 Subject: [PATCH 0344/1050] chcr/cxgb4i/cxgbit/RDMA/cxgb4: Allocate resources dynamically for all cxgb4 ULD's Allocate resources dynamically for cxgb4's upper-layer drivers (ULDs) like cxgbit, iw_cxgb4 and cxgb4i: allocate resources when they register with the cxgb4 driver and free them when they unregister. All the queues and their interrupts are allocated during ULD probe only and freed during remove. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller
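After this patch every ULD, the crypto driver included, registers through the single cxgb4_register_uld() interface and merely describes its queue needs; cxgb4 allocates the queues and MSI-X vectors at registration time and frees them on unregister. A minimal registration might look like the hedged sketch below -- the my_uld_* names are invented for illustration, and the chcr conversion in the diff that follows is the real example of this pattern.

#include <linux/module.h>
#include "cxgb4_uld.h"

static int my_uld_dev;	/* stands in for real per-adapter state */

static void *my_uld_add(const struct cxgb4_lld_info *lld)
{
	return &my_uld_dev;	/* a real ULD allocates its state here */
}

static int my_uld_rx_handler(void *handle, const __be64 *rsp,
			     const struct pkt_gl *gl)
{
	return 0;		/* consume the ingress message */
}

static int my_uld_state_change(void *handle, enum cxgb4_state new_state)
{
	return 0;
}

static struct cxgb4_uld_info my_uld_info = {
	.name		= "my_uld",
	.nrxq		= MAX_ULD_QSETS,  /* upper bound; cxgb4 sizes the rest */
	.rxq_size	= 1024,
	.ciq		= false,	  /* no concentrator IQs needed */
	.lro		= false,
	.add		= my_uld_add,
	.rx_handler	= my_uld_rx_handler,
	.state_change	= my_uld_state_change,
};

static int __init my_uld_init(void)
{
	return cxgb4_register_uld(CXGB4_ULD_ISCSI, &my_uld_info);
}

static void __exit my_uld_exit(void)
{
	cxgb4_unregister_uld(CXGB4_ULD_ISCSI);
}

module_init(my_uld_init);
module_exit(my_uld_exit);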
--- drivers/crypto/chelsio/chcr_core.c | 10 +- drivers/infiniband/hw/cxgb4/device.c | 4 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 47 +- .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 127 +--- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 611 ++++-------------- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 223 +++++-- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 31 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 18 +- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 3 + drivers/target/iscsi/cxgbit/cxgbit_main.c | 3 + 10 files changed, 384 insertions(+), 693 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 2f6156b672ce..fb5f9bbfa09c 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -39,12 +39,10 @@ static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_FW6_PLD] = cpl_fw6_pld_handler, }; -static struct cxgb4_pci_uld_info chcr_uld_info = { +static struct cxgb4_uld_info chcr_uld_info = { .name = DRV_MODULE_NAME, - .nrxq = 4, + .nrxq = MAX_ULD_QSETS, .rxq_size = 1024, - .nciq = 0, - .ciq_size = 0, .add = chcr_uld_add, .state_change = chcr_uld_state_change, .rx_handler = chcr_uld_rx_handler, @@ -205,7 +203,7 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { - if (cxgb4_register_pci_uld(CXGB4_PCI_ULD1, &chcr_uld_info)) { + if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) { pr_err("ULD register fail: No chcr crypto support in cxgb4"); return -1; } @@ -228,7 +226,7 @@ static void __exit chcr_crypto_exit(void) kfree(u_ctx); } mutex_unlock(&dev_mutex); - cxgb4_unregister_pci_uld(CXGB4_PCI_ULD1); + cxgb4_unregister_uld(CXGB4_ULD_CRYPTO); } module_init(chcr_crypto_init); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 071d7332ec06..f170b63e6eb9 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1475,6 +1475,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 511, + .ciq = true, + .lro = false, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 45955691edc7..1f9867db3b78 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -437,11 +437,6 @@ enum { MAX_ETH_QSETS = 32, /* # of Ethernet Tx/Rx queue sets */ MAX_OFLD_QSETS = 16, /* # of offload Tx, iscsi Rx queue sets */ MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ - MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ - MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */ - - /* # of streaming iSCSIT Rx queues */ - MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS, }; enum { @@ -458,8 +453,7 @@ enum { enum { INGQ_EXTRAS = 2, /* firmware event queue and */ /* forwarded interrupts */ - MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES + - MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS, + MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS, }; struct adapter; @@ -704,10 +698,6 @@ struct sge { struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES]; struct sge_eth_rxq ethrxq[MAX_ETH_QSETS]; - struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS]; - struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES]; - struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES]; - struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS]; struct sge_rspq fw_evtq ____cacheline_aligned_in_smp; struct sge_uld_rxq_info **uld_rxq_info; @@ -717,15 +707,8 @@ struct sge { u16 max_ethqsets; /* # of available Ethernet queue sets */ u16 ethqsets; /* # of active Ethernet queue sets */ u16 ethtxq_rover; /* Tx queue to clean up next */ - u16 iscsiqsets; /* # of active iSCSI queue sets */ - u16 niscsitq; /* # of available iSCST Rx queues */ - u16 rdmaqs; /* # of available RDMA Rx queues */ - u16 rdmaciqs; /* # of available RDMA concentrator IQs */ + u16 ofldqsets; /* # of active ofld queue sets */ u16 nqs_per_uld; /* # of Rx queues per ULD */ - u16 iscsi_rxq[MAX_OFLD_QSETS]; - u16 iscsit_rxq[MAX_ISCSIT_QUEUES]; - u16 rdma_rxq[MAX_RDMA_QUEUES]; - u16 rdma_ciq[MAX_RDMA_CIQS]; u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; u32 fl_pg_order; /* large page allocation size */ @@ -749,10 +732,7 @@ struct sge { }; #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++) -#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++) -#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++) -#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++) -#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++) +#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++) struct l2t_data; @@ -786,6 +766,7 @@ struct uld_msix_bmap { struct uld_msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; + unsigned int idx; }; struct vf_info { @@ -818,7 +799,7 @@ struct adapter { } msix_info[MAX_INGQ + 1]; struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ - unsigned int msi_idx; + int msi_idx; struct doorbell_stats db_stats; struct sge sge; @@ -836,9 +817,10 @@ struct adapter { unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; - struct cxgb4_pci_uld_info *uld; + struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; + unsigned int num_ofld_uld; struct list_head list_node; struct list_head 
rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ @@ -858,6 +840,8 @@ struct adapter { #define T4_OS_LOG_MBOX_CMDS 256 struct mbox_cmd_log *mbox_log; + struct mutex uld_mutex; + struct dentry *debugfs_root; bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ bool trace_rss; /* 1 implies that different RSS flit per filter is @@ -1051,6 +1035,11 @@ static inline int is_pci_uld(const struct adapter *adap) return adap->params.crypto; } +static inline int is_uld(const struct adapter *adap) +{ + return (adap->params.offload || adap->params.crypto); +} + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -1277,6 +1266,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid); +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid); int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid); irqreturn_t t4_sge_intr_msix(int irq, void *cookie); @@ -1635,7 +1626,9 @@ void t4_idma_monitor(struct adapter *adapter, int hz, int ticks); int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); -void uld_mem_free(struct adapter *adap); -int uld_mem_alloc(struct adapter *adap); +void t4_uld_mem_free(struct adapter *adap); +int t4_uld_mem_alloc(struct adapter *adap); +void t4_uld_clean_up(struct adapter *adap); +void t4_register_netevent_notifier(void); void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 91fb50850fff..52be9a4ef97a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2432,17 +2432,11 @@ static int sge_qinfo_show(struct seq_file *seq, void *v) { struct adapter *adap = seq->private; int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4); - int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4); - int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4); - int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4); - int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4); + int ofld_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4); int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4); int i, r = (uintptr_t)v - 1; - int iscsi_idx = r - eth_entries; - int iscsit_idx = iscsi_idx - iscsi_entries; - int rdma_idx = iscsit_idx - iscsit_entries; - int ciq_idx = rdma_idx - rdma_entries; - int ctrl_idx = ciq_idx - ciq_entries; + int ofld_idx = r - eth_entries; + int ctrl_idx = ofld_idx - ofld_entries; int fq_idx = ctrl_idx - ctrl_entries; if (r) @@ -2518,119 +2512,17 @@ do { \ RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); - } else if (iscsi_idx < iscsi_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsirxq[iscsi_idx * 4]; + } else if (ofld_idx < ofld_entries) { const struct sge_ofld_txq *tx = - &adap->sge.ofldtxq[iscsi_idx * 4]; - int n = min(4, adap->sge.iscsiqsets - 4 * iscsi_idx); + &adap->sge.ofldtxq[ofld_idx * 4]; + int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx); - S("QType:", "iSCSI"); + S("QType:", "OFLD-Txq"); T("TxQ ID:", q.cntxt_id); T("TxQ size:", q.size); T("TxQ inuse:", q.in_use); T("TxQ CIDX:", q.cidx); T("TxQ PIDX:", q.pidx); - R("RspQ ID:", 
rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (iscsit_idx < iscsit_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsitrxq[iscsit_idx * 4]; - int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx); - - S("QType:", "iSCSIT"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (rdma_idx < rdma_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.rdmarxq[rdma_idx * 4]; - int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx); - - S("QType:", "RDMA-CPL"); - S("Interface:", - rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (ciq_idx < ciq_entries) { - const struct sge_ofld_rxq *rx = &adap->sge.rdmaciq[ciq_idx * 4]; - int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx); - - S("QType:", "RDMA-CIQ"); - S("Interface:", - rx[i].rspq.netdev ? 
rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - RL("RxAN:", stats.an); - RL("RxNoMem:", stats.nomem); } else if (ctrl_idx < ctrl_entries) { const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4]; @@ -2672,10 +2564,7 @@ do { \ static int sge_queue_entries(const struct adapter *adap) { return DIV_ROUND_UP(adap->sge.ethqsets, 4) + - DIV_ROUND_UP(adap->sge.iscsiqsets, 4) + - DIV_ROUND_UP(adap->sge.niscsitq, 4) + - DIV_ROUND_UP(adap->sge.rdmaqs, 4) + - DIV_ROUND_UP(adap->sge.rdmaciqs, 4) + + DIV_ROUND_UP(adap->sge.ofldqsets, 4) + DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 44cc9767936f..d1ebb84c073e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -226,11 +226,6 @@ static struct dentry *cxgb4_debugfs_root; LIST_HEAD(adapter_list); DEFINE_MUTEX(uld_mutex); -/* Adapter list to be accessed from atomic context */ -static LIST_HEAD(adap_rcu_list); -static DEFINE_SPINLOCK(adap_rcu_lock); -static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX]; -static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" }; static void link_report(struct net_device *dev) { @@ -678,56 +673,6 @@ out: return 0; } -/* Flush the aggregated lro sessions */ -static void uldrx_flush_handler(struct sge_rspq *q) -{ - if (ulds[q->uld].lro_flush) - ulds[q->uld].lro_flush(&q->lro_mgr); -} - -/** - * uldrx_handler - response queue handler for ULD queues - * @q: the response queue that received the packet - * @rsp: the response queue descriptor holding the offload message - * @gl: the gather list of packet fragments - * - * Deliver an ingress offload packet to a ULD. All processing is done by - * the ULD, we just maintain statistics. - */ -static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, - const struct pkt_gl *gl) -{ - struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); - int ret; - - /* FW can send CPLs encapsulated in a CPL_FW4_MSG. 
- */ - if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && - ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) - rsp += 2; - - if (q->flush_handler) - ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld], - rsp, gl, &q->lro_mgr, - &q->napi); - else - ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], - rsp, gl); - - if (ret) { - rxq->stats.nomem++; - return -1; - } - - if (gl == NULL) - rxq->stats.imm++; - else if (gl == CXGB4_MSG_AN) - rxq->stats.an++; - else - rxq->stats.pkts++; - return 0; -} - static void disable_msi(struct adapter *adapter) { if (adapter->flags & USING_MSIX) { @@ -779,30 +724,12 @@ static void name_msix_vecs(struct adapter *adap) snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d", d->name, i); } - - /* offload queues */ - for_each_iscsirxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d", - adap->port[0]->name, i); - - for_each_iscsitrxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d", - adap->port[0]->name, i); - - for_each_rdmarxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d", - adap->port[0]->name, i); - - for_each_rdmaciq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d", - adap->port[0]->name, i); } static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; - int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0; - int iscsitqidx = 0; + int err, ethqidx; int msi_index = 2; err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, @@ -819,57 +746,9 @@ static int request_msix_queue_irqs(struct adapter *adap) goto unwind; msi_index++; } - for_each_iscsirxq(s, iscsiqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsirxq[iscsiqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_iscsitrxq(s, iscsitqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsitrxq[iscsitqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmarxq(s, rdmaqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmarxq[rdmaqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmaciq(s, rdmaciqqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmaciq[rdmaciqqidx].rspq); - if (err) - goto unwind; - msi_index++; - } return 0; unwind: - while (--rdmaciqqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmaciq[rdmaciqqidx].rspq); - while (--rdmaqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmarxq[rdmaqidx].rspq); - while (--iscsitqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsitrxq[iscsitqidx].rspq); - while (--iscsiqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsirxq[iscsiqidx].rspq); while (--ethqidx >= 0) free_irq(adap->msix_info[--msi_index].vec, &s->ethrxq[ethqidx].rspq); @@ -885,16 +764,6 @@ static void free_msix_queue_irqs(struct adapter *adap) free_irq(adap->msix_info[1].vec, &s->fw_evtq); for_each_ethrxq(s, i) free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); - for_each_iscsirxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsirxq[i].rspq); - for_each_iscsitrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsitrxq[i].rspq); - for_each_rdmarxq(s, i) - 
free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); - for_each_rdmaciq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq); } /** @@ -1033,42 +902,11 @@ static void enable_rx(struct adapter *adap) } } -static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, - unsigned int nq, unsigned int per_chan, int msi_idx, - u16 *ids, bool lro) -{ - int i, err; - for (i = 0; i < nq; i++, q++) { - if (msi_idx > 0) - msi_idx++; - err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], - msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler, - lro ? uldrx_flush_handler : NULL, - 0); - if (err) - return err; - memset(&q->stats, 0, sizeof(q->stats)); - if (ids) - ids[i] = q->rspq.abs_id; - } - return 0; -} - -/** - * setup_sge_queues - configure SGE Tx/Rx/response queues - * @adap: the adapter - * - * Determines how many sets of SGE queues to use and initializes them. - * We support multiple queue sets per port if we have MSI-X, otherwise - * just one queue set per port. - */ -static int setup_sge_queues(struct adapter *adap) +static int setup_fw_sge_queues(struct adapter *adap) { - int err, i, j; struct sge *s = &adap->sge; + int err = 0; bitmap_zero(s->starving_fl, s->egr_sz); bitmap_zero(s->txq_maperr, s->egr_sz); @@ -1083,25 +921,27 @@ static int setup_sge_queues(struct adapter *adap) adap->msi_idx = -((int)s->intrq.abs_id + 1); } - /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, - * don't forget to update the following which need to be - * synchronized to and changes here. - * - * 1. The calculations of MAX_INGQ in cxgb4.h. - * - * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs - * to accommodate any new/deleted Ingress Queues - * which need MSI-X Vectors. - * - * 3. Update sge_qinfo_show() to include information on the - * new/deleted queues. - */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], adap->msi_idx, NULL, fwevtq_handler, NULL, -1); - if (err) { -freeout: t4_free_sge_resources(adap); - return err; - } + if (err) + t4_free_sge_resources(adap); + return err; +} + +/** + * setup_sge_queues - configure SGE Tx/Rx/response queues + * @adap: the adapter + * + * Determines how many sets of SGE queues to use and initializes them. + * We support multiple queue sets per port if we have MSI-X, otherwise + * just one queue set per port. 
+ */ +static int setup_sge_queues(struct adapter *adap) +{ + int err, i, j; + struct sge *s = &adap->sge; + struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA]; + unsigned int cmplqid = 0; for_each_port(adap, i) { struct net_device *dev = adap->port[i]; @@ -1132,8 +972,8 @@ freeout: t4_free_sge_resources(adap); } } - j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */ - for_each_iscsirxq(s, i) { + j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */ + for_each_ofldtxq(s, i) { err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], adap->port[i / j], s->fw_evtq.cntxt_id); @@ -1141,30 +981,15 @@ freeout: t4_free_sge_resources(adap); goto freeout; } -#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \ - err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \ - if (err) \ - goto freeout; \ - if (adap->msi_idx > 0) \ - adap->msi_idx += nq; \ -} while (0) - - ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false); - ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true); - ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false); - j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */ - ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false); - -#undef ALLOC_OFLD_RXQS - for_each_port(adap, i) { - /* - * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't + /* Note that cmplqid below is 0 if we don't * have RDMA queues, and that's the right value. */ + if (rxq_info) + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i], - s->fw_evtq.cntxt_id, - s->rdmarxq[i].rspq.cntxt_id); + s->fw_evtq.cntxt_id, cmplqid); if (err) goto freeout; } @@ -1175,6 +1000,9 @@ freeout: t4_free_sge_resources(adap); RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) | QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id)); return 0; +freeout: + t4_free_sge_resources(adap); + return err; } /* @@ -2317,7 +2145,7 @@ static void disable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) disable_txq_db(&adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) disable_txq_db(&adap->sge.ofldtxq[i].q); for_each_port(adap, i) disable_txq_db(&adap->sge.ctrlq[i].q); @@ -2329,7 +2157,7 @@ static void enable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) enable_txq_db(adap, &adap->sge.ctrlq[i].q); @@ -2337,9 +2165,10 @@ static void enable_dbs(struct adapter *adap) static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) { - if (adap->uld_handle[CXGB4_ULD_RDMA]) - ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], - cmd); + enum cxgb4_uld type = CXGB4_ULD_RDMA; + + if (adap->uld && adap->uld[type].handle) + adap->uld[type].control(adap->uld[type].handle, cmd); } static void process_db_full(struct work_struct *work) @@ -2393,13 +2222,14 @@ out: if (ret) CH_WARN(adap, "DB drop recovery failed.\n"); } + static void recover_all_queues(struct adapter *adap) { int i; for_each_ethrxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); @@ -2464,94 +2294,12 @@ void t4_db_dropped(struct adapter *adap) 
queue_work(adap->workq, &adap->db_drop_task); } -static void uld_attach(struct adapter *adap, unsigned int uld) +void t4_register_netevent_notifier(void) { - void *handle; - struct cxgb4_lld_info lli; - unsigned short i; - - lli.pdev = adap->pdev; - lli.pf = adap->pf; - lli.l2t = adap->l2t; - lli.tids = &adap->tids; - lli.ports = adap->port; - lli.vr = &adap->vres; - lli.mtus = adap->params.mtus; - if (uld == CXGB4_ULD_RDMA) { - lli.rxq_ids = adap->sge.rdma_rxq; - lli.ciq_ids = adap->sge.rdma_ciq; - lli.nrxq = adap->sge.rdmaqs; - lli.nciq = adap->sge.rdmaciqs; - } else if (uld == CXGB4_ULD_ISCSI) { - lli.rxq_ids = adap->sge.iscsi_rxq; - lli.nrxq = adap->sge.iscsiqsets; - } else if (uld == CXGB4_ULD_ISCSIT) { - lli.rxq_ids = adap->sge.iscsit_rxq; - lli.nrxq = adap->sge.niscsitq; - } - lli.ntxq = adap->sge.iscsiqsets; - lli.nchan = adap->params.nports; - lli.nports = adap->params.nports; - lli.wr_cred = adap->params.ofldq_wr_cred; - lli.adapter_type = adap->params.chip; - lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); - lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); - lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); - lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); - lli.iscsi_ppm = &adap->iscsi_ppm; - lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; - lli.udb_density = 1 << adap->params.sge.eq_qpp; - lli.ucq_density = 1 << adap->params.sge.iq_qpp; - lli.filt_mode = adap->params.tp.vlan_pri_map; - /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ - for (i = 0; i < NCHAN; i++) - lli.tx_modq[i] = i; - lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); - lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); - lli.fw_vers = adap->params.fw_vers; - lli.dbfifo_int_thresh = dbfifo_int_thresh; - lli.sge_ingpadboundary = adap->sge.fl_align; - lli.sge_egrstatuspagesize = adap->sge.stat_len; - lli.sge_pktshift = adap->sge.pktshift; - lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; - lli.max_ordird_qp = adap->params.max_ordird_qp; - lli.max_ird_adapter = adap->params.max_ird_adapter; - lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; - lli.nodeid = dev_to_node(adap->pdev_dev); - - handle = ulds[uld].add(&lli); - if (IS_ERR(handle)) { - dev_warn(adap->pdev_dev, - "could not attach to the %s driver, error %ld\n", - uld_str[uld], PTR_ERR(handle)); - return; - } - - adap->uld_handle[uld] = handle; - if (!netevent_registered) { register_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = true; } - - if (adap->flags & FULL_INIT_DONE) - ulds[uld].state_change(handle, CXGB4_STATE_UP); -} - -static void attach_ulds(struct adapter *adap) -{ - unsigned int i; - - spin_lock(&adap_rcu_lock); - list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list); - spin_unlock(&adap_rcu_lock); - - mutex_lock(&uld_mutex); - list_add_tail(&adap->list_node, &adapter_list); - for (i = 0; i < CXGB4_ULD_MAX; i++) - if (ulds[i].add) - uld_attach(adap, i); - mutex_unlock(&uld_mutex); } static void detach_ulds(struct adapter *adap) @@ -2561,12 +2309,6 @@ static void detach_ulds(struct adapter *adap) mutex_lock(&uld_mutex); list_del(&adap->list_node); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) { - ulds[i].state_change(adap->uld_handle[i], - CXGB4_STATE_DETACH); - adap->uld_handle[i] = NULL; - } - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) { adap->uld[i].state_change(adap->uld[i].handle, CXGB4_STATE_DETACH); @@ -2577,10 +2319,6 @@ static void detach_ulds(struct adapter *adap) netevent_registered = false; } 
mutex_unlock(&uld_mutex); - - spin_lock(&adap_rcu_lock); - list_del_rcu(&adap->rcu_node); - spin_unlock(&adap_rcu_lock); } static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) @@ -2589,65 +2327,12 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) - ulds[i].state_change(adap->uld_handle[i], new_state); - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) adap->uld[i].state_change(adap->uld[i].handle, new_state); mutex_unlock(&uld_mutex); } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods - * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. - */ -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) -{ - int ret = 0; - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - if (ulds[type].add) { - ret = -EBUSY; - goto out; - } - ulds[type] = *p; - list_for_each_entry(adap, &adapter_list, list_node) - uld_attach(adap, type); -out: mutex_unlock(&uld_mutex); - return ret; -} -EXPORT_SYMBOL(cxgb4_register_uld); - -/** - * cxgb4_unregister_uld - unregister an upper-layer driver - * @type: the ULD type - * - * Unregisters an existing upper-layer driver. - */ -int cxgb4_unregister_uld(enum cxgb4_uld type) -{ - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) - adap->uld_handle[type] = NULL; - ulds[type].add = NULL; - mutex_unlock(&uld_mutex); - return 0; -} -EXPORT_SYMBOL(cxgb4_unregister_uld); - #if IS_ENABLED(CONFIG_IPV6) static int cxgb4_inet6addr_handler(struct notifier_block *this, unsigned long event, void *data) @@ -2752,7 +2437,6 @@ static int cxgb_up(struct adapter *adap) adap->msix_info[0].desc, adap); if (err) goto irq_err; - err = request_msix_queue_irqs(adap); if (err) { free_irq(adap->msix_info[0].vec, adap); @@ -4262,6 +3946,7 @@ static int adap_init0(struct adapter *adap) adap->params.ofldq_wr_cred = val[5]; adap->params.offload = 1; + adap->num_ofld_uld += 1; } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@ -4314,6 +3999,7 @@ static int adap_init0(struct adapter *adap) "max_ordird_qp %d max_ird_adapter %d\n", adap->params.max_ordird_qp, adap->params.max_ird_adapter); + adap->num_ofld_uld += 2; } if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); @@ -4324,6 +4010,8 @@ static int adap_init0(struct adapter *adap) goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + /* LIO target and cxgb4i initiaitor */ + adap->num_ofld_uld += 2; } if (caps_cmd.cryptocaps) { /* Should query params here...TODO */ @@ -4523,14 +4211,14 @@ static void cfg_queues(struct adapter *adap) #ifndef CONFIG_CHELSIO_T4_DCB int q10g = 0; #endif - int ciq_size; /* Reduce memory usage in kdump environment, disable all offload. 
*/ if (is_kdump_kernel()) { adap->params.offload = 0; adap->params.crypto = 0; - } else if (adap->num_uld && uld_mem_alloc(adap)) { + } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; } @@ -4576,33 +4264,18 @@ static void cfg_queues(struct adapter *adap) s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ - if (is_offload(adap)) { + if (is_uld(adap)) { /* * For offload we use 1 queue/channel if all ports are up to 1G, * otherwise we divide all available queues amongst the channels * capped by the number of available cores. */ if (n10g) { - i = min_t(int, ARRAY_SIZE(s->iscsirxq), - num_online_cpus()); - s->iscsiqsets = roundup(i, adap->params.nports); - } else - s->iscsiqsets = adap->params.nports; - /* For RDMA one Rx queue per channel suffices */ - s->rdmaqs = adap->params.nports; - /* Try and allow at least 1 CIQ per cpu rounding down - * to the number of ports, with a minimum of 1 per port. - * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port. - * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port. - * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port. - */ - s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus()); - s->rdmaciqs = (s->rdmaciqs / adap->params.nports) * - adap->params.nports; - s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports); - - if (!is_t4(adap->params.chip)) - s->niscsitq = s->iscsiqsets; + i = num_online_cpus(); + s->ofldqsets = roundup(i, adap->params.nports); + } else { + s->ofldqsets = adap->params.nports; + } } for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { @@ -4621,47 +4294,8 @@ static void cfg_queues(struct adapter *adap) for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) s->ofldtxq[i].q.size = 1024; - for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) { - struct sge_ofld_rxq *r = &s->iscsirxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSI; - r->fl.size = 72; - } - - if (!is_t4(adap->params.chip)) { - for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) { - struct sge_ofld_rxq *r = &s->iscsitrxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSIT; - r->fl.size = 72; - } - } - - for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) { - struct sge_ofld_rxq *r = &s->rdmarxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 511, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - r->fl.size = 72; - } - - ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; - if (ciq_size > SGE_MAX_IQ_SIZE) { - CH_WARN(adap, "CIQ size too small for available IQs\n"); - ciq_size = SGE_MAX_IQ_SIZE; - } - - for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) { - struct sge_ofld_rxq *r = &s->rdmaciq[i]; - - init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - } - init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64); - init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64); + init_rspq(adap, &s->intrq, 0, 1, 512, 64); } /* @@ -4695,7 +4329,15 @@ static void reduce_ethqs(struct adapter *adap, int n) static int get_msix_info(struct adapter *adap) { struct uld_msix_info *msix_info; - int max_ingq = (MAX_OFLD_QSETS * adap->num_uld); + unsigned int max_ingq = 0; + + if (is_offload(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld; + if (is_pci_uld(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_uld; + + if (!max_ingq) + goto out; msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL); if (!msix_info) @@ -4709,12 +4351,13 @@ static int get_msix_info(struct adapter *adap) } spin_lock_init(&adap->msix_bmap_ulds.lock); adap->msix_info_ulds = msix_info; +out: 
return 0; } static void free_msix_info(struct adapter *adap) { - if (!adap->num_uld) + if (!(adap->num_uld && adap->num_ofld_uld)) return; kfree(adap->msix_info_ulds); @@ -4733,32 +4376,32 @@ static int enable_msix(struct adapter *adap) struct msix_entry *entries; int max_ingq = MAX_INGQ; - max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_pci_uld(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_offload(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld); entries = kmalloc(sizeof(*entries) * (max_ingq + 1), GFP_KERNEL); if (!entries) return -ENOMEM; /* map for msix */ - if (is_pci_uld(adap) && get_msix_info(adap)) + if (get_msix_info(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; + } for (i = 0; i < max_ingq + 1; ++i) entries[i].entry = i; want = s->max_ethqsets + EXTRA_VECS; if (is_offload(adap)) { - want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets + - s->niscsitq; - /* need nchan for each possible ULD */ - if (is_t4(adap->params.chip)) - ofld_need = 3 * nchan; - else - ofld_need = 4 * nchan; + want += adap->num_ofld_uld * s->ofldqsets; + ofld_need = adap->num_ofld_uld * nchan; } if (is_pci_uld(adap)) { - want += netif_get_num_default_rss_queues() * nchan; - uld_need = nchan; + want += adap->num_uld * s->ofldqsets; + uld_need = adap->num_uld * nchan; } #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for @@ -4786,43 +4429,25 @@ static int enable_msix(struct adapter *adap) if (i < s->ethqsets) reduce_ethqs(adap, i); } - if (is_pci_uld(adap)) { + if (is_uld(adap)) { if (allocated < want) s->nqs_per_uld = nchan; else - s->nqs_per_uld = netif_get_num_default_rss_queues() * - nchan; + s->nqs_per_uld = s->ofldqsets; } - if (is_offload(adap)) { - if (allocated < want) { - s->rdmaqs = nchan; - s->rdmaciqs = nchan; - - if (!is_t4(adap->params.chip)) - s->niscsitq = nchan; - } - - /* leftovers go to OFLD */ - i = allocated - EXTRA_VECS - s->max_ethqsets - - s->rdmaqs - s->rdmaciqs - s->niscsitq; - if (is_pci_uld(adap)) - i -= s->nqs_per_uld * adap->num_uld; - s->iscsiqsets = (i / nchan) * nchan; /* round down */ - - } - - for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i) + for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i) adap->msix_info[i].vec = entries[i].vector; - if (is_pci_uld(adap)) { - for (j = 0 ; i < allocated; ++i, j++) + if (is_uld(adap)) { + for (j = 0 ; i < allocated; ++i, j++) { adap->msix_info_ulds[j].vec = entries[i].vector; + adap->msix_info_ulds[j].idx = i; + } adap->msix_bmap_ulds.mapsize = j; } dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, " - "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n", - allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs, - s->rdmaciqs, s->nqs_per_uld); + "nic %d per uld %d\n", + allocated, s->max_ethqsets, s->nqs_per_uld); kfree(entries); return 0; @@ -5535,10 +5160,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_offload(adapter)) - attach_ulds(adapter); + if (is_uld(adapter)) { + mutex_lock(&uld_mutex); + list_add_tail(&adapter->list_node, &adapter_list); + mutex_unlock(&uld_mutex); + } print_adapter_info(adapter); + setup_fw_sge_queues(adapter); return 0; sriov: @@ -5593,8 +5222,8 @@ sriov: free_some_resources(adapter); if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); 
out_unmap_bar: if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); @@ -5631,7 +5260,7 @@ static void remove_one(struct pci_dev *pdev) */ destroy_workqueue(adapter->workq); - if (is_offload(adapter)) + if (is_uld(adapter)) detach_ulds(adapter); disable_interrupts(adapter); @@ -5658,8 +5287,8 @@ static void remove_one(struct pci_dev *pdev) if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); free_some_resources(adapter); #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); @@ -5690,12 +5319,58 @@ static void remove_one(struct pci_dev *pdev) #endif } +/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt + * delivery. This is essentially a stripped down version of the PCI remove() + * function where we do the minimal amount of work necessary to shutdown any + * further activity. + */ +static void shutdown_one(struct pci_dev *pdev) +{ + struct adapter *adapter = pci_get_drvdata(pdev); + + /* As with remove_one() above (see extended comment), we only want do + * do cleanup on PCI Devices which went all the way through init_one() + * ... + */ + if (!adapter) { + pci_release_regions(pdev); + return; + } + + if (adapter->pf == 4) { + int i; + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + cxgb_close(adapter->port[i]); + + t4_uld_clean_up(adapter); + disable_interrupts(adapter); + disable_msi(adapter); + + t4_sge_stop(adapter); + if (adapter->flags & FW_OK) + t4_fw_bye(adapter, adapter->mbox); + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + } +#endif +} + static struct pci_driver cxgb4_driver = { .name = KBUILD_MODNAME, .id_table = cxgb4_pci_tbl, .probe = init_one, .remove = remove_one, - .shutdown = remove_one, + .shutdown = shutdown_one, #ifdef CONFIG_PCI_IOV .sriov_configure = cxgb4_iov_configure, #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 5d402bace6c1..fc04e3b878e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -82,6 +82,24 @@ static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx) spin_unlock_irqrestore(&bmap->lock, flags); } +/* Flush the aggregated lro sessions */ +static void uldrx_flush_handler(struct sge_rspq *q) +{ + struct adapter *adap = q->adap; + + if (adap->uld[q->uld].lro_flush) + adap->uld[q->uld].lro_flush(&q->lro_mgr); +} + +/** + * uldrx_handler - response queue handler for ULD queues + * @q: the response queue that received the packet + * @rsp: the response queue descriptor holding the offload message + * @gl: the gather list of packet fragments + * + * Deliver an ingress offload packet to a ULD. All processing is done by + * the ULD, we just maintain statistics. 
+ */ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) { @@ -124,8 +142,8 @@ static int alloc_uld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; unsigned short *ids = rxq_info->rspq_id + offset; unsigned int per_chan = nq / adap->params.nports; - unsigned int msi_idx, bmap_idx; - int i, err; + unsigned int bmap_idx = 0; + int i, err, msi_idx; if (adap->flags & USING_MSIX) msi_idx = 1; @@ -135,14 +153,14 @@ for (i = 0; i < nq; i++, q++) { if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); - adap->msi_idx++; + msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], - adap->msi_idx, + msi_idx, q->fl.size ? &q->fl : NULL, uldrx_handler, - NULL, + lro ? uldrx_flush_handler : NULL, 0); if (err) goto freeout; @@ -159,7 +177,6 @@ freeout: if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } /* We need to free rxq also in case of ciq allocation failure */ @@ -169,7 +186,6 @@ freeout: if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } } return err; @@ -178,17 +194,38 @@ freeout: int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int i, ret = 0; if (adap->flags & USING_MSIX) { - rxq_info->msix_tbl = kzalloc(rxq_info->nrxq + rxq_info->nciq, + rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq), + sizeof(unsigned short), GFP_KERNEL); if (!rxq_info->msix_tbl) return -ENOMEM; } - return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && + ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, rxq_info->nrxq, lro)); + + /* Tell uP to route control queue completions to rdma rspq */ + if (adap->flags & FULL_INIT_DONE && + !ret && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + unsigned int cmplqid; + u32 param, cmdop; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + ret = t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, &param, &cmplqid); + } + } + return ret; } static void t4_free_uld_rxqs(struct adapter *adap, int n, @@ -198,7 +235,6 @@ static void t4_free_uld_rxqs(struct adapter *adap, int n, if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ?
&q->fl : NULL); - adap->msi_idx--; } } @@ -206,6 +242,21 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + u32 param, cmdop, cmplqid = 0; + int i; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, &param, &cmplqid); + } + } + if (rxq_info->nciq) t4_free_uld_rxqs(adap, rxq_info->nciq, rxq_info->uldrxq + rxq_info->nrxq); @@ -215,26 +266,38 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) } int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, - const struct cxgb4_pci_uld_info *uld_info) + const struct cxgb4_uld_info *uld_info) { struct sge *s = &adap->sge; struct sge_uld_rxq_info *rxq_info; - int i, nrxq; + int i, nrxq, ciq_size; rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL); if (!rxq_info) return -ENOMEM; - if (uld_info->nrxq > s->nqs_per_uld) - rxq_info->nrxq = s->nqs_per_uld; - else - rxq_info->nrxq = uld_info->nrxq; - if (!uld_info->nciq) + if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) { + i = s->nqs_per_uld; + rxq_info->nrxq = roundup(i, adap->params.nports); + } else { + i = min_t(int, uld_info->nrxq, + num_online_cpus()); + rxq_info->nrxq = roundup(i, adap->params.nports); + } + if (!uld_info->ciq) { rxq_info->nciq = 0; - } else if (uld_info->nciq && uld_info->nciq > s->nqs_per_uld) - rxq_info->nciq = s->nqs_per_uld; - else - rxq_info->nciq = uld_info->nciq; + } else { + if (adap->flags & USING_MSIX) + rxq_info->nciq = min_t(int, s->nqs_per_uld, + num_online_cpus()); + else + rxq_info->nciq = min_t(int, MAX_OFLD_QSETS, + num_online_cpus()); + rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) * + adap->params.nports); + rxq_info->nciq = max_t(int, rxq_info->nciq, + adap->params.nports); + } nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */ rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq), @@ -259,12 +322,17 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, r->fl.size = 72; } + ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; + if (ciq_size > SGE_MAX_IQ_SIZE) { + dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n"); + ciq_size = SGE_MAX_IQ_SIZE; + } + for (i = rxq_info->nrxq; i < nrxq; i++) { struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; - init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64); + init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); r->rspq.uld = uld_type; - r->fl.size = 72; } memcpy(rxq_info->name, uld_info->name, IFNAMSIZ); @@ -285,7 +353,8 @@ void free_queues_uld(struct adapter *adap, unsigned int uld_type) int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx, bmap_idx, err = 0; + int err = 0; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; @@ -310,10 +379,10 @@ unwind: void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap,
bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, @@ -325,10 +394,10 @@ void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int n = sizeof(adap->msix_info_ulds[0].desc); - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d", adap->port[0]->name, rxq_info->name, idx); @@ -390,15 +459,15 @@ static void uld_queue_init(struct adapter *adap, unsigned int uld_type, lli->nciq = rxq_info->nciq; } -int uld_mem_alloc(struct adapter *adap) +int t4_uld_mem_alloc(struct adapter *adap) { struct sge *s = &adap->sge; - adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL); + adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL); if (!adap->uld) return -ENOMEM; - s->uld_rxq_info = kzalloc(adap->num_uld * + s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX * sizeof(struct sge_uld_rxq_info *), GFP_KERNEL); if (!s->uld_rxq_info) @@ -410,7 +479,7 @@ err_uld: return -ENOMEM; } -void uld_mem_free(struct adapter *adap) +void t4_uld_mem_free(struct adapter *adap) { struct sge *s = &adap->sge; @@ -418,6 +487,26 @@ void uld_mem_free(struct adapter *adap) kfree(adap->uld); } +void t4_uld_clean_up(struct adapter *adap) +{ + struct sge_uld_rxq_info *rxq_info; + unsigned int i; + + if (!adap->uld) + return; + for (i = 0; i < CXGB4_ULD_MAX; i++) { + if (!adap->uld[i].handle) + continue; + rxq_info = adap->sge.uld_rxq_info[i]; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, i); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, i); + free_sge_queues_uld(adap, i); + free_queues_uld(adap, i); + } +} + static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) { int i; @@ -429,10 +518,15 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->ports = adap->port; lld->vr = &adap->vres; lld->mtus = adap->params.mtus; - lld->ntxq = adap->sge.iscsiqsets; + lld->ntxq = adap->sge.ofldqsets; lld->nchan = adap->params.nports; lld->nports = adap->params.nports; lld->wr_cred = adap->params.ofldq_wr_cred; + lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); + lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); + lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); + lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); + lld->iscsi_ppm = &adap->iscsi_ppm; lld->adapter_type = adap->params.chip; lld->cclk_ps = 1000000000 / adap->params.vpd.cclk; lld->udb_density = 1 << adap->params.sge.eq_qpp; @@ -472,23 +566,37 @@ static void uld_attach(struct adapter *adap, unsigned int uld) } adap->uld[uld].handle = handle; + t4_register_netevent_notifier(); if (adap->flags & FULL_INIT_DONE) adap->uld[uld].state_change(handle, CXGB4_STATE_UP); } -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p) +/** + * cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods + * + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. Returns + * %-EBUSY if a ULD of the same type is already registered. 
+ */ +int cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) { int ret = 0; + unsigned int adap_idx = 0; struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; ret = cfg_queues_uld(adap, type, p); if (ret) @@ -510,11 +618,14 @@ int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, } adap->uld[type] = *p; uld_attach(adap, type); + adap_idx++; } mutex_unlock(&uld_mutex); return 0; free_irq: + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); if (adap->flags & USING_MSIX) free_msix_queue_irqs_uld(adap, type); free_rxq: @@ -522,21 +633,49 @@ free_rxq: free_queues: free_queues_uld(adap, type); out: + + list_for_each_entry(adap, &adapter_list, list_node) { + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + continue; + if (!adap_idx) + break; + adap->uld[type].handle = NULL; + adap->uld[type].add = NULL; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); + free_sge_queues_uld(adap, type); + free_queues_uld(adap, type); + adap_idx--; + } mutex_unlock(&uld_mutex); return ret; } -EXPORT_SYMBOL(cxgb4_register_pci_uld); +EXPORT_SYMBOL(cxgb4_register_uld); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) +/** + * cxgb4_unregister_uld - unregister an upper-layer driver + * @type: the ULD type + * + * Unregisters an existing upper-layer driver. 
+ */ +int cxgb4_unregister_uld(enum cxgb4_uld type) { struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; adap->uld[type].handle = NULL; adap->uld[type].add = NULL; @@ -551,4 +690,4 @@ int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) return 0; } -EXPORT_SYMBOL(cxgb4_unregister_pci_uld); +EXPORT_SYMBOL(cxgb4_unregister_uld); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index ab4037222f8d..b3544f6b88ca 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -42,6 +42,8 @@ #include #include "cxgb4.h" +#define MAX_ULD_QSETS 16 + /* CPL message priority levels */ enum { CPL_PRIORITY_DATA = 0, /* data messages */ @@ -189,9 +191,11 @@ static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) } enum cxgb4_uld { + CXGB4_ULD_INIT, CXGB4_ULD_RDMA, CXGB4_ULD_ISCSI, CXGB4_ULD_ISCSIT, + CXGB4_ULD_CRYPTO, CXGB4_ULD_MAX }; @@ -284,31 +288,11 @@ struct cxgb4_lld_info { struct cxgb4_uld_info { const char *name; - void *(*add)(const struct cxgb4_lld_info *p); - int (*rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl); - int (*state_change)(void *handle, enum cxgb4_state new_state); - int (*control)(void *handle, enum cxgb4_control control, ...); - int (*lro_rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl, - struct t4_lro_mgr *lro_mgr, - struct napi_struct *napi); - void (*lro_flush)(struct t4_lro_mgr *); -}; - -enum cxgb4_pci_uld { - CXGB4_PCI_ULD1, - CXGB4_PCI_ULD_MAX -}; - -struct cxgb4_pci_uld_info { - const char *name; - bool lro; void *handle; unsigned int nrxq; - unsigned int nciq; unsigned int rxq_size; - unsigned int ciq_size; + bool ciq; + bool lro; void *(*add)(const struct cxgb4_lld_info *p); int (*rx_handler)(void *handle, const __be64 *rsp, const struct pkt_gl *gl); @@ -323,9 +307,6 @@ struct cxgb4_pci_uld_info { int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 9a607dbc6ca8..1e74fd6085df 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2860,6 +2860,18 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, return 0; } +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid) +{ + u32 param, val; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) | + FW_PARAMS_PARAM_YZ_V(eqid)); + val = cmplqid; + return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val); +} + int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid) { @@ -3014,12
+3026,6 @@ void t4_free_sge_resources(struct adapter *adap) } } - /* clean up RDMA and iSCSI Rx queues */ - t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq); - t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq); - /* clean up offload Tx queues */ for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) { struct sge_ofld_txq *q = &adap->sge.ofldtxq[i]; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index e4ba2d2616cd..7c0d7af0d3b7 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -84,6 +84,9 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *); static const struct cxgb4_uld_info cxgb4i_uld_info = { .name = DRV_MODULE_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = false, .add = t4_uld_add, .rx_handler = t4_uld_rx_handler, .state_change = t4_uld_state_change, diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 27dd11aff934..ad26b9372f10 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -652,6 +652,9 @@ static struct iscsit_transport cxgbit_transport = { static struct cxgb4_uld_info cxgbit_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = true, .add = cxgbit_uld_add, .state_change = cxgbit_uld_state_change, .lro_rx_handler = cxgbit_uld_lro_rx_handler, From ee40681037c0e5fa0058447d7603a4fb77308bce Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:55 +0800 Subject: [PATCH 0345/1050] net: ethernet: mediatek: add HW LRO functions of PDMA RX rings The code adds hardware large receive offload (LRO) functions as below: 1) PDMA has four RX rings in total: one is the normal ring and the others can be configured as LRO rings. 2) Only TCP/IP RX flows can be offloaded. The hardware can set at most four IP addresses; if the destination IP of an RX flow matches one of them, the flow is a candidate for offload. 3) At most three RX flows can be offloaded, and each flow is mapped to one RX ring. 4) If there are more than three candidate RX flows, the hardware chooses three of them by comparing throughput. Signed-off-by: Nelson Chang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 215 ++++++++++++++++++-- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 75 ++++++- 2 files changed, 265 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 522fe8dda6c3..18600cbbb907 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -820,11 +820,51 @@ drop: return NETDEV_TX_OK; } +static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth) +{ + int i; + struct mtk_rx_ring *ring; + int idx; + + if (!eth->hwlro) + return &eth->rx_ring[0]; + + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = &eth->rx_ring[i]; + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); + if (ring->dma[idx].rxd2 & RX_DMA_DONE) { + ring->calc_idx_update = true; + return ring; + } + } + + return NULL; +} + +static void mtk_update_rx_cpu_idx(struct mtk_eth *eth) +{ + struct mtk_rx_ring *ring; + int i; + + if (!eth->hwlro) { + ring = &eth->rx_ring[0]; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } else { + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = &eth->rx_ring[i]; + if (ring->calc_idx_update) { + ring->calc_idx_update = false; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } + } + } +} + static int mtk_poll_rx(struct napi_struct *napi, int budget, struct mtk_eth *eth) { - struct mtk_rx_ring *ring = &eth->rx_ring; - int idx = ring->calc_idx; + struct mtk_rx_ring *ring; + int idx; struct sk_buff *skb; u8 *data, *new_data; struct mtk_rx_dma *rxd, trxd; @@ -836,7 +876,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, dma_addr_t dma_addr; int mac = 0; - idx = NEXT_RX_DESP_IDX(idx); + ring = mtk_get_rx_ring(eth); + if (unlikely(!ring)) + goto rx_done; + + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); rxd = &ring->dma[idx]; data = ring->data[idx]; @@ -907,12 +951,13 @@ release_desc: done++; } +rx_done: if (done) { /* make sure that all changes to the dma ring are flushed before * we continue */ wmb(); - mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); + mtk_update_rx_cpu_idx(eth); } return done; @@ -1135,32 +1180,41 @@ static void mtk_tx_clean(struct mtk_eth *eth) } } -static int mtk_rx_alloc(struct mtk_eth *eth) +static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) { - struct mtk_rx_ring *ring = &eth->rx_ring; + struct mtk_rx_ring *ring = &eth->rx_ring[ring_no]; + int rx_data_len, rx_dma_size; int i; - ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN); + if (rx_flag == MTK_RX_FLAGS_HWLRO) { + rx_data_len = MTK_MAX_LRO_RX_LENGTH; + rx_dma_size = MTK_HW_LRO_DMA_SIZE; + } else { + rx_data_len = ETH_DATA_LEN; + rx_dma_size = MTK_DMA_SIZE; + } + + ring->frag_size = mtk_max_frag_size(rx_data_len); ring->buf_size = mtk_max_buf_size(ring->frag_size); - ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data), + ring->data = kcalloc(rx_dma_size, sizeof(*ring->data), GFP_KERNEL); if (!ring->data) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { ring->data[i] = netdev_alloc_frag(ring->frag_size); if (!ring->data[i]) return -ENOMEM; } ring->dma = dma_alloc_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + rx_dma_size * sizeof(*ring->dma), &ring->phys, GFP_ATOMIC | __GFP_ZERO); if (!ring->dma) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { dma_addr_t dma_addr = dma_map_single(eth->dev, ring->data[i] + NET_SKB_PAD, ring->buf_size, @@ -1171,27 +1225,30 @@ static int mtk_rx_alloc(struct
mtk_eth *eth) ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size); } - ring->calc_idx = MTK_DMA_SIZE - 1; + ring->dma_size = rx_dma_size; + ring->calc_idx_update = false; + ring->calc_idx = rx_dma_size - 1; + ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no); /* make sure that all changes to the dma ring are flushed before we * continue */ wmb(); - mtk_w32(eth, eth->rx_ring.phys, MTK_PRX_BASE_PTR0); - mtk_w32(eth, MTK_DMA_SIZE, MTK_PRX_MAX_CNT0); - mtk_w32(eth, eth->rx_ring.calc_idx, MTK_PRX_CRX_IDX0); - mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_PDMA_RST_IDX); + mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no)); + mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no)); + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX); return 0; } -static void mtk_rx_clean(struct mtk_eth *eth) +static void mtk_rx_clean(struct mtk_eth *eth, int ring_no) { - struct mtk_rx_ring *ring = &eth->rx_ring; + struct mtk_rx_ring *ring = &eth->rx_ring[ring_no]; int i; if (ring->data && ring->dma) { - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < ring->dma_size; i++) { if (!ring->data[i]) continue; if (!ring->dma[i].rxd1) @@ -1208,13 +1265,98 @@ static void mtk_rx_clean(struct mtk_eth *eth) if (ring->dma) { dma_free_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + ring->dma_size * sizeof(*ring->dma), ring->dma, ring->phys); ring->dma = NULL; } } +static int mtk_hwlro_rx_init(struct mtk_eth *eth) +{ + int i; + u32 ring_ctrl_dw1 = 0, ring_ctrl_dw2 = 0, ring_ctrl_dw3 = 0; + u32 lro_ctrl_dw0 = 0, lro_ctrl_dw3 = 0; + + /* set LRO rings to auto-learn modes */ + ring_ctrl_dw2 |= MTK_RING_AUTO_LERAN_MODE; + + /* validate LRO ring */ + ring_ctrl_dw2 |= MTK_RING_VLD; + + /* set AGE timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_AGE_TIME_H; + ring_ctrl_dw1 |= MTK_RING_AGE_TIME_L; + + /* set max AGG timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_TIME; + + /* set max LRO AGG count */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_CNT_L; + ring_ctrl_dw3 |= MTK_RING_MAX_AGG_CNT_H; + + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + mtk_w32(eth, ring_ctrl_dw1, MTK_LRO_CTRL_DW1_CFG(i)); + mtk_w32(eth, ring_ctrl_dw2, MTK_LRO_CTRL_DW2_CFG(i)); + mtk_w32(eth, ring_ctrl_dw3, MTK_LRO_CTRL_DW3_CFG(i)); + } + + /* IPv4 checksum update enable */ + lro_ctrl_dw0 |= MTK_L3_CKS_UPD_EN; + + /* switch priority comparison to packet count mode */ + lro_ctrl_dw0 |= MTK_LRO_ALT_PKT_CNT_MODE; + + /* bandwidth threshold setting */ + mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2); + + /* auto-learn score delta setting */ + mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA); + + /* set refresh timer for altering flows to 1 sec.
(unit: 20us) */ + mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME, + MTK_PDMA_LRO_ALT_REFRESH_TIMER); + + /* set HW LRO mode & the max aggregation count for rx packets */ + lro_ctrl_dw3 |= MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff); + + /* the minimal remaining room of SDL0 in RXD for lro aggregation */ + lro_ctrl_dw3 |= MTK_LRO_MIN_RXD_SDL; + + /* enable HW LRO */ + lro_ctrl_dw0 |= MTK_LRO_EN; + + mtk_w32(eth, lro_ctrl_dw3, MTK_PDMA_LRO_CTRL_DW3); + mtk_w32(eth, lro_ctrl_dw0, MTK_PDMA_LRO_CTRL_DW0); + + return 0; +} + +static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) +{ + int i; + u32 val; + + /* relinquish lro rings, flush aggregated packets */ + mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0); + + /* wait for relinquishments done */ + for (i = 0; i < 10; i++) { + val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0); + if (val & MTK_LRO_RING_RELINQUISH_DONE) { + msleep(20); + continue; + } + } + + /* invalidate lro rings */ + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i)); + + /* disable HW LRO */ + mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0); +} + /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { @@ -1235,6 +1377,7 @@ static int mtk_dma_busy_wait(struct mtk_eth *eth) static int mtk_dma_init(struct mtk_eth *eth) { int err; + u32 i; if (mtk_dma_busy_wait(eth)) return -EBUSY; @@ -1250,10 +1393,21 @@ static int mtk_dma_init(struct mtk_eth *eth) if (err) return err; - err = mtk_rx_alloc(eth); + err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL); if (err) return err; + if (eth->hwlro) { + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO); + if (err) + return err; + } + err = mtk_hwlro_rx_init(eth); + if (err) + return err; + } + /* Enable random early drop and set drop threshold automatically */ mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN, MTK_QDMA_FC_THRES); @@ -1278,7 +1432,14 @@ static void mtk_dma_free(struct mtk_eth *eth) eth->phy_scratch_ring = 0; } mtk_tx_clean(eth); - mtk_rx_clean(eth); + mtk_rx_clean(eth, 0); + + if (eth->hwlro) { + mtk_hwlro_rx_uninit(eth); + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + mtk_rx_clean(eth, i); + } + kfree(eth->scratch_head); } @@ -1873,6 +2034,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw = eth; mac->of_node = np; + memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip)); + mac->hwlro_ip_cnt = 0; + mac->hw_stats = devm_kzalloc(eth->dev, sizeof(*mac->hw_stats), GFP_KERNEL); @@ -1889,6 +2053,11 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; eth->netdev[id]->base_addr = (unsigned long)eth->base; + + eth->netdev[id]->hw_features = MTK_HW_FEATURES; + if (eth->hwlro) + eth->netdev[id]->hw_features |= NETIF_F_LRO; + eth->netdev[id]->vlan_features = MTK_HW_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); eth->netdev[id]->features |= MTK_HW_FEATURES; @@ -1941,6 +2110,8 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->pctl); } + eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro"); + for (i = 0; i < 3; i++) { eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 79954b419b53..7c5e534d2120 100644 --- 
a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -39,7 +39,21 @@ NETIF_F_SG | NETIF_F_TSO | \ NETIF_F_TSO6 | \ NETIF_F_IPV6_CSUM) -#define NEXT_RX_DESP_IDX(X) (((X) + 1) & (MTK_DMA_SIZE - 1)) +#define NEXT_RX_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1)) + +#define MTK_MAX_RX_RING_NUM 4 +#define MTK_HW_LRO_DMA_SIZE 8 + +#define MTK_MAX_LRO_RX_LENGTH (4096 * 3) +#define MTK_MAX_LRO_IP_CNT 2 +#define MTK_HW_LRO_TIMER_UNIT 1 /* 20 us */ +#define MTK_HW_LRO_REFRESH_TIME 50000 /* 1 sec. */ +#define MTK_HW_LRO_AGG_TIME 10 /* 200us */ +#define MTK_HW_LRO_AGE_TIME 50 /* 1ms */ +#define MTK_HW_LRO_MAX_AGG_CNT 64 +#define MTK_HW_LRO_BW_THRE 3000 +#define MTK_HW_LRO_REPLACE_DELTA 1000 +#define MTK_HW_LRO_SDL_REMAIN_ROOM 1522 /* Frame Engine Global Reset Register */ #define MTK_RST_GL 0x04 @@ -50,6 +64,9 @@ #define MTK_GDM1_AF BIT(28) #define MTK_GDM2_AF BIT(29) +/* PDMA HW LRO Alter Flow Timer Register */ +#define MTK_PDMA_LRO_ALT_REFRESH_TIMER 0x1c + /* Frame Engine Interrupt Grouping Register */ #define MTK_FE_INT_GRP 0x20 @@ -70,12 +87,29 @@ /* PDMA RX Base Pointer Register */ #define MTK_PRX_BASE_PTR0 0x900 +#define MTK_PRX_BASE_PTR_CFG(x) (MTK_PRX_BASE_PTR0 + (x * 0x10)) /* PDMA RX Maximum Count Register */ #define MTK_PRX_MAX_CNT0 0x904 +#define MTK_PRX_MAX_CNT_CFG(x) (MTK_PRX_MAX_CNT0 + (x * 0x10)) /* PDMA RX CPU Pointer Register */ #define MTK_PRX_CRX_IDX0 0x908 +#define MTK_PRX_CRX_IDX_CFG(x) (MTK_PRX_CRX_IDX0 + (x * 0x10)) + +/* PDMA HW LRO Control Registers */ +#define MTK_PDMA_LRO_CTRL_DW0 0x980 +#define MTK_LRO_EN BIT(0) +#define MTK_L3_CKS_UPD_EN BIT(7) +#define MTK_LRO_ALT_PKT_CNT_MODE BIT(21) +#define MTK_LRO_RING_RELINQUISH_REQ (0x3 << 26) +#define MTK_LRO_RING_RELINQUISH_DONE (0x3 << 29) + +#define MTK_PDMA_LRO_CTRL_DW1 0x984 +#define MTK_PDMA_LRO_CTRL_DW2 0x988 +#define MTK_PDMA_LRO_CTRL_DW3 0x98c +#define MTK_ADMA_MODE BIT(15) +#define MTK_LRO_MIN_RXD_SDL (MTK_HW_LRO_SDL_REMAIN_ROOM << 16) /* PDMA Global Configuration Register */ #define MTK_PDMA_GLO_CFG 0xa04 @@ -84,6 +118,7 @@ /* PDMA Reset Index Register */ #define MTK_PDMA_RST_IDX 0xa08 #define MTK_PST_DRX_IDX0 BIT(16) +#define MTK_PST_DRX_IDX_CFG(x) (MTK_PST_DRX_IDX0 << (x)) /* PDMA Delay Interrupt Register */ #define MTK_PDMA_DELAY_INT 0xa0c @@ -94,10 +129,33 @@ /* PDMA Interrupt Mask Register */ #define MTK_PDMA_INT_MASK 0xa28 +/* PDMA HW LRO Alter Flow Delta Register */ +#define MTK_PDMA_LRO_ALT_SCORE_DELTA 0xa4c + /* PDMA Interrupt grouping registers */ #define MTK_PDMA_INT_GRP1 0xa50 #define MTK_PDMA_INT_GRP2 0xa54 +/* PDMA HW LRO IP Setting Registers */ +#define MTK_LRO_RX_RING0_DIP_DW0 0xb04 +#define MTK_LRO_DIP_DW0_CFG(x) (MTK_LRO_RX_RING0_DIP_DW0 + (x * 0x40)) +#define MTK_RING_MYIP_VLD BIT(9) + +/* PDMA HW LRO Ring Control Registers */ +#define MTK_LRO_RX_RING0_CTRL_DW1 0xb28 +#define MTK_LRO_RX_RING0_CTRL_DW2 0xb2c +#define MTK_LRO_RX_RING0_CTRL_DW3 0xb30 +#define MTK_LRO_CTRL_DW1_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW1 + (x * 0x40)) +#define MTK_LRO_CTRL_DW2_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW2 + (x * 0x40)) +#define MTK_LRO_CTRL_DW3_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW3 + (x * 0x40)) +#define MTK_RING_AGE_TIME_L ((MTK_HW_LRO_AGE_TIME & 0x3ff) << 22) +#define MTK_RING_AGE_TIME_H ((MTK_HW_LRO_AGE_TIME >> 10) & 0x3f) +#define MTK_RING_AUTO_LERAN_MODE (3 << 6) +#define MTK_RING_VLD BIT(8) +#define MTK_RING_MAX_AGG_TIME ((MTK_HW_LRO_AGG_TIME & 0xffff) << 10) +#define MTK_RING_MAX_AGG_CNT_L ((MTK_HW_LRO_MAX_AGG_CNT & 0x3f) << 26) +#define MTK_RING_MAX_AGG_CNT_H ((MTK_HW_LRO_MAX_AGG_CNT >> 
6) & 0x3) + /* QDMA TX Queue Configuration Registers */ #define MTK_QTX_CFG(x) (0x1800 + (x * 0x10)) #define QDMA_RES_THRES 4 @@ -132,7 +190,6 @@ /* QDMA Reset Index Register */ #define MTK_QDMA_RST_IDX 0x1A08 -#define MTK_PST_DRX_IDX0 BIT(16) /* QDMA Delay Interrupt Register */ #define MTK_QDMA_DELAY_INT 0x1A0C @@ -377,6 +434,12 @@ struct mtk_tx_ring { atomic_t free_count; }; +/* PDMA rx ring mode */ +enum mtk_rx_flags { + MTK_RX_FLAGS_NORMAL = 0, + MTK_RX_FLAGS_HWLRO, +}; + /* struct mtk_rx_ring - This struct holds info describing a RX ring * @dma: The descriptor ring * @data: The memory pointed at by the ring @@ -391,7 +454,10 @@ struct mtk_rx_ring { dma_addr_t phys; u16 frag_size; u16 buf_size; + u16 dma_size; + bool calc_idx_update; u16 calc_idx; + u32 crx_idx_reg; }; /* currently no SoC has more than 2 macs */ @@ -439,9 +505,10 @@ struct mtk_eth { unsigned long sysclk; struct regmap *ethsys; struct regmap *pctl; + bool hwlro; atomic_t dma_refcnt; struct mtk_tx_ring tx_ring; - struct mtk_rx_ring rx_ring; + struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM]; struct napi_struct tx_napi; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; @@ -470,6 +537,8 @@ struct mtk_mac { struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; struct phy_device *phy_dev; + __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; + int hwlro_ip_cnt; }; /* the struct describing the SoC. these are declared in the soc_xyz.c files */ From 7aab747e5563ecbc9f3cb64ddea13fe7b9fee2bd Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:56 +0800 Subject: [PATCH 0346/1050] net: ethernet: mediatek: add ethtool functions to configure RX flows of HW LRO The code adds ethtool functions to set RX flows for HW LRO. Because the HW LRO hardware can only recognize the destination IP of TCP/IP RX flows, the ethtool command to add an HW LRO flow is as below: ethtool -N [devname] flow-type tcp4 dst-ip [ip_addr] loc [0~1] Moreover, because the hardware can set four destination IPs in total, each GMAC (GMAC1/GMAC2) can set at most two IPs. Signed-off-by: Nelson Chang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 236 ++++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 18600cbbb907..481f3609a1ea 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1357,6 +1357,182 @@ static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0); } +static void mtk_hwlro_val_ipaddr(struct mtk_eth *eth, int idx, __be32 ip) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, ip, MTK_LRO_DIP_DW0_CFG(idx)); + + /* validate the IP setting */ + mtk_w32(eth, (reg_val | MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); +} + +static void mtk_hwlro_inval_ipaddr(struct mtk_eth *eth, int idx) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, 0, MTK_LRO_DIP_DW0_CFG(idx)); +} + +static int mtk_hwlro_get_ip_cnt(struct mtk_mac *mac) +{ + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) + cnt++; + } + + return cnt; +} + +static int mtk_hwlro_add_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if ((fsp->flow_type != TCP_V4_FLOW) || + (!fsp->h_u.tcp_ip4_spec.ip4dst) || + (fsp->location > 1)) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = htonl(fsp->h_u.tcp_ip4_spec.ip4dst); + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_val_ipaddr(eth, hwlro_idx, mac->hwlro_ip[fsp->location]); + + return 0; +} + +static int mtk_hwlro_del_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if (fsp->location > 1) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + + return 0; +} + +static void mtk_hwlro_netdev_disable(struct net_device *dev) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int i, hwlro_idx; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + mac->hwlro_ip[i] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + i; + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + } + + mac->hwlro_ip_cnt = 0; +} + +static int mtk_hwlro_get_fdir_entry(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + + /* only tcp dst ipv4 is meaningful, others are meaningless */ + fsp->flow_type = TCP_V4_FLOW; + fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); + fsp->m_u.tcp_ip4_spec.ip4dst = 0; + + fsp->h_u.tcp_ip4_spec.ip4src = 0; + fsp->m_u.tcp_ip4_spec.ip4src = 0xffffffff; + fsp->h_u.tcp_ip4_spec.psrc = 0; + fsp->m_u.tcp_ip4_spec.psrc = 0xffff; + fsp->h_u.tcp_ip4_spec.pdst = 0; + 
fsp->m_u.tcp_ip4_spec.pdst = 0xffff; + fsp->h_u.tcp_ip4_spec.tos = 0; + fsp->m_u.tcp_ip4_spec.tos = 0xff; + + return 0; +} + +static int mtk_hwlro_get_fdir_all(struct net_device *dev, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct mtk_mac *mac = netdev_priv(dev); + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) { + rule_locs[cnt] = i; + cnt++; + } + } + + cmd->rule_cnt = cnt; + + return 0; +} + +static netdev_features_t mtk_fix_features(struct net_device *dev, + netdev_features_t features) +{ + if (!(features & NETIF_F_LRO)) { + struct mtk_mac *mac = netdev_priv(dev); + int ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + if (ip_cnt) { + netdev_info(dev, "RX flow is programmed, LRO should keep on\n"); + + features |= NETIF_F_LRO; + } + } + + return features; +} + +static int mtk_set_features(struct net_device *dev, netdev_features_t features) +{ + int err = 0; + + if (!((dev->features ^ features) & NETIF_F_LRO)) + return 0; + + if (!(features & NETIF_F_LRO)) + mtk_hwlro_netdev_disable(dev); + + return err; +} + /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { @@ -1971,6 +2147,62 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); } +static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + if (dev->features & NETIF_F_LRO) { + cmd->data = MTK_MAX_RX_RING_NUM; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRLCNT: + if (dev->features & NETIF_F_LRO) { + struct mtk_mac *mac = netdev_priv(dev); + + cmd->rule_cnt = mac->hwlro_ip_cnt; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRULE: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_entry(dev, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_all(dev, cmd, + rule_locs); + break; + default: + break; + } + + return ret; +} + +static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_add_ipaddr(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_del_ipaddr(dev, cmd); + break; + default: + break; + } + + return ret; +} + static const struct ethtool_ops mtk_ethtool_ops = { .get_settings = mtk_get_settings, .set_settings = mtk_set_settings, @@ -1982,6 +2214,8 @@ static const struct ethtool_ops mtk_ethtool_ops = { .get_strings = mtk_get_strings, .get_sset_count = mtk_get_sset_count, .get_ethtool_stats = mtk_get_ethtool_stats, + .get_rxnfc = mtk_get_rxnfc, + .set_rxnfc = mtk_set_rxnfc, }; static const struct net_device_ops mtk_netdev_ops = { @@ -1996,6 +2230,8 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, + .ndo_fix_features = mtk_fix_features, + .ndo_set_features = mtk_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mtk_poll_controller, #endif From 004e6cc6c181aa109427594fca35eb55d89d780e Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:57 +0800 Subject: [PATCH 0347/1050] net: ethernet: mediatek: add the dts property to set if the HW supports LRO Add the dts property for the capability if the hardware supports LRO. 
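For reference (a sketch, not part of this patch): the driver consumes the flag as a plain boolean DT property, as the earlier HW LRO patch in this series already does in mtk_probe() and mtk_add_mac():

	/* probe time: latch the DT capability flag */
	eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro");

	/* per-MAC setup: advertise LRO only when the SoC supports it */
	if (eth->hwlro)
		eth->netdev[id]->hw_features |= NETIF_F_LRO;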
Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 32eaaca04d9b..6103e5583070 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -24,7 +24,7 @@ Required properties: Optional properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - +- mediatek,hwlro: the capability if the hardware supports LRO functions * Ethernet MAC node @@ -51,6 +51,7 @@ eth: ethernet@1b100000 { reset-names = "eth"; mediatek,ethsys = <&ethsys>; mediatek,pctl = <&syscfg_pctl_a>; + mediatek,hwlro; #address-cells = <1>; #size-cells = <0>; From 106323b905a6bcd21ff83dd4e19566282fd5eb52 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 17 Sep 2016 15:52:17 +0000 Subject: [PATCH 0348/1050] cxgb4: Fix return value check in cfg_queues_uld() Fix the return value check, which was testing the wrong variable, in cfg_queues_uld(). Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index fc04e3b878e8..d12a73e03565 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -308,7 +308,7 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, } rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL); - if (!rxq_info->uldrxq) { + if (!rxq_info->rspq_id) { kfree(rxq_info->uldrxq); kfree(rxq_info); return -ENOMEM; From 1486587b2fcda08dee7eab23784d504eed772c45 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:30 +0200 Subject: [PATCH 0349/1050] pie: use qdisc_dequeue_head wrapper Doesn't change generated code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index a570b0bb254c..d976d74b22d7 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) { struct sk_buff *skb; - skb = __qdisc_dequeue_head(sch, &sch->q); + skb = qdisc_dequeue_head(sch); if (!skb) return NULL; From 97d0678f913369af0dc8b510a682a641654ab743 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:31 +0200 Subject: [PATCH 0350/1050] sched: don't use skb queue helpers A followup change will replace the sk_buff_head in the qdisc struct with a slightly different list. Use of the sk_buff_head helpers will thus cause compiler warnings. Open-code these accesses in an extra change to ease review. Signed-off-by: Florian Westphal Signed-off-by: David S.
Miller --- net/sched/sch_fifo.c | 4 ++-- net/sched/sch_generic.c | 2 +- net/sched/sch_netem.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index baeed6a78d28..1e37247656f8 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); @@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); prev_backlog = sch->qstats.backlog; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0d21b567ff27..5e63bf638350 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -486,7 +486,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { + if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff_head *list = band2list(priv, band); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index aaaf02175338..1832d7732dbc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -502,7 +502,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) + if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); @@ -522,7 +522,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->rate) { struct sk_buff *last; - if (!skb_queue_empty(&sch->q)) + if (sch->q.qlen) last = skb_peek_tail(&sch->q); else last = netem_rb_to_skb(rb_last(&q->t_root)); From ec323368793b8570c02e723127611a8d906a9b3f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:32 +0200 Subject: [PATCH 0351/1050] sched: remove qdisc arg from __qdisc_dequeue_head Moves qdisc stat accounting to qdisc_dequeue_head. The only direct caller of the __qdisc_dequeue_head version open-codes this now. This allows us to later use __qdisc_dequeue_head as a replacement for __skb_dequeue() (which operates on a sk_buff_head list). Signed-off-by: Florian Westphal Signed-off-by: David S.
Miller --- include/net/sch_generic.h | 15 ++++++++------- net/sched/sch_generic.c | 7 ++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 52a2015667b4..0741ed41575b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -614,11 +614,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, - struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) { struct sk_buff *skb = __skb_dequeue(list); + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +{ + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); @@ -627,11 +633,6 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, return skb; } -static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) -{ - return __qdisc_dequeue_head(sch, &sch->q); -} - /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e63bf638350..73877d9c2bcb 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -506,7 +506,12 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) if (likely(band >= 0)) { struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); + struct sk_buff *skb = __qdisc_dequeue_head(list); + + if (likely(skb != NULL)) { + qdisc_qstats_backlog_dec(qdisc, skb); + qdisc_bstats_update(qdisc, skb); + } qdisc->q.qlen--; if (skb_queue_empty(list)) From ed760cb8aae7c2b84c193d4a7637b0c9e752f07e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:33 +0200 Subject: [PATCH 0352/1050] sched: replace __skb_dequeue with __qdisc_dequeue_head After the previous patch these functions are identical. Replace __skb_dequeue in qdiscs with __qdisc_dequeue_head. The next patch will then make __qdisc_dequeue_head handle a singly-linked list instead of a struct sk_buff_head argument. Doesn't change generated code. Signed-off-by: Florian Westphal Signed-off-by: David S.
Miller --- net/sched/sch_codel.c | 4 ++-- net/sched/sch_netem.c | 2 +- net/sched/sch_pie.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 4002df3c7d9f..5bfa79ee657c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -69,7 +69,7 @@ struct codel_sched_data { static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { struct Qdisc *sch = ctx; - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); if (skb) sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1832d7732dbc..0a964b35f8c7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -587,7 +587,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct rb_node *p; tfifo_dequeue: - skb = __skb_dequeue(&sch->q); + skb = __qdisc_dequeue_head(&sch->q); if (skb) { qdisc_qstats_backlog_dec(sch, skb); deliver: diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index d976d74b22d7..5c3a99d6aa82 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); From 48da34b7a74201f15315cb1fc40bb9a7bd2b4940 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:34 +0200 Subject: [PATCH 0353/1050] sched: add and use qdisc_skb_head helpers This change replaces the sk_buff_head struct in Qdiscs with a new qdisc_skb_head. It's similar to the sk_buff_head API, but does not use skb->prev pointers. Qdiscs will commonly enqueue at the tail of a list and dequeue at the head. While sk_buff_head works fine for this, enqueue/dequeue needs to also adjust the prev pointer of the next element. The ->prev pointer is not required for qdiscs so we can just leave it undefined and avoid one cacheline write access for en/dequeue. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 65 +++++++++++++++++++++++++++++++-------- net/sched/sch_generic.c | 21 +++++++------ net/sched/sch_htb.c | 24 ++++++++++++--- net/sched/sch_netem.c | 14 +++++++-- 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0741ed41575b..e6aa0a249672 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,14 @@ struct qdisc_size_table { u16 data[]; }; +/* similar to sk_buff_head, but skb->prev pointer is undefined.
*/ +struct qdisc_skb_head { + struct sk_buff *head; + struct sk_buff *tail; + __u32 qlen; + spinlock_t lock; +}; + struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -76,7 +84,7 @@ struct Qdisc { * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff *gso_skb ____cacheline_aligned_in_smp; - struct sk_buff_head q; + struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; @@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } -static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff_head *list) +static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { - __skb_queue_tail(list, skb); + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; +} + +static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; @@ -614,9 +639,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = qh->head; + + if (likely(skb != NULL)) { + qh->head = skb->next; + qh->qlen--; + if (qh->head == NULL) + qh->tail = NULL; + skb->next = NULL; + } return skb; } @@ -643,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) } static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list, + struct qdisc_skb_head *qh, struct sk_buff **to_free) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); @@ -667,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { - return skb_peek(&sch->q); + const struct qdisc_skb_head *qh = &sch->q; + + return qh->head; } /* generic pseudo peek method for non-work-conserving qdisc */ @@ -702,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - if (!skb_queue_empty(list)) { - rtnl_kfree_skbs(list->next, list->prev); - __skb_queue_head_init(list); + ASSERT_RTNL(); + if (qh->qlen) { + rtnl_kfree_skbs(qh->head, qh->tail); + + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 73877d9c2bcb..6cfb6e9038c2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { */ struct pfifo_fast_priv { u32 bitmap; - struct sk_buff_head q[PFIFO_FAST_BANDS]; + struct qdisc_skb_head q[PFIFO_FAST_BANDS]; }; /* @@ -477,7 +477,7 @@ struct pfifo_fast_priv { */ static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; -static inline 
struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, +static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv, int band) { return priv->q + band; @@ -489,7 +489,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *list = band2list(priv, band); priv->bitmap |= (1 << band); qdisc->q.qlen++; @@ -505,8 +505,8 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (likely(band >= 0)) { - struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(list); + struct qdisc_skb_head *qh = band2list(priv, band); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(qdisc, skb); @@ -514,7 +514,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) } qdisc->q.qlen--; - if (skb_queue_empty(list)) + if (qh->qlen == 0) priv->bitmap &= ~(1 << band); return skb; @@ -529,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (band >= 0) { - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *qh = band2list(priv, band); - return skb_peek(list); + return qh->head; } return NULL; @@ -569,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __skb_queue_head_init(band2list(priv, prio)); + qdisc_skb_head_init(band2list(priv, prio)); /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; @@ -617,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - skb_queue_head_init(&sch->q); + qdisc_skb_head_init(&sch->q); + spin_lock_init(&sch->q.lock); spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 53dbfa187870..c798d0de8a9d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -162,7 +162,7 @@ struct htb_sched { struct work_struct work; /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; + struct qdisc_skb_head direct_queue; long direct_pkts; struct qdisc_watchdog watchdog; @@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) list_del_init(&cl->un.leaf.drop_list); } +static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; +} + static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); + htb_enqueue_tail(skb, sch, &q->direct_queue); q->direct_pkts++; } else { return qdisc_drop(skb, sch, to_free); @@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) unsigned long start_at; /* try to dequeue direct packets as high 
prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); + skb = __qdisc_dequeue_head(&q->direct_queue); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); @@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); - __skb_queue_head_init(&q->direct_queue); + qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0a964b35f8c7..9f7b380cf0a3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, return segs; } +static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) +{ + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -523,7 +533,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *last; if (sch->q.qlen) - last = skb_peek_tail(&sch->q); + last = sch->q.tail; else last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { @@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&sch->q, skb); + netem_enqueue_skb_head(&sch->q, skb); sch->qstats.requeues++; } From d55092b4812827b577daf2da7b4e404d87571e0b Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 3 Aug 2016 18:41:27 +0300 Subject: [PATCH 0354/1050] iwlwifi: mvm: remove variable shadowing Variable "ac" defined twice. Fix that. Fixes: commit 93f436e2c7fe ("iwlwifi: mvm: set sta_id in SCD_QUEUE_CONFIG cmd") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 216aa54c8679..d3a0378b547f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -745,14 +745,14 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, .scd_queue = queue, .action = SCD_CFG_DISABLE_QUEUE, }; - u8 ac; + u8 txq_curr_ac; disable_agg_tids = iwl_mvm_remove_sta_queue_marking(mvm, queue); spin_lock_bh(&mvm->queue_info_lock); - ac = mvm->queue_info[queue].mac80211_ac; + txq_curr_ac = mvm->queue_info[queue].mac80211_ac; cmd.sta_id = mvm->queue_info[queue].ra_sta_id; - cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[txq_curr_ac]; cmd.tid = mvm->queue_info[queue].txq_tid; spin_unlock_bh(&mvm->queue_info_lock); From 8352e62ac2c4ebbfe95c95561f9f16d0ff06f375 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 4 Aug 2016 10:56:53 +0300 Subject: [PATCH 0355/1050] iwlwifi: pcie: fix typo in struct name for a000 devices commit 3cd1980b0cdf ("iwlwifi: pcie: introduce new tfd and tb formats") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 188b0dee542c..74199b174948 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2953,7 
+2953,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, if (cfg->use_tfh) { trans_pcie->max_tbs = IWL_TFH_NUM_TBS; - trans_pcie->tfd_size = sizeof(struct iwl_tfh_tb); + trans_pcie->tfd_size = sizeof(struct iwl_tfh_tfd); } else { trans_pcie->max_tbs = IWL_NUM_OF_TBS; From 5a710b8606cc3ec12f60a400248a051743ee35d1 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 3 Aug 2016 14:08:00 +0300 Subject: [PATCH 0356/1050] iwlwifi: mvm: cleanup skb queue functions use Use skb_queue_empty() and not skb_peek_tail() to check for empty list. Avoid a redundant check as well - loop will take care of it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index d6d9ec401b44..b3866287d1c0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -418,10 +418,11 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, ssn = ieee80211_sn_inc(ssn); - /* holes are valid since nssn indicates frames were received. */ - if (skb_queue_empty(skb_list) || !skb_peek_tail(skb_list)) - continue; - /* Empty the list. Will have more than one frame for A-MSDU */ + /* + * Empty the list. Will have more than one frame for A-MSDU. + * Empty list is valid as well since nssn indicates frames were + * received. + */ while ((skb = __skb_dequeue(skb_list))) { iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, reorder_buf->queue, @@ -434,7 +435,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, if (reorder_buf->num_stored && !reorder_buf->removed) { u16 index = reorder_buf->head_sn % reorder_buf->buf_size; - while (!skb_peek_tail(&reorder_buf->entries[index])) + while (skb_queue_empty(&reorder_buf->entries[index])) index = (index + 1) % reorder_buf->buf_size; /* modify timer to match next frame's expiration time */ mod_timer(&reorder_buf->reorder_timer, @@ -462,7 +463,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) for (i = 0; i < buf->buf_size ; i++) { index = (buf->head_sn + i) % buf->buf_size; - if (!skb_peek_tail(&buf->entries[index])) + if (skb_queue_empty(&buf->entries[index])) continue; if (!time_after(jiffies, buf->reorder_time[index] + RX_REORDER_BUF_TIMEOUT_MQ)) From 61b0f5d7c41408ff523a5cd3925feafa82b971f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Aug 2016 08:57:59 +0200 Subject: [PATCH 0357/1050] iwlwifi: mvm: compare full command ID When comparing command IDs, the group should be taken into account so the same command/notification from a different group doesn't trigger anything unexpected. Fix this by comparing to the wide ID. 
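To illustrate the difference (a sketch; WIDE_ID() is assumed here to pack the ids as shown, see the iwlwifi firmware API headers):

	/* wide id: 8-bit group in the high byte, 8-bit opcode in the low byte */
	#define WIDE_ID(grp, opcode) (((grp) << 8) | (opcode))

	u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd);

With the full 16-bit value compared, RX_QUEUES_NOTIFICATION in DATA_PATH_GROUP can no longer be mistaken for a LEGACY_GROUP command that happens to share the same opcode byte.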
Fixes: commit 1738d60b31d7 ("iwlwifi: mvm: handle RX MPDUs separately") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 23 +++++++++++--------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 43ea1e5fdfc6..de34c9f4f0f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -966,10 +966,11 @@ static void iwl_mvm_rx(struct iwl_op_mode *op_mode, { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_rx_mpdu(mvm, napi, rxb); - else if (pkt->hdr.cmd == REPLY_RX_PHY_CMD) + else if (cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_PHY_CMD)) iwl_mvm_rx_rx_phy_cmd(mvm, rxb); else iwl_mvm_rx_common(mvm, rxb, pkt); @@ -981,13 +982,14 @@ static void iwl_mvm_rx_mq(struct iwl_op_mode *op_mode, { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, 0); - else if (unlikely(pkt->hdr.group_id == DATA_PATH_GROUP && - pkt->hdr.cmd == RX_QUEUES_NOTIFICATION)) + else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP, + RX_QUEUES_NOTIFICATION))) iwl_mvm_rx_queue_notif(mvm, rxb, 0); - else if (pkt->hdr.cmd == FRAME_RELEASE) + else if (cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE)) iwl_mvm_rx_frame_release(mvm, napi, rxb, 0); else iwl_mvm_rx_common(mvm, rxb, pkt); @@ -1666,13 +1668,14 @@ static void iwl_mvm_rx_mq_rss(struct iwl_op_mode *op_mode, { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); struct iwl_rx_packet *pkt = rxb_addr(rxb); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (unlikely(pkt->hdr.cmd == FRAME_RELEASE)) + if (unlikely(cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE))) iwl_mvm_rx_frame_release(mvm, napi, rxb, queue); - else if (unlikely(pkt->hdr.cmd == RX_QUEUES_NOTIFICATION && - pkt->hdr.group_id == DATA_PATH_GROUP)) + else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP, + RX_QUEUES_NOTIFICATION))) iwl_mvm_rx_queue_notif(mvm, rxb, queue); - else if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + else if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, queue); } From 91b08c2da4b1b89c265d4c68354b1ef3564a9eeb Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Sun, 27 Mar 2016 14:53:49 +0300 Subject: [PATCH 0358/1050] iwlwifi: mvm: add support for MU-MIMO air sniffer enable MU-MIMO air sniffer if it's supported by the NIC Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 9506e658abd3..b7d80b5fc0bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -720,6 +720,10 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (ret) iwl_mvm_leds_exit(mvm); + if (mvm->cfg->vht_mu_mimo_supported) + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER); + return ret; } @@ -2229,6 
+2233,10 @@ static void iwl_mvm_bss_info_changed(struct ieee80211_hw *hw, case NL80211_IFTYPE_ADHOC: iwl_mvm_bss_info_changed_ap_ibss(mvm, vif, bss_conf, changes); break; + case NL80211_IFTYPE_MONITOR: + if (changes & BSS_CHANGED_MU_GROUPS) + iwl_mvm_update_mu_groups(mvm, vif); + break; default: /* shouldn't happen */ WARN_ON_ONCE(1);
From a6c934b364948cd4de5bd9ab055bb65206ec70f3 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 2 Aug 2016 15:28:23 +0300 Subject: [PATCH 0359/1050] iwlwifi: check for valid ethernet address provided by OEM
In 9000 family products we added an option to let the OEM fuse the MAC address via registers. If these registers are zeroed, we use the OTP address instead. Make sure that the address provided by the OEM is valid and, if not, fall back to the OTP address as well.
Fixes: commit 17c867bfe89b ("iwlwifi: add support for getting HW address from CSR") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 43f8f7d45ddb..adba3b003f55 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -564,11 +564,16 @@ static void iwl_set_hw_address_from_csr(struct iwl_trans *trans, __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_STRAP)); __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_STRAP)); - /* If OEM did not fuse address - get it from OTP */ - if (!mac_addr0 && !mac_addr1) { - mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP)); - mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP)); - } + iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr); + /* + * If the OEM fused a valid address, use it instead of the one in the + * OTP + */ + if (is_valid_ether_addr(data->hw_addr)) + return; + + mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP)); + mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP)); + iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr); }
From 3ee0f0e23e4f290b2ee0a273b34c6e4f95a209a0 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 8 Aug 2016 11:51:24 +0300 Subject: [PATCH 0360/1050] iwlwifi: mvm: fix DQA AP mode station assumption
There was recently a full AP mode feature added where hostapd adds the station at the auth stage rather than at assoc. However, when running with legacy hostapd, we get the auth response before the station was added, and tx_skb_non_sta fails to allocate a queue, resulting in a complete failure of association. Take care of this situation as well. Add a warning when no valid queue is returned at all and make mvm drop the packet instead of passing it on. Refactor the function a bit while at it.
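Condensed, the selection logic introduced below behaves like this (a sketch, not the literal driver code; the NL80211/IWL constants are the driver's and mac80211's own):

    switch (info->control.vif->type) {
    case NL80211_IFTYPE_AP:
            /* auth frames ride the probe-response queue so that
             * legacy hostapd (station added only at assoc) works */
            if (ieee80211_is_probe_resp(fc) || ieee80211_is_auth(fc))
                    return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
            break;
    case NL80211_IFTYPE_P2P_DEVICE:
            if (ieee80211_is_mgmt(fc))
                    return IWL_MVM_DQA_P2P_DEVICE_QUEUE;
            break;
    default:
            return -1;      /* no valid queue: caller drops the skb */
    }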
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 39 ++++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 10517778faf4..e0c9065a3ad4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -490,16 +490,34 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, struct ieee80211_tx_info *info, __le16 fc) { - if (iwl_mvm_is_dqa_supported(mvm)) { - if (info->control.vif->type == NL80211_IFTYPE_AP && - ieee80211_is_probe_resp(fc)) - return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; - else if (ieee80211_is_mgmt(fc) && - info->control.vif->type == NL80211_IFTYPE_P2P_DEVICE) - return IWL_MVM_DQA_P2P_DEVICE_QUEUE; - } + if (!iwl_mvm_is_dqa_supported(mvm)) + return info->hw_queue; - return info->hw_queue; + switch (info->control.vif->type) { + case NL80211_IFTYPE_AP: + /* + * handle legacy hostapd as well, where station may be added + * only after assoc. + */ + if (ieee80211_is_probe_resp(fc) || ieee80211_is_auth(fc)) + return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; + if (info->hw_queue == info->control.vif->cab_queue) + return info->hw_queue; + + WARN_ON_ONCE(1); + return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; + case NL80211_IFTYPE_P2P_DEVICE: + if (ieee80211_is_mgmt(fc)) + return IWL_MVM_DQA_P2P_DEVICE_QUEUE; + if (info->hw_queue == info->control.vif->cab_queue) + return info->hw_queue; + + WARN_ON_ONCE(1); + return IWL_MVM_DQA_P2P_DEVICE_QUEUE; + default: + WARN_ONCE(1, "Not a ctrl vif, no available queue\n"); + return -1; + } } int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) @@ -560,6 +578,9 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) sta_id = mvmvif->bcast_sta.sta_id; queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); + if (queue < 0) + return -1; + } else if (info.control.vif->type == NL80211_IFTYPE_STATION && is_multicast_ether_addr(hdr->addr1)) { u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id);
From 9a73a7d24d51eaf9e43c771c53cf7b594e5b5334 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 8 Aug 2016 13:07:01 +0300 Subject: [PATCH 0361/1050] iwlwifi: mvm: support BAR in reorder buffer
On the default queue we will not receive a frame release notification, but the BAR itself. Upon receiving the BAR, the driver should look at the NSSN and adjust the window accordingly.
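As a worked example (hypothetical numbers; the sn helpers are mac80211's, release_stored_frames() is a stand-in for the driver's release path):

    /* BAR arrives carrying nssn = 13 while the window head is at 10:
     * frames 10, 11 and 12 are released to mac80211 and the head
     * moves to 13, mirroring what iwl_mvm_release_frames() does */
    u16 ssn = buffer->head_sn;              /* e.g. 10 */

    while (ieee80211_sn_less(ssn, nssn)) {  /* nssn = 13 */
            release_stored_frames(buffer, ssn); /* hypothetical helper */
            ssn = ieee80211_sn_inc(ssn);
    }
    buffer->head_sn = nssn;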
Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index b3866287d1c0..0274f14e88e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -600,9 +600,10 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, mvm_sta = iwl_mvm_sta_from_mac80211(sta); - /* not a data packet */ - if (!ieee80211_is_data_qos(hdr->frame_control) || - is_multicast_ether_addr(hdr->addr1)) + /* not a data packet or a bar */ + if (!ieee80211_is_back_req(hdr->frame_control) && + (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1))) return false; if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) @@ -626,6 +627,11 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, spin_lock_bh(&buffer->lock); + if (ieee80211_is_back_req(hdr->frame_control)) { + iwl_mvm_release_frames(mvm, sta, napi, buffer, nssn); + goto drop; + } + /* * If there was a significant jump in the nssn - adjust. * If the SN is smaller than the NSSN it might need to first go into
From aacf8f189b03b3d3c2e577a6d4ef5872bf4d393f Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Tue, 19 Jul 2016 11:15:09 +0300 Subject: [PATCH 0362/1050] iwlwifi: mvm: Add support for RRM by scan
Implement support for RRM by adding an option to configure the scan dwell time and by reporting the scan start time and BSS detection time, and advertise support for these features.
Signed-off-by: David Spinadel Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-fw-file.h | 5 ++ .../wireless/intel/iwlwifi/mvm/fw-api-scan.h | 20 +++--- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 +++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 6 ++ drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 61 +++++++++++++++++-- 5 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h index 1b1e045f8907..94423f04320b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h @@ -256,6 +256,10 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; * instead of 3. * @IWL_UCODE_TLV_API_TX_POWER_CHAIN: TX power API has larger command size * (command version 3) that supports per-chain limits + * @IWL_UCODE_TLV_API_SCAN_TSF_REPORT: Scan start time reported in scan + * iteration complete notification, and the timestamp reported for RX + * received during scan, are reported in TSF of the mac specified in the scan request.
* * @NUM_IWL_UCODE_TLV_API: number of bits used */ @@ -267,6 +271,7 @@ enum iwl_ucode_tlv_api { IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY = (__force iwl_ucode_tlv_api_t)24, IWL_UCODE_TLV_API_TX_POWER_CHAIN = (__force iwl_ucode_tlv_api_t)27, + IWL_UCODE_TLV_API_SCAN_TSF_REPORT = (__force iwl_ucode_tlv_api_t)28, NUM_IWL_UCODE_TLV_API #ifdef __CHECKER__ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h index f01dab0d0dac..0c294c9f98e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -603,6 +604,8 @@ struct iwl_scan_req_umac_tail { * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @ooc_priority: out of channel priority - &enum iwl_scan_priority * @general_flags: &enum iwl_umac_scan_general_flags + * @reserved2: for future use and alignment + * @scan_start_mac_id: report the scan start TSF time according to this mac TSF * @extended_dwell: dwell time for channels 1, 6 and 11 * @active_dwell: dwell time for active scan * @passive_dwell: dwell time for passive scan @@ -620,8 +623,10 @@ struct iwl_scan_req_umac { __le32 flags; __le32 uid; __le32 ooc_priority; - /* SCAN_GENERAL_PARAMS_API_S_VER_1 */ - __le32 general_flags; + /* SCAN_GENERAL_PARAMS_API_S_VER_4 */ + __le16 general_flags; + u8 reserved2; + u8 scan_start_mac_id; u8 extended_dwell; u8 active_dwell; u8 passive_dwell; @@ -629,7 +634,7 @@ struct iwl_scan_req_umac { __le32 max_out_time; __le32 suspend_time; __le32 scan_priority; - /* SCAN_CHANNEL_PARAMS_API_S_VER_1 */ + /* SCAN_CHANNEL_PARAMS_API_S_VER_4 */ u8 channel_flags; u8 n_channels; __le16 reserved; @@ -718,8 +723,8 @@ struct iwl_scan_offload_profiles_query { * @status: one of SCAN_COMP_STATUS_* * @bt_status: BT on/off status * @last_channel: last channel that was scanned - * @tsf_low: TSF timer (lower half) in usecs - * @tsf_high: TSF timer (higher half) in usecs + * @start_tsf: TSF timer in usecs of the scan start time for the mac specified + * in &struct iwl_scan_req_umac. 
* @results: array of scan results, only "scanned_channels" of them are valid */ struct iwl_umac_scan_iter_complete_notif { @@ -728,9 +733,8 @@ struct iwl_umac_scan_iter_complete_notif { u8 status; u8 bt_status; u8 last_channel; - __le32 tsf_low; - __le32 tsf_high; + __le64 start_tsf; struct iwl_scan_results_notif results[]; -} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_1 */ +} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_2 */ #endif diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index b7d80b5fc0bf..a5ede8c9bea2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -653,6 +653,16 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) IWL_UCODE_TLV_CAPA_WFA_TPC_REP_IE_SUPPORT)) hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES; + if (fw_has_api(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_API_SCAN_TSF_REPORT)) { + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SCAN_START_TIME); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_BSS_PARENT_TSF); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SET_SCAN_DWELL); + } + mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; #ifdef CONFIG_PM_SLEEP diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 83f3889e432b..e68a2bdc84ed 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -821,6 +821,12 @@ struct iwl_mvm { /* UMAC scan tracking */ u32 scan_uid_status[IWL_MVM_MAX_UMAC_SCANS]; + /* start time of last scan in TSF of the mac that requested the scan */ + u64 scan_start; + + /* the vif that requested the current scan */ + struct iwl_mvm_vif *scan_vif; + /* rx chain antennas set through debugfs for the scan command */ u8 scan_rx_ant; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index dac120f8861b..00b03fc5807b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -141,6 +141,7 @@ struct iwl_mvm_scan_params { struct cfg80211_match_set *match_sets; int n_scan_plans; struct cfg80211_sched_scan_plan *scan_plans; + u32 measurement_dwell; }; static u8 iwl_mvm_scan_rx_ant(struct iwl_mvm *mvm) @@ -232,6 +233,27 @@ iwl_mvm_scan_type iwl_mvm_get_scan_type(struct iwl_mvm *mvm, bool p2p_device) return IWL_SCAN_TYPE_WILD; } +static int +iwl_mvm_get_measurement_dwell(struct iwl_mvm *mvm, + struct cfg80211_scan_request *req, + struct iwl_mvm_scan_params *params) +{ + if (!req->duration) + return 0; + + if (req->duration_mandatory && + req->duration > scan_timing[params->type].max_out_time) { + IWL_DEBUG_SCAN(mvm, + "Measurement scan - too long dwell %hu (max out time %u)\n", + req->duration, + scan_timing[params->type].max_out_time); + return -EOPNOTSUPP; + } + + return min_t(u32, (u32)req->duration, + scan_timing[params->type].max_out_time); +} + static inline bool iwl_mvm_rrm_scan_needed(struct iwl_mvm *mvm) { /* require rrm scan whenever the fw supports it */ @@ -1033,9 +1055,15 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, struct iwl_scan_req_umac *cmd, struct iwl_mvm_scan_params *params) { - cmd->extended_dwell = scan_timing[params->type].dwell_extended; - cmd->active_dwell = scan_timing[params->type].dwell_active; - cmd->passive_dwell = scan_timing[params->type].dwell_passive; + if (params->measurement_dwell) { + cmd->active_dwell = 
params->measurement_dwell; + cmd->passive_dwell = params->measurement_dwell; + cmd->extended_dwell = params->measurement_dwell; + } else { + cmd->active_dwell = scan_timing[params->type].dwell_active; + cmd->passive_dwell = scan_timing[params->type].dwell_passive; + cmd->extended_dwell = scan_timing[params->type].dwell_extended; + } cmd->fragmented_dwell = scan_timing[params->type].dwell_fragmented; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); @@ -1067,11 +1095,11 @@ iwl_mvm_umac_scan_cfg_channels(struct iwl_mvm *mvm, } } -static u32 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, +static u16 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, struct iwl_mvm_scan_params *params, struct ieee80211_vif *vif) { - int flags = 0; + u16 flags = 0; if (params->n_ssids == 0) flags = IWL_UMAC_SCAN_GEN_FLAGS_PASSIVE; @@ -1093,6 +1121,9 @@ static u32 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, if (!iwl_mvm_is_regular_scan(params)) flags |= IWL_UMAC_SCAN_GEN_FLAGS_PERIODIC; + if (params->measurement_dwell) + flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE; + #ifdef CONFIG_IWLWIFI_DEBUGFS if (mvm->scan_iter_notif_enabled) flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE; @@ -1119,6 +1150,7 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, mvm->fw->ucode_capa.n_scan_channels; int uid, i; u32 ssid_bitmap = 0; + struct iwl_mvm_vif *scan_vif = iwl_mvm_vif_from_mac80211(vif); lockdep_assert_held(&mvm->mutex); @@ -1136,8 +1168,9 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, mvm->scan_uid_status[uid] = type; cmd->uid = cpu_to_le32(uid); - cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params, + cmd->general_flags = cpu_to_le16(iwl_mvm_scan_umac_flags(mvm, params, vif)); + cmd->scan_start_mac_id = scan_vif->id; if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT) cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE); @@ -1289,6 +1322,12 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, iwl_mvm_get_scan_type(mvm, vif->type == NL80211_IFTYPE_P2P_DEVICE); + ret = iwl_mvm_get_measurement_dwell(mvm, req, ¶ms); + if (ret < 0) + return ret; + + params.measurement_dwell = ret; + iwl_mvm_build_scan_probe(mvm, vif, ies, ¶ms); if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { @@ -1315,6 +1354,7 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, IWL_DEBUG_SCAN(mvm, "Scan request was sent successfully\n"); mvm->scan_status |= IWL_MVM_SCAN_REGULAR; + mvm->scan_vif = iwl_mvm_vif_from_mac80211(vif); iwl_mvm_ref(mvm, IWL_MVM_REF_SCAN); queue_delayed_work(system_wq, &mvm->scan_timeout_dwork, @@ -1437,9 +1477,12 @@ void iwl_mvm_rx_umac_scan_complete_notif(struct iwl_mvm *mvm, if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_REGULAR) { struct cfg80211_scan_info info = { .aborted = aborted, + .scan_start_tsf = mvm->scan_start, }; + memcpy(info.tsf_bssid, mvm->scan_vif->bssid, ETH_ALEN); ieee80211_scan_completed(mvm->hw, &info); + mvm->scan_vif = NULL; iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); cancel_delayed_work(&mvm->scan_timeout_dwork); } else if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_SCHED) { @@ -1473,6 +1516,8 @@ void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm, struct iwl_umac_scan_iter_complete_notif *notif = (void *)pkt->data; u8 buf[256]; + mvm->scan_start = le64_to_cpu(notif->start_tsf); + IWL_DEBUG_SCAN(mvm, "UMAC Scan iteration complete: status=0x%x 
scanned_channels=%d channels list: %s\n", notif->status, notif->scanned_channels, @@ -1485,6 +1530,10 @@ void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm, ieee80211_sched_scan_results(mvm->hw); mvm->sched_scan_pass_all = SCHED_SCAN_PASS_ALL_ENABLED; } + + IWL_DEBUG_SCAN(mvm, + "UMAC Scan iteration complete: scan started at %llu (TSF)\n", + mvm->scan_start); } static int iwl_mvm_umac_scan_abort(struct iwl_mvm *mvm, int type) From 4857d6cbf7ca4e040ac2b24687464c76e0be96ff Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 9 Aug 2016 20:03:52 +0300 Subject: [PATCH 0363/1050] iwlwifi: mvm: support packet injection For automatic testing packet injection can be useful. Support injection through debugfs. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index b34489817c70..540b7c9deaef 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -917,6 +917,59 @@ static ssize_t iwl_dbgfs_indirection_tbl_write(struct iwl_mvm *mvm, return ret ?: count; } +static ssize_t iwl_dbgfs_inject_packet_write(struct iwl_mvm *mvm, + char *buf, size_t count, + loff_t *ppos) +{ + struct iwl_rx_cmd_buffer rxb = { + ._rx_page_order = 0, + .truesize = 0, /* not used */ + ._offset = 0, + }; + struct iwl_rx_packet *pkt; + struct iwl_rx_mpdu_desc *desc; + int bin_len = count / 2; + int ret = -EINVAL; + + /* supporting only 9000 descriptor */ + if (!mvm->trans->cfg->mq_rx_supported) + return -ENOTSUPP; + + rxb._page = alloc_pages(GFP_ATOMIC, 0); + if (!rxb._page) + return -ENOMEM; + pkt = rxb_addr(&rxb); + + ret = hex2bin(page_address(rxb._page), buf, bin_len); + if (ret) + goto out; + + /* avoid invalid memory access */ + if (bin_len < sizeof(*pkt) + sizeof(*desc)) + goto out; + + /* check this is RX packet */ + if (WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd) != + WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD)) + goto out; + + /* check the length in metadata matches actual received length */ + desc = (void *)pkt->data; + if (le16_to_cpu(desc->mpdu_len) != + (bin_len - sizeof(*desc) - sizeof(*pkt))) + goto out; + + local_bh_disable(); + iwl_mvm_rx_mpdu_mq(mvm, NULL, &rxb, 0); + local_bh_enable(); + ret = 0; + +out: + iwl_free_rxb(&rxb); + + return ret ?: count; +} + static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -1454,6 +1507,7 @@ MVM_DEBUGFS_WRITE_FILE_OPS(cont_recording, 8); MVM_DEBUGFS_WRITE_FILE_OPS(max_amsdu_len, 8); MVM_DEBUGFS_WRITE_FILE_OPS(indirection_tbl, (IWL_RSS_INDIRECTION_TABLE_SIZE * 2)); +MVM_DEBUGFS_WRITE_FILE_OPS(inject_packet, 512); #ifdef CONFIG_IWLWIFI_BCAST_FILTERING MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters, 256); @@ -1502,6 +1556,7 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, S_IWUSR); + MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, S_IWUSR); if (!debugfs_create_bool("enable_scan_iteration_notif", S_IRUSR | S_IWUSR, mvm->debugfs_dir, From 671bed3fbeecb5e8392ed054d2cab78ed66290ff Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 7 Aug 2016 18:58:29 +0300 Subject: [PATCH 0364/1050] iwlwifi: move BIOS MCC 
retrieval to common code This will be used by more than MVM, so move it to iwlwifi. While at it, rename WRD_METHOD to the more appropriate WRDD_METHOD and add some documentation.
Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 89 ++++++++++++++++++ .../wireless/intel/iwlwifi/iwl-nvm-parse.h | 20 +++- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 93 +------------------ 3 files changed, 109 insertions(+), 93 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index adba3b003f55..3bd6fc1b76d4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -67,6 +67,7 @@ #include #include #include +#include #include "iwl-drv.h" #include "iwl-modparams.h" #include "iwl-nvm-parse.h" @@ -904,3 +905,91 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, return regd; } IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); + +#ifdef CONFIG_ACPI +#define WRDD_METHOD "WRDD" +#define WRDD_WIFI (0x07) +#define WRDD_WIGIG (0x10) + +static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd) +{ + union acpi_object *mcc_pkg, *domain_type, *mcc_value; + u32 i; + + if (wrdd->type != ACPI_TYPE_PACKAGE || + wrdd->package.count < 2 || + wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || + wrdd->package.elements[0].integer.value != 0) { + IWL_DEBUG_EEPROM(dev, "Unsupported wrdd structure\n"); + return 0; + } + + for (i = 1 ; i < wrdd->package.count ; ++i) { + mcc_pkg = &wrdd->package.elements[i]; + + if (mcc_pkg->type != ACPI_TYPE_PACKAGE || + mcc_pkg->package.count < 2 || + mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || + mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + mcc_pkg = NULL; + continue; + } + + domain_type = &mcc_pkg->package.elements[0]; + if (domain_type->integer.value == WRDD_WIFI) + break; + + mcc_pkg = NULL; + } + + if (mcc_pkg) { + mcc_value = &mcc_pkg->package.elements[1]; + return mcc_value->integer.value; + } + + return 0; +} + +int iwl_get_bios_mcc(struct device *dev, char *mcc) +{ + acpi_handle root_handle; + acpi_handle handle; + struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; + acpi_status status; + u32 mcc_val; + + root_handle = ACPI_HANDLE(dev); + if (!root_handle) { + IWL_DEBUG_EEPROM(dev, + "Could not retrieve root port ACPI handle\n"); + return -ENOENT; + } + + /* Get the method's handle */ + status = acpi_get_handle(root_handle, (acpi_string)WRDD_METHOD, + &handle); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_EEPROM(dev, "WRD method not found\n"); + return -ENOENT; + } + + /* Call WRDD with no arguments */ + status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_EEPROM(dev, "WRDC invocation failed (0x%x)\n", + status); + return -ENOENT; + } + + mcc_val = iwl_wrdd_get_mcc(dev, wrdd.pointer); + kfree(wrdd.pointer); + if (!mcc_val) + return -ENOENT; + + mcc[0] = (mcc_val >> 8) & 0xff; + mcc[1] = mcc_val & 0xff; + mcc[2] = '\0'; + return 0; +} +IWL_EXPORT_SYMBOL(iwl_get_bios_mcc); +#endif
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index d704d52aa7ec..7249e5b403f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -5,7 +5,8 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2008 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -93,4 +94,21 @@ struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc); +#ifdef CONFIG_ACPI +/** + * iwl_get_bios_mcc - read MCC from BIOS, if available + * + * @dev: the struct device + * @mcc: output buffer (3 bytes) that will get the MCC + * + * This function tries to read the current MCC from ACPI if available. + */ +int iwl_get_bios_mcc(struct device *dev, char *mcc); +#else +static inline int iwl_get_bios_mcc(struct device *dev, char *mcc) +{ + return -ENOENT; +} +#endif + #endif /* __iwl_nvm_parse_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 7a686f67f007..eade099b6dbf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -66,7 +66,6 @@ *****************************************************************************/ #include #include -#include #include "iwl-trans.h" #include "iwl-csr.h" #include "mvm.h" @@ -751,96 +750,6 @@ exit: return resp_cp; } -#ifdef CONFIG_ACPI -#define WRD_METHOD "WRDD" -#define WRDD_WIFI (0x07) -#define WRDD_WIGIG (0x10) - -static u32 iwl_mvm_wrdd_get_mcc(struct iwl_mvm *mvm, union acpi_object *wrdd) -{ - union acpi_object *mcc_pkg, *domain_type, *mcc_value; - u32 i; - - if (wrdd->type != ACPI_TYPE_PACKAGE || - wrdd->package.count < 2 || - wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || - wrdd->package.elements[0].integer.value != 0) { - IWL_DEBUG_LAR(mvm, "Unsupported wrdd structure\n"); - return 0; - } - - for (i = 1 ; i < wrdd->package.count ; ++i) { - mcc_pkg = &wrdd->package.elements[i]; - - if (mcc_pkg->type != ACPI_TYPE_PACKAGE || - mcc_pkg->package.count < 2 || - mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || - mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { - mcc_pkg = NULL; - continue; - } - - domain_type = &mcc_pkg->package.elements[0]; - if (domain_type->integer.value == WRDD_WIFI) - break; - - mcc_pkg = NULL; - } - - if (mcc_pkg) { - mcc_value = &mcc_pkg->package.elements[1]; - return mcc_value->integer.value; - } - - return 0; -} - -static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc) -{ - acpi_handle root_handle; - acpi_handle handle; - struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; - u32 mcc_val; - - root_handle = ACPI_HANDLE(mvm->dev); - if (!root_handle) { - IWL_DEBUG_LAR(mvm, - "Could not retrieve root port ACPI handle\n"); - return -ENOENT; - } - - /* Get the method's handle */ - status = acpi_get_handle(root_handle, (acpi_string)WRD_METHOD, &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_LAR(mvm, "WRD method not found\n"); - return -ENOENT; - } - - /* Call WRDD with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_LAR(mvm, "WRDC invocation failed (0x%x)\n", status); - return -ENOENT; - } - - mcc_val = iwl_mvm_wrdd_get_mcc(mvm, wrdd.pointer); - kfree(wrdd.pointer); - if (!mcc_val) - return -ENOENT; - - mcc[0] = (mcc_val >> 8) & 0xff; - mcc[1] = mcc_val & 0xff; - mcc[2] = '\0'; - return 0; -} -#else /* CONFIG_ACPI */ -static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc) -{ - return -ENOENT; -} -#endif - int 
iwl_mvm_init_mcc(struct iwl_mvm *mvm) { bool tlv_lar; @@ -884,7 +793,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm) return -EIO; if (iwl_mvm_is_wifi_mcc_supported(mvm) && - !iwl_mvm_get_bios_mcc(mvm, mcc)) { + !iwl_get_bios_mcc(mvm->dev, mcc)) { kfree(regd); regd = iwl_mvm_get_regdomain(mvm->hw->wiphy, mcc, MCC_SOURCE_BIOS, NULL); From 35177c9931465e06379729d1ef2a22628ac9cbdf Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 15 Aug 2016 17:13:27 +0300 Subject: [PATCH 0365/1050] iwlwifi: pcie: log full command sequence Log group as well. Remove 0x prefix to match TX logging. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 0b5b331010fb..78cb6d2314d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1108,13 +1108,14 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, FH_RSCSR_RXQ_POS != rxq->id); IWL_DEBUG_RX(trans, - "cmd at offset %d: %s (0x%.2x, seq 0x%x)\n", + "cmd at offset %d: %s (%.2x.%2x, seq 0x%x)\n", rxcb._offset, iwl_get_cmd_string(trans, iwl_cmd_id(pkt->hdr.cmd, pkt->hdr.group_id, 0)), - pkt->hdr.cmd, le16_to_cpu(pkt->hdr.sequence)); + pkt->hdr.group_id, pkt->hdr.cmd, + le16_to_cpu(pkt->hdr.sequence)); len = iwl_rx_packet_len(pkt); len += sizeof(u32); /* account for status word */ From 607876f1dac6c644d83964f3f4691e79737b5bb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Aug 2016 10:32:58 +0200 Subject: [PATCH 0366/1050] iwlwifi: mvm: make iwl_mvm_update_sta() an inline There's no point in making this an out-of-line function since it just calls a single other function with a few changed parameters. 
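The pattern, reduced to a generic sketch (hypothetical names, not the driver code):

    /* a static inline wrapper in the header compiles down to the
     * delegated call itself - no exported symbol, no call overhead */
    static inline int update_entry(struct fw_ctx *fw)
    {
            return send_entry_to_fw(fw, true, 0);
    }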
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 7 ------- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 11 ++++++++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index d3a0378b547f..258a234feb71 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1297,13 +1297,6 @@ err: return ret; } -int iwl_mvm_update_sta(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta) -{ - return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0); -} - int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool drain) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 709542bbfce5..e068d5355865 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -473,9 +473,14 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, int iwl_mvm_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta); -int iwl_mvm_update_sta(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta); + +static inline int iwl_mvm_update_sta(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta) +{ + return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0); +} + int iwl_mvm_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta); From 5b88792cd8505d3804be199c10b3c4159fecb258 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 15 Aug 2016 17:36:47 +0300 Subject: [PATCH 0367/1050] iwlwifi: move to wide ID for all commands Due to firmware design considerations, move to wide ID for all commands. 
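In sketch form (DEF_ID() is the macro this patch adds; the iwl-trans.c hunk below has the real code):

    #define DEF_ID(opcode) ((1 << 8) | (opcode))

    /* a legacy 8-bit ID (group 0) is promoted to the wide format,
     * group 1, before the command reaches the transport */
    if (trans->wide_cmd_header && !iwl_cmd_groupid(cmd->id))
            cmd->id = DEF_ID(cmd->id);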
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c | 8 ++++++-- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 3 +++ drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 5 +++-- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 2 -- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 1 - drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 7 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c index 8aa1f2b7fdfc..88f260db3744 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c @@ -99,8 +99,12 @@ void iwl_notification_wait_notify(struct iwl_notif_wait_data *notif_wait, continue; for (i = 0; i < w->n_cmds; i++) { - if (w->cmds[i] == - WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd)) { + u16 rec_id = WIDE_ID(pkt->hdr.group_id, + pkt->hdr.cmd); + + if (w->cmds[i] == rec_id || + (!iwl_cmd_groupid(w->cmds[i]) && + DEF_ID(w->cmds[i]) == rec_id)) { found = true; break; } diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index b0bd67c64b5c..b42152c697be 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -140,6 +140,9 @@ int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd) if (!(cmd->flags & CMD_ASYNC)) lock_map_acquire_read(&trans->sync_cmd_lockdep_map); + if (trans->wide_cmd_header && !iwl_cmd_groupid(cmd->id)) + cmd->id = DEF_ID(cmd->id); + ret = trans->ops->send_cmd(trans, cmd); if (!(cmd->flags & CMD_ASYNC)) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 04e998d9d5fe..0296124a7f9c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -153,6 +153,7 @@ static inline u32 iwl_cmd_id(u8 opcode, u8 groupid, u8 version) /* make u16 wide id out of u8 group and opcode */ #define WIDE_ID(grp, opcode) ((grp << 8) | opcode) +#define DEF_ID(opcode) ((1 << 8) | (opcode)) /* due to the conversion, this group is special; new groups * should be defined in the appropriate fw-api header files @@ -484,7 +485,6 @@ struct iwl_hcmd_arr { * @bc_table_dword: set to true if the BC table expects the byte count to be * in DWORD (as opposed to bytes) * @scd_set_active: should the transport configure the SCD for HCMD queue - * @wide_cmd_header: firmware supports wide host command header * @sw_csum_tx: transport should compute the TCP checksum * @command_groups: array of command groups, each member is an array of the * commands in the group; for debugging only @@ -506,7 +506,6 @@ struct iwl_trans_config { enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; bool scd_set_active; - bool wide_cmd_header; bool sw_csum_tx; const struct iwl_hcmd_arr *command_groups; int command_groups_size; @@ -770,6 +769,7 @@ enum iwl_plat_pm_mode { * @hw_id_str: a string with info about HW ID. Set during transport allocation. 
* @pm_support: set to true in start_hw if link pm is supported * @ltr_enabled: set to true if the LTR is enabled + * @wide_cmd_header: true when ucode supports wide command header format * @num_rx_queues: number of RX queues allocated by the transport; * the transport must set this before calling iwl_drv_start() * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only. @@ -821,6 +821,7 @@ struct iwl_trans { const struct iwl_hcmd_arr *command_groups; int command_groups_size; + bool wide_cmd_header; u8 num_rx_queues; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index de34c9f4f0f4..b2d8722cbb11 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -652,8 +652,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, /* the hardware splits the A-MSDU */ if (mvm->cfg->mq_rx_supported) trans_cfg.rx_buf_size = IWL_AMSDU_4K; - trans_cfg.wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_WIDE_CMD_HDR); + trans->wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_API_WIDE_CMD_HDR); if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DW_BC_TABLE) trans_cfg.bc_table_dword = true; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 987a0770fb5b..2d81630fba0f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -332,7 +332,6 @@ enum iwl_shared_irq_flags { * @rx_buf_size: Rx buffer size * @bc_table_dword: true if the BC table expects DWORD (as opposed to bytes) * @scd_set_active: should the transport configure the SCD for HCMD queue - * @wide_cmd_header: true when ucode supports wide command header format * @sw_csum_tx: if true, then the transport will compute the csum of the TXed * frame. 
* @rx_page_order: page order for receive buffer size @@ -405,7 +404,6 @@ struct iwl_trans_pcie { enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; bool scd_set_active; - bool wide_cmd_header; bool sw_csum_tx; u32 rx_page_order; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 74199b174948..68fa84381343 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1755,7 +1755,6 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans, trans_pcie->rx_page_order = iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size); - trans_pcie->wide_cmd_header = trans_cfg->wide_cmd_header; trans_pcie->bc_table_dword = trans_cfg->bc_table_dword; trans_pcie->scd_set_active = trans_cfg->scd_set_active; trans_pcie->sw_csum_tx = trans_cfg->sw_csum_tx; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 0d156fddbf3d..e00e7d8f197a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1493,7 +1493,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD]; u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD]; - if (WARN(!trans_pcie->wide_cmd_header && + if (WARN(!trans->wide_cmd_header && group_id > IWL_ALWAYS_LONG_GROUP, "unsupported wide command %#x\n", cmd->id)) return -EINVAL; From 8ec8ed437936ea40427ac2777dd4b65c87009d0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Aug 2016 14:18:40 +0200 Subject: [PATCH 0368/1050] iwlwifi: mvm: document passing unexpected Block Ack Request frames When we get an unexpected Block Ack Request frame, the BAID from the hardware will be invalid, and we'll pass it to mac80211 for further handling (sending a delBA action frame.) Add a comment explaining that, in case anyone looks in the future. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 0274f14e88e3..a57c6ef5bc14 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -591,6 +591,11 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >> IWL_RX_MPDU_REORDER_BAID_SHIFT; + /* + * This also covers the case of receiving a Block Ack Request + * outside a BA session; we'll pass it to mac80211 and that + * then sends a delBA action frame. 
+ */ if (baid == IWL_RX_REORDER_DATA_INVALID_BAID) return false; From de9466499b1ce705808bbc7876a9ee929b13d5bc Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 22 Aug 2016 23:24:40 +0300 Subject: [PATCH 0369/1050] iwlwifi: don't export trace points that are used in iwlwifi only This can save a few bytes Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c index 1d9dd153ef1c..50510fb6ab8c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c @@ -33,9 +33,6 @@ #define CREATE_TRACE_POINTS #include "iwl-devtrace.h" -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite8); -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ioread32); -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite32); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_event); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_error); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_cont_event); From 341d7eb8223bdd48bdf75729487a2de5e01623b3 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 18 Aug 2016 20:14:38 +0300 Subject: [PATCH 0370/1050] iwlwifi: mvm: disable P2P queue on mac context release AP queue is properly released, but P2P queue isn't. Fixes: commit 4c965139a3cd ("iwlwifi: mvm: support p2p device frames tx on dqa queue #2") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 69c42ce45b8a..d742d27d8de0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -539,6 +539,11 @@ void iwl_mvm_mac_ctxt_release(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_disable_txq(mvm, IWL_MVM_OFFCHANNEL_QUEUE, IWL_MVM_OFFCHANNEL_QUEUE, IWL_MAX_TID_COUNT, 0); + else + iwl_mvm_disable_txq(mvm, + IWL_MVM_DQA_P2P_DEVICE_QUEUE, + vif->hw_queue[0], IWL_MAX_TID_COUNT, + 0); break; case NL80211_IFTYPE_AP: From 5a41a86c525a53d31a229dd358f70d6b4b5a7bbc Mon Sep 17 00:00:00 2001 From: Sharon Dvir Date: Wed, 10 Aug 2016 09:05:48 +0300 Subject: [PATCH 0371/1050] iwlwifi: migrate to devm_* API Change PCIE and trans resource allocations to managed resources. 
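The shape of the change, as a generic sketch (hypothetical handler, name and size; the devm_*/pcim_* calls are the real managed APIs):

    static int demo_probe(struct pci_dev *pdev)
    {
            void *buf;
            int ret;

            ret = pcim_enable_device(pdev); /* undone on driver detach */
            if (ret)
                    return ret;

            buf = devm_kzalloc(&pdev->dev, 128, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM; /* no manual unwind needed */

            /* freed automatically as well - error paths and remove()
             * no longer need free_irq()/kfree() calls */
            return devm_request_threaded_irq(&pdev->dev, pdev->irq,
                                             NULL, demo_handler,
                                             IRQF_ONESHOT, "demo", buf);
    }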
Signed-off-by: Sharon Dvir Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-trans.c | 8 +- .../net/wireless/intel/iwlwifi/pcie/trans.c | 79 ++++++++----------- 2 files changed, 33 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index b42152c697be..d42cab291025 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -78,7 +78,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, static struct lock_class_key __key; #endif - trans = kzalloc(sizeof(*trans) + priv_size, GFP_KERNEL); + trans = devm_kzalloc(dev, sizeof(*trans) + priv_size, GFP_KERNEL); if (!trans) return NULL; @@ -103,18 +103,14 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, SLAB_HWCACHE_ALIGN, NULL); if (!trans->dev_cmd_pool) - goto free; + return NULL; return trans; - free: - kfree(trans); - return NULL; } void iwl_trans_free(struct iwl_trans *trans) { kmem_cache_destroy(trans->dev_cmd_pool); - kfree(trans); } int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 68fa84381343..ae95533e587d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1605,24 +1605,22 @@ static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, for (i = 0; i < trans_pcie->alloc_vecs; i++) { int ret; + struct msix_entry *msix_entry; - ret = request_threaded_irq(trans_pcie->msix_entries[i].vector, - iwl_pcie_msix_isr, - (i == trans_pcie->def_irq) ? - iwl_pcie_irq_msix_handler : - iwl_pcie_irq_rx_msix_handler, - IRQF_SHARED, - DRV_NAME, - &trans_pcie->msix_entries[i]); + msix_entry = &trans_pcie->msix_entries[i]; + ret = devm_request_threaded_irq(&pdev->dev, + msix_entry->vector, + iwl_pcie_msix_isr, + (i == trans_pcie->def_irq) ? 
+ iwl_pcie_irq_msix_handler : + iwl_pcie_irq_rx_msix_handler, + IRQF_SHARED, + DRV_NAME, + msix_entry); if (ret) { - int j; - IWL_ERR(trans_pcie->trans, "Error allocating IRQ %d\n", i); - for (j = 0; j < i; j++) - free_irq(trans_pcie->msix_entries[j].vector, - &trans_pcie->msix_entries[j]); - pci_disable_msix(pdev); + return ret; } } @@ -1789,23 +1787,12 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) irq_set_affinity_hint( trans_pcie->msix_entries[i].vector, NULL); - - free_irq(trans_pcie->msix_entries[i].vector, - &trans_pcie->msix_entries[i]); } - pci_disable_msix(trans_pcie->pci_dev); trans_pcie->msix_enabled = false; } else { - free_irq(trans_pcie->pci_dev->irq, trans); - iwl_pcie_free_ict(trans); - - pci_disable_msi(trans_pcie->pci_dev); } - iounmap(trans_pcie->hw_base); - pci_release_regions(trans_pcie->pci_dev); - pci_disable_device(trans_pcie->pci_dev); iwl_pcie_free_fw_monitor(trans); @@ -2912,6 +2899,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, struct iwl_trans *trans; int ret, addr_size; + ret = pcim_enable_device(pdev); + if (ret) + return ERR_PTR(ret); + trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, cfg, &trans_ops_pcie, 0); if (!trans) @@ -2930,9 +2921,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_no_pci; } - ret = pci_enable_device(pdev); - if (ret) - goto out_no_pci; if (!cfg->base_params->pcie_l1_allowed) { /* @@ -2974,21 +2962,21 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, /* both attempts failed: */ if (ret) { dev_err(&pdev->dev, "No suitable DMA available\n"); - goto out_pci_disable_device; + goto out_no_pci; } } - ret = pci_request_regions(pdev, DRV_NAME); + ret = pcim_iomap_regions_request_all(pdev, BIT(0), DRV_NAME); if (ret) { - dev_err(&pdev->dev, "pci_request_regions failed\n"); - goto out_pci_disable_device; + dev_err(&pdev->dev, "pcim_iomap_regions_request_all failed\n"); + goto out_no_pci; } - trans_pcie->hw_base = pci_ioremap_bar(pdev, 0); + trans_pcie->hw_base = pcim_iomap_table(pdev)[0]; if (!trans_pcie->hw_base) { - dev_err(&pdev->dev, "pci_ioremap_bar failed\n"); + dev_err(&pdev->dev, "pcim_iomap_table failed\n"); ret = -ENODEV; - goto out_pci_release_regions; + goto out_no_pci; } /* We disable the RETRY_TIMEOUT register (0x41) to keep @@ -3015,7 +3003,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, ret = iwl_pcie_prepare_card_hw(trans); if (ret) { IWL_WARN(trans, "Exit HW not ready\n"); - goto out_pci_disable_msi; + goto out_no_pci; } /* @@ -3032,7 +3020,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, 25000); if (ret < 0) { IWL_DEBUG_INFO(trans, "Failed to wake up the nic\n"); - goto out_pci_disable_msi; + goto out_no_pci; } if (iwl_trans_grab_nic_access(trans, &flags)) { @@ -3064,15 +3052,16 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, if (trans_pcie->msix_enabled) { if (iwl_pcie_init_msix_handler(pdev, trans_pcie)) - goto out_pci_release_regions; + goto out_no_pci; } else { ret = iwl_pcie_alloc_ict(trans); if (ret) - goto out_pci_disable_msi; + goto out_no_pci; - ret = request_threaded_irq(pdev->irq, iwl_pcie_isr, - iwl_pcie_irq_handler, - IRQF_SHARED, DRV_NAME, trans); + ret = devm_request_threaded_irq(&pdev->dev, pdev->irq, + iwl_pcie_isr, + iwl_pcie_irq_handler, + IRQF_SHARED, DRV_NAME, trans); if (ret) { IWL_ERR(trans, "Error allocating IRQ %d\n", pdev->irq); goto out_free_ict; @@ -3090,12 +3079,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, out_free_ict: iwl_pcie_free_ict(trans); 
-out_pci_disable_msi: - pci_disable_msi(pdev); -out_pci_release_regions: - pci_release_regions(pdev); -out_pci_disable_device: - pci_disable_device(pdev); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); iwl_trans_free(trans);
From 47a66e45d7a7613322549c2475ea9d809baaf514 Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Tue, 13 Sep 2016 14:20:45 -0700 Subject: [PATCH 0372/1050] drm: Only use compat ioctl for addfb2 on X86/IA64
Similar to struct drm_update_draw, struct drm_mode_fb_cmd2 has an unaligned 64 bit field (modifier). This gets packed differently between 32 bit and 64 bit modes on architectures that can handle unaligned 64 bit access (X86 and IA64). Other architectures pack the structs the same and don't need the compat wrapper. Use the same condition for drm_mode_fb_cmd2 as we use for drm_update_draw. Note that only the modifier will be packed differently between compat and non-compat versions.
Reviewed-by: Rob Clark Signed-off-by: Kristian H. Kristensen [seanpaul added note at bottom of commit msg re: modifier] Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1473801645-116011-1-git-send-email-hoegsberg@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_ioc32.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 57676f8d7ecf..a6289752be16 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -1015,6 +1015,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, return 0; } +#if defined(CONFIG_X86) || defined(CONFIG_IA64) typedef struct drm_mode_fb_cmd232 { u32 fb_id; u32 width; @@ -1071,6 +1072,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, return 0; } +#endif static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version, @@ -1104,7 +1106,9 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw, #endif [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank, +#if defined(CONFIG_X86) || defined(CONFIG_IA64) [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2, +#endif }; /**
From b3bee580b1e200e4fc14091e3e118ae8280dc06c Mon Sep 17 00:00:00 2001 From: Roee Zamir Date: Tue, 2 Aug 2016 13:55:13 +0300 Subject: [PATCH 0373/1050] iwlwifi: mvm: Add debugfs function for clocks diff
New function that reveals the diff between gp2 and host time.
Signed-off-by: Roee Zamir Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/mvm/debugfs-vif.c | 26 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + .../net/wireless/intel/iwlwifi/mvm/utils.c | 22 ++++++++++++++++ 3 files changed, 49 insertions(+)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index b23271755daf..8ff19210ef1e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -504,6 +504,28 @@ static inline char *iwl_dbgfs_is_match(char *name, char *buf) return !strncmp(name, buf, len) ?
buf + len : NULL; } +static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_vif *vif = file->private_data; + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm *mvm = mvmvif->mvm; + u32 curr_gp2; + u64 curr_os; + s64 diff; + char buf[64]; + const size_t bufsz = sizeof(buf); + int pos = 0; + + iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os); + do_div(curr_os, NSEC_PER_USEC); + diff = curr_os - curr_gp2; + pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff); + + return simple_read_from_buffer(user_buf, count, ppos, buf, pos); +} + static ssize_t iwl_dbgfs_tof_enable_write(struct ieee80211_vif *vif, char *buf, size_t count, loff_t *ppos) @@ -1530,6 +1552,8 @@ MVM_DEBUGFS_READ_FILE_OPS(tof_range_response); MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32); MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32); MVM_DEBUGFS_WRITE_FILE_OPS(lqm_send_cmd, 64); +MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff); + void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { @@ -1570,6 +1594,8 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir, S_IRUSR | S_IWUSR); MVM_DEBUGFS_ADD_FILE_VIF(lqm_send_cmd, mvmvif->dbgfs_dir, S_IWUSR); + MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff, + mvmvif->dbgfs_dir, S_IRUSR); if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p && mvmvif == mvm->bf_allowed_vif) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index e68a2bdc84ed..b7cfdcbcf95b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1268,6 +1268,7 @@ u8 iwl_mvm_mac80211_idx_to_hwrate(int rate_idx); void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm); u8 first_antenna(u8 mask); u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx); +void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime); /* Tx / Host Commands */ int __must_check iwl_mvm_send_cmd(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 7c138fedcb19..9e366e28c983 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1225,6 +1225,28 @@ void iwl_mvm_inactivity_check(struct iwl_mvm *mvm) rcu_read_unlock(); } +void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime) +{ + bool ps_disabled; + + lockdep_assert_held(&mvm->mutex); + + /* Disable power save when reading GP2 */ + ps_disabled = mvm->ps_disabled; + if (!ps_disabled) { + mvm->ps_disabled = true; + iwl_mvm_power_update_device(mvm); + } + + *gp2 = iwl_read_prph(mvm->trans, DEVICE_SYSTEM_TIME_REG); + *boottime = ktime_get_boot_ns(); + + if (!ps_disabled) { + mvm->ps_disabled = ps_disabled; + iwl_mvm_power_update_device(mvm); + } +} + int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif, enum iwl_lqm_cmd_operatrions operation, u32 duration, u32 timeout) From 8b6607cc6cdecd932201e63597b3b0c0f7958f33 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 25 Aug 2016 13:15:24 +0300 Subject: [PATCH 0374/1050] iwlwifi: add new 8265 series PCI ID Add a new PCI ID for the 8265 series. 
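For context, each table entry expands roughly along these lines (approximate sketch of the IWL_PCI_DEVICE() macro from pcie/drv.c):

    #define IWL_PCI_DEVICE(dev, subdev, cfg) \
            .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \
            .subvendor = PCI_ANY_ID, .subdevice = (subdev), \
            .driver_data = (kernel_ulong_t)&(cfg)

so 0x24FD/0x10D0 simply maps one more subsystem ID onto the existing iwl8265_2ac_cfg configuration.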
Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 14c6e94ecbf2..3d766f250d3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -487,6 +487,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x1130, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0130, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x1010, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x10D0, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0050, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0150, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x9010, iwl8265_2ac_cfg)}, From 186cd49a4d9ad1a0d4a7371c2d02a05a8ac53a03 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Aug 2016 10:05:40 +0200 Subject: [PATCH 0375/1050] iwlwifi: mvm: move AP-specific code to right function There's no need for the common MAC context function to have an if on AP mode, the values can be overridden in the AP-specific function later. Clean that up by adding the full command as a new parameter to the AP-specific function, and doing it there. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index d742d27d8de0..6b962d6b067a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -774,26 +774,6 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, cmd->ac[txf].fifos_mask = BIT(txf); } - if (vif->type == NL80211_IFTYPE_AP) { - /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ - cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= - BIT(IWL_MVM_TX_FIFO_MCAST); - - /* - * in AP mode, pass probe requests and beacons from other APs - * (needed for ht protection); when there're no any associated - * station don't ask FW to pass beacons to prevent unnecessary - * wake-ups. - */ - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); - if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); - IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); - } else { - IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); - } - } - if (vif->bss_conf.qos) cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); @@ -1191,6 +1171,7 @@ static void iwl_mvm_mac_ap_iterator(void *_data, u8 *mac, */ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct iwl_mac_ctx_cmd *cmd, struct iwl_mac_data_ap *ctxt_ap, bool add) { @@ -1201,6 +1182,23 @@ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, .beacon_device_ts = 0 }; + /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ + cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= BIT(IWL_MVM_TX_FIFO_MCAST); + + /* + * in AP mode, pass probe requests and beacons from other APs + * (needed for ht protection); when there're no any associated + * station don't ask FW to pass beacons to prevent unnecessary + * wake-ups. 
+ */ + cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); + if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { + cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); + IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); + } else { + IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); + } + ctxt_ap->bi = cpu_to_le32(vif->bss_conf.beacon_int); ctxt_ap->bi_reciprocal = cpu_to_le32(iwl_mvm_reciprocal(vif->bss_conf.beacon_int)); @@ -1258,7 +1256,7 @@ static int iwl_mvm_mac_ctxt_cmd_ap(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); /* Fill the data specific for ap mode */ - iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.ap, + iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.ap, action == FW_CTXT_ACTION_ADD); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); @@ -1277,7 +1275,7 @@ static int iwl_mvm_mac_ctxt_cmd_go(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); /* Fill the data specific for GO mode */ - iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.go.ap, + iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.go.ap, action == FW_CTXT_ACTION_ADD); cmd.go.ctwin = cpu_to_le32(noa->oppps_ctwindow & From 2b55f43f8e477a123bca4ab35351666479bd7b86 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Tue, 23 Aug 2016 14:44:59 -0400 Subject: [PATCH 0376/1050] iwlwifi: mvm: Add mem debugfs entry In order to access cached/paged memory, there are a couple of firmware commands (one for UMAC and one for LMAC) that let the host access memory and registers indirectly. Since this is done by the firmware on behalf of the host, even if memory is paged out or cached, the host will retrieve the memory as the firmware sees it (paged out memory will get paged in). Export this mechanism via a debugfs entry for both read and write access. WARNING: This mechanism has no protections at all. Invalid addresses may crash or hang the firmware. Writing to arbitrary memory also comes with no guarantees. Signed-off-by: Ido Yariv Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 129 ++++++++++++++++++ .../net/wireless/intel/iwlwifi/mvm/fw-api.h | 50 +++++++ 2 files changed, 179 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 540b7c9deaef..539d718df797 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1518,6 +1518,132 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters_macs, 256); MVM_DEBUGFS_READ_WRITE_FILE_OPS(d3_sram, 8); #endif +static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_mvm *mvm = file->private_data; + struct iwl_dbg_mem_access_cmd cmd = {}; + struct iwl_dbg_mem_access_rsp *rsp; + struct iwl_host_cmd hcmd = { + .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL, + .data = { &cmd, }, + .len = { sizeof(cmd) }, + }; + size_t delta, len; + ssize_t ret; + + hcmd.id = iwl_cmd_id(*ppos >> 24 ? 
UMAC_RD_WR : LMAC_RD_WR, + DEBUG_GROUP, 0); + cmd.op = cpu_to_le32(DEBUG_MEM_OP_READ); + + /* Take care of alignment of both the position and the length */ + delta = *ppos & 0x3; + cmd.addr = cpu_to_le32(*ppos - delta); + cmd.len = cpu_to_le32(min(ALIGN(count + delta, 4) / 4, + (size_t)DEBUG_MEM_MAX_SIZE_DWORDS)); + + mutex_lock(&mvm->mutex); + ret = iwl_mvm_send_cmd(mvm, &hcmd); + mutex_unlock(&mvm->mutex); + + if (ret < 0) + return ret; + + rsp = (void *)hcmd.resp_pkt->data; + if (le32_to_cpu(rsp->status) != DEBUG_MEM_STATUS_SUCCESS) { + ret = -ENXIO; + goto out; + } + + len = min((size_t)le32_to_cpu(rsp->len) << 2, + iwl_rx_packet_payload_len(hcmd.resp_pkt) - sizeof(*rsp)); + len = min(len - delta, count); + if (len < 0) { + ret = -EFAULT; + goto out; + } + + ret = len - copy_to_user(user_buf, (void *)rsp->data + delta, len); + *ppos += ret; + +out: + iwl_free_resp(&hcmd); + return ret; +} + +static ssize_t iwl_dbgfs_mem_write(struct file *file, + const char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct iwl_mvm *mvm = file->private_data; + struct iwl_dbg_mem_access_cmd *cmd; + struct iwl_dbg_mem_access_rsp *rsp; + struct iwl_host_cmd hcmd = {}; + size_t cmd_size; + size_t data_size; + u32 op, len; + ssize_t ret; + + hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, + DEBUG_GROUP, 0); + + if (*ppos & 0x3 || count < 4) { + op = DEBUG_MEM_OP_WRITE_BYTES; + len = min(count, (size_t)(4 - (*ppos & 0x3))); + data_size = len; + } else { + op = DEBUG_MEM_OP_WRITE; + len = min(count >> 2, (size_t)DEBUG_MEM_MAX_SIZE_DWORDS); + data_size = len << 2; + } + + cmd_size = sizeof(*cmd) + ALIGN(data_size, 4); + cmd = kzalloc(cmd_size, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->op = cpu_to_le32(op); + cmd->len = cpu_to_le32(len); + cmd->addr = cpu_to_le32(*ppos); + if (copy_from_user((void *)cmd->data, user_buf, data_size)) { + kfree(cmd); + return -EFAULT; + } + + hcmd.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL, + hcmd.data[0] = (void *)cmd; + hcmd.len[0] = cmd_size; + + mutex_lock(&mvm->mutex); + ret = iwl_mvm_send_cmd(mvm, &hcmd); + mutex_unlock(&mvm->mutex); + + kfree(cmd); + + if (ret < 0) + return ret; + + rsp = (void *)hcmd.resp_pkt->data; + if (rsp->status != DEBUG_MEM_STATUS_SUCCESS) { + ret = -ENXIO; + goto out; + } + + ret = data_size; + *ppos += ret; + +out: + iwl_free_resp(&hcmd); + return ret; +} + +static const struct file_operations iwl_dbgfs_mem_ops = { + .read = iwl_dbgfs_mem_read, + .write = iwl_dbgfs_mem_write, + .open = simple_open, + .llseek = default_llseek, +}; + int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) { struct dentry *bcast_dir __maybe_unused; @@ -1615,6 +1741,9 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) mvm->debugfs_dir, &mvm->nvm_phy_sku_blob)) goto err; + debugfs_create_file("mem", S_IRUSR | S_IWUSR, dbgfs_dir, mvm, + &iwl_dbgfs_mem_ops); + /* * Create a symlink with mac80211. It will be removed when mac80211 * exists (before the opmode exists which removes the target.) 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 2f92994d0e5b..fdd9506e10cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -340,6 +340,11 @@ enum iwl_prot_offload_subcmd_ids { STORED_BEACON_NTF = 0xFF, }; +enum iwl_fmac_debug_cmds { + LMAC_RD_WR = 0x0, + UMAC_RD_WR = 0x1, +}; + /* command groups */ enum { LEGACY_GROUP = 0x0, @@ -349,6 +354,7 @@ enum { PHY_OPS_GROUP = 0x4, DATA_PATH_GROUP = 0x5, PROT_OFFLOAD_GROUP = 0xb, + DEBUG_GROUP = 0xf, }; /** @@ -2149,4 +2155,48 @@ struct iwl_channel_switch_noa_notif { __le32 id_and_color; } __packed; /* CHANNEL_SWITCH_START_NTFY_API_S_VER_1 */ +/* Operation types for the debug mem access */ +enum { + DEBUG_MEM_OP_READ = 0, + DEBUG_MEM_OP_WRITE = 1, + DEBUG_MEM_OP_WRITE_BYTES = 2, +}; + +#define DEBUG_MEM_MAX_SIZE_DWORDS 32 + +/** + * struct iwl_dbg_mem_access_cmd - Request the device to read/write memory + * @op: DEBUG_MEM_OP_* + * @addr: address to read/write from/to + * @len: in dwords, to read/write + * @data: for write opeations, contains the source buffer + */ +struct iwl_dbg_mem_access_cmd { + __le32 op; + __le32 addr; + __le32 len; + __le32 data[]; +} __packed; /* DEBUG_(U|L)MAC_RD_WR_CMD_API_S_VER_1 */ + +/* Status responses for the debug mem access */ +enum { + DEBUG_MEM_STATUS_SUCCESS = 0x0, + DEBUG_MEM_STATUS_FAILED = 0x1, + DEBUG_MEM_STATUS_LOCKED = 0x2, + DEBUG_MEM_STATUS_HIDDEN = 0x3, + DEBUG_MEM_STATUS_LENGTH = 0x4, +}; + +/** + * struct iwl_dbg_mem_access_rsp - Response to debug mem commands + * @status: DEBUG_MEM_STATUS_* + * @len: read dwords (0 for write operations) + * @data: contains the read DWs + */ +struct iwl_dbg_mem_access_rsp { + __le32 status; + __le32 len; + __le32 data[]; +} __packed; /* DEBUG_(U|L)MAC_RD_WR_RSP_API_S_VER_1 */ + #endif /* __fw_api_h__ */ From 176aa60bf148b5af4209ac323cef941dee76e390 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 31 Aug 2016 19:03:01 +0300 Subject: [PATCH 0377/1050] iwlwifi: mvm: set HCMD_NAME for PHY_DB as well Currently it is logged as UNKNOWN. Also, 0x6c seems to be the permanent ID for this command, remove incorrect comment and uncomment the command from the commands list. 
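(The "UNKNOWN" in the logs comes from the command-name lookup finding no table entry for opcode 0x6c. HCMD_NAME() is the usual stringification trick; a sketch of the pattern, which may differ in layout from the exact iwlwifi definition:

    #define HCMD_NAME(x) { .cmd_id = x, .cmd_name = #x }

so HCMD_NAME(PHY_DB_CMD) yields { 0x6c, "PHY_DB_CMD" } for the log printer to resolve against.)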
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c index 7beba9ae5617..2893826d7d2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c @@ -110,7 +110,7 @@ enum iwl_phy_db_section_type { IWL_PHY_DB_MAX }; -#define PHY_DB_CMD 0x6c /* TEMP API - The actual is 0x8c */ +#define PHY_DB_CMD 0x6c /* * phy db - configure operational ucode diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index fdd9506e10cd..97633690f3d5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -205,7 +205,7 @@ enum { /* Phy */ PHY_CONFIGURATION_CMD = 0x6a, CALIB_RES_NOTIF_PHY_DB = 0x6b, - /* PHY_DB_CMD = 0x6c, */ + PHY_DB_CMD = 0x6c, /* ToF - 802.11mc FTM */ TOF_CMD = 0x10, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index b2d8722cbb11..3eccd89b9570 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -359,6 +359,7 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = { HCMD_NAME(BT_COEX_CI), HCMD_NAME(PHY_CONFIGURATION_CMD), HCMD_NAME(CALIB_RES_NOTIF_PHY_DB), + HCMD_NAME(PHY_DB_CMD), HCMD_NAME(SCAN_OFFLOAD_COMPLETE), HCMD_NAME(SCAN_OFFLOAD_UPDATE_PROFILES_CMD), HCMD_NAME(SCAN_OFFLOAD_CONFIG_CMD), From 8098203f26b4fd4b4ef5281b2e77d49ab6d9a3b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Aug 2016 22:16:11 +0200 Subject: [PATCH 0378/1050] iwlwifi: mvm: use LIST_HEAD() macro There's no need to declare a list and then init it manually, just use the LIST_HEAD() macro. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 3eccd89b9570..75f3ca0504fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -858,9 +858,7 @@ static void iwl_mvm_async_handlers_wk(struct work_struct *wk) struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, async_handlers_wk); struct iwl_async_handler_entry *entry, *tmp; - struct list_head local_list; - - INIT_LIST_HEAD(&local_list); + LIST_HEAD(local_list); /* Ensure that we are not in stop flow (check iwl_mvm_mac_stop) */ From 0979a913f879ea39504200d83fb9f275a555a58d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Aug 2016 22:16:11 +0200 Subject: [PATCH 0379/1050] iwlwifi: pcie: use LIST_HEAD() macro There's no need to declare a list and then init it manually, just use the LIST_HEAD() macro. 
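The equivalence is easy to verify against the definitions in include/linux/list.h (paraphrased here):

    struct list_head {
            struct list_head *next, *prev;
    };

    #define LIST_HEAD_INIT(name) { &(name), &(name) }

    #define LIST_HEAD(name) \
            struct list_head name = LIST_HEAD_INIT(name)

    static inline void INIT_LIST_HEAD(struct list_head *list)
    {
            list->next = list;
            list->prev = list;
    }

LIST_HEAD(local_list) declares the node and points it at itself in a single step - the same self-referencing empty state the removed INIT_LIST_HEAD() call produced at run time, with no way to forget the initialization.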
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 78cb6d2314d9..6fe5546dc773 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -487,15 +487,13 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans) while (pending) { int i; - struct list_head local_allocated; + LIST_HEAD(local_allocated); gfp_t gfp_mask = GFP_KERNEL; /* Do not post a warning if there are only a few requests */ if (pending < RX_PENDING_WATERMARK) gfp_mask |= __GFP_NOWARN; - INIT_LIST_HEAD(&local_allocated); - for (i = 0; i < RX_CLAIM_REQ_ALLOC;) { struct iwl_rx_mem_buffer *rxb; struct page *page; From 1984e075915cbae65336a99b1879865080d8e55e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Sep 2016 09:49:27 +0100 Subject: [PATCH 0380/1050] genirq: Skip chained interrupt trigger setup if type is IRQ_TYPE_NONE There is no point in trying to configure the trigger of a chained interrupt if no trigger information has been configured. At best this is ignored, and at worst this confuses the underlying irqchip (which is likely not to handle such a thing), and unnecessarily alarms the user. Only apply the configuration if type is not IRQ_TYPE_NONE. Fixes: 1e12c4a9393b ("genirq: Correctly configure the trigger on chained interrupts") Reported-and-tested-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/CAMuHMdVW1eTn20=EtYcJ8hkVwohaSuH_yQXrY2MGBEvZ8fpFOg@mail.gmail.com Link: http://lkml.kernel.org/r/1474274967-15984-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 637389088b3f..26ba5654d9d5 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -820,6 +820,8 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, desc->name = name; if (handle != handle_bad_irq && is_chained) { + unsigned int type = irqd_get_trigger_type(&desc->irq_data); + /* * We're about to start this interrupt immediately, * hence the need to set the trigger configuration. @@ -828,8 +830,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, * chained interrupt. Reset it immediately because we * do know better. */ - __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data)); - desc->handle_irq = handle; + if (type != IRQ_TYPE_NONE) { + __irq_set_trigger(desc, type); + desc->handle_irq = handle; + } irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); From 7a353289925f01cb188ebc6fc4f4a33456b7de44 Mon Sep 17 00:00:00 2001 From: RogerCC Lin Date: Mon, 19 Sep 2016 10:53:25 +0800 Subject: [PATCH 0381/1050] mtd: nand: fix generating over-boundary ECC data when writing When mtk_ecc_encode() is writing the ECC parity data to the OOB region, because each register is 4 bytes in length while len's unit is in bytes, the operation in the for loop will cross the ECC's boundary.
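To put numbers on the overrun: assume, for illustration, a strength that yields len = 13 parity bytes. The old loop then did 13 readl() calls against ECC_ENCPAR(0..12) and stored 13 32-bit words - 52 bytes - through the u32 pointer, although only 13 OOB bytes are valid and only ECC_ENCPAR(0..3) hold parity data. The fix reads each 32-bit register once and picks it apart byte by byte; the extraction pattern, shown standalone (little-endian register layout assumed):

    if ((i % 4) == 0)                     /* a new register every 4 bytes */
            val = readl(ecc->regs + ECC_ENCPAR(i / 4));
    p[i] = (val >> ((i % 4) * 8)) & 0xff; /* select byte i % 4 of the word */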
Signed-off-by: RogerCC Lin Fixes: 1d6b1e464950 ("mtd: mediatek: driver for MTK Smart Device") Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_ecc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c index 25a4fbd4d24a..d54f666417e1 100644 --- a/drivers/mtd/nand/mtk_ecc.c +++ b/drivers/mtd/nand/mtk_ecc.c @@ -366,7 +366,8 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, u8 *data, u32 bytes) { dma_addr_t addr; - u32 *p, len, i; + u8 *p; + u32 len, i, val; int ret = 0; addr = dma_map_single(ecc->dev, data, bytes, DMA_TO_DEVICE); @@ -392,11 +393,14 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, /* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */ len = (config->strength * ECC_PARITY_BITS + 7) >> 3; - p = (u32 *)(data + bytes); + p = data + bytes; /* write the parity bytes generated by the ECC back to the OOB region */ - for (i = 0; i < len; i++) - p[i] = readl(ecc->regs + ECC_ENCPAR(i)); + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = readl(ecc->regs + ECC_ENCPAR(i / 4)); + p[i] = (val >> ((i % 4) * 8)) & 0xff; + } timeout: dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE); From 559e58e7ed2dadc310f174e609ead8a3e8acfc4e Mon Sep 17 00:00:00 2001 From: RogerCC Lin Date: Mon, 19 Sep 2016 10:53:26 +0800 Subject: [PATCH 0382/1050] mtd: nand: fix chances to create incomplete ECC data when writing When mtk_nfc_do_write_page() compares the sector number, because the sector number field sits at the 12th-bit position of the NFI_BYTELEN register, the masked register value should be shifted right by 12 bits before being compared. This bug may cause the second subpage to have incomplete ECC parity bytes. Signed-off-by: RogerCC Lin Fixes: 1d6b1e464950 ("mtd: mediatek: driver for MTK Smart Device") Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_nand.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c index ddaa2acb9dd7..5223a2182ee4 100644 --- a/drivers/mtd/nand/mtk_nand.c +++ b/drivers/mtd/nand/mtk_nand.c @@ -93,6 +93,9 @@ #define NFI_FSM_MASK (0xf << 16) #define NFI_ADDRCNTR (0x70) #define CNTR_MASK GENMASK(16, 12) +#define ADDRCNTR_SEC_SHIFT (12) +#define ADDRCNTR_SEC(val) \ (((val) & CNTR_MASK) >> ADDRCNTR_SEC_SHIFT) #define NFI_STRADDR (0x80) #define NFI_BYTELEN (0x84) #define NFI_CSEL (0x90) @@ -699,7 +702,7 @@ static int mtk_nfc_do_write_page(struct mtd_info *mtd, struct nand_chip *chip, } ret = readl_poll_timeout_atomic(nfc->regs + NFI_ADDRCNTR, reg, - (reg & CNTR_MASK) >= chip->ecc.steps, + ADDRCNTR_SEC(reg) >= chip->ecc.steps, 10, MTK_TIMEOUT); if (ret) dev_err(dev, "hwecc write timeout\n"); @@ -902,7 +905,7 @@ static int mtk_nfc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, dev_warn(nfc->dev, "read ahb/dma done timeout\n"); rc = readl_poll_timeout_atomic(nfc->regs + NFI_BYTELEN, reg, - (reg & CNTR_MASK) >= sectors, 10, + ADDRCNTR_SEC(reg) >= sectors, 10, MTK_TIMEOUT); if (rc < 0) { dev_err(nfc->dev, "subpage done timeout\n"); From 38178e7b88dcbe1ab384f27a7370074e774dda81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lothar=20Wa=C3=9Fmann?= Date: Mon, 19 Sep 2016 11:09:40 +0200 Subject: [PATCH 0383/1050] mtd: nand: mxc: fix obiwan error in mxc_nand_v[12]_ooblayout_free() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a894cf6c5a82 ("mtd: nand: mxc: switch to mtd_ooblayout_ops")
introduced a regression accessing the OOB area from the mxc_nand driver due to an Obiwan error in the mxc_nand_v[12]_ooblayout_free() functions. They report a bogus oobregion { 64, 7 } which leads to errors accessing bogus data when reading the oob area. Prior to the commit the mtd-oobtest module could be run without any errors. With the offending commit, this test fails with results like: |Running mtd-oobtest | |================================================= |mtd_oobtest: MTD device: 5 |mtd_oobtest: MTD device size 524288, eraseblock size 131072, page size 2048, count of eraseblocks 4, pages per eraseblock 64, OOB size 64 |mtd_test: scanning for bad eraseblocks |mtd_test: scanned 4 eraseblocks, 0 are bad |mtd_oobtest: test 1 of 5 |mtd_oobtest: writing OOBs of whole device |mtd_oobtest: written up to eraseblock 0 |mtd_oobtest: written 4 eraseblocks |mtd_oobtest: verifying all eraseblocks |mtd_oobtest: error @addr[0x0:0x19] 0x9a -> 0x78 diff 0xe2 |mtd_oobtest: error @addr[0x0:0x1a] 0xcc -> 0x0 diff 0xcc |mtd_oobtest: error @addr[0x0:0x1b] 0xe0 -> 0x85 diff 0x65 |mtd_oobtest: error @addr[0x0:0x1c] 0x60 -> 0x62 diff 0x2 |mtd_oobtest: error @addr[0x0:0x1d] 0x69 -> 0x45 diff 0x2c |mtd_oobtest: error @addr[0x0:0x1e] 0xcd -> 0xa0 diff 0x6d |mtd_oobtest: error @addr[0x0:0x1f] 0xf2 -> 0x60 diff 0x92 |mtd_oobtest: error: verify failed at 0x0 [...] Signed-off-by: Lothar Waßmann Fixes: a894cf6c5a82 ("mtd: nand: mxc: switch to mtd_ooblayout_ops") Cc: Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mxc_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 5173fadc9a4e..57cbe2b83849 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -943,7 +943,7 @@ static int mxc_v2_ooblayout_free(struct mtd_info *mtd, int section, struct nand_chip *nand_chip = mtd_to_nand(mtd); int stepsize = nand_chip->ecc.bytes == 9 ? 16 : 26; - if (section > nand_chip->ecc.steps) + if (section >= nand_chip->ecc.steps) return -ERANGE; if (!section) { From b588479358ce26f32138e0f0a7ab0678f8e3e601 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 18 Sep 2016 07:42:53 +0000 Subject: [PATCH 0384/1050] xfrm: Fix memory leak of aead algorithm name commit 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") introduced aead. The function attach_aead kmemdup()s the algorithm name during xfrm_state_construct(). However this memory is never freed. Implementation has since been slightly modified in commit ee5c23176fcc ("xfrm: Clone states properly on migration") without resolving this leak. This patch adds a kfree() call for the aead algorithm name. 
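For context, a paraphrased sketch of the pairing (not the verbatim kernel code): attach_aead() duplicates the user-supplied algorithm description with kmemdup(), so the destructor must free it like every other duplicated member:

    /* construction/clone side */
    x->aead = kmemdup(ualg, aead_len(ualg), GFP_KERNEL);

    /* teardown side, now matched in xfrm_state_gc_destroy() */
    kfree(x->aead);

aead_len() accounts for the key material that trails struct xfrm_algo_aead, so what leaked was the struct plus the key, once per destroyed state.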
Fixes: 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") Signed-off-by: Ilan Tayari Acked-by: Rami Rosen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c56d8c..a30f898dc1c5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -332,6 +332,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) { tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); + kfree(x->aead); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); From 4de349e786a3a2d51bd02d56f3de151bbc3c3df9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 17 Aug 2016 12:41:08 -0300 Subject: [PATCH 0385/1050] can: flexcan: fix resume function On an imx6ul-pico board the following error is seen during system suspend: dpm_run_callback(): platform_pm_resume+0x0/0x54 returns -110 PM: Device 2090000.flexcan failed to resume: error -110 The reason for this suspend error is that when the CAN interface is not active, the clocks are disabled and then flexcan_chip_enable() will always fail due to a timeout error. In order to fix this issue, only call flexcan_chip_enable/disable() when the CAN interface is active. Based on a patch from Dong Aisheng in the NXP kernel. Signed-off-by: Fabio Estevam Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 41c0fc9f3b14..16f7cadda5c3 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1268,11 +1268,10 @@ static int __maybe_unused flexcan_suspend(struct device *device) struct flexcan_priv *priv = netdev_priv(dev); int err; - err = flexcan_chip_disable(priv); - if (err) - return err; - if (netif_running(dev)) { + err = flexcan_chip_disable(priv); + if (err) + return err; netif_stop_queue(dev); netif_device_detach(dev); } @@ -1285,13 +1284,17 @@ static int __maybe_unused flexcan_resume(struct device *device) { struct net_device *dev = dev_get_drvdata(device); struct flexcan_priv *priv = netdev_priv(dev); + int err; priv->can.state = CAN_STATE_ERROR_ACTIVE; if (netif_running(dev)) { netif_device_attach(dev); netif_start_queue(dev); + err = flexcan_chip_enable(priv); + if (err) + return err; } - return flexcan_chip_enable(priv); + return 0; } static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume); From b244614a60ab7ce54c12a9cbe15cfbf8d79d0967 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Wed, 31 Aug 2016 12:33:23 +0200 Subject: [PATCH 0386/1050] MIPS: Avoid a BUG warning during prctl(PR_SET_FP_MODE, ...) The cpu_has_fpu macro uses smp_processor_id() and is currently executed with preemption enabled, which triggers the warning at runtime. It is assumed throughout the kernel that if any CPU has an FPU, then all CPUs would have an FPU as well, so it is safe to perform the check with preemption enabled - change the code to use the raw_ variant of the check to avoid the warning.
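For background, a paraphrased sketch of the MIPS accessors involved (see arch/mips/include/asm/cpu-info.h for the real definitions):

    #define current_cpu_data     cpu_data[smp_processor_id()]     /* may warn */
    #define raw_current_cpu_data cpu_data[raw_smp_processor_id()] /* no debug check */

smp_processor_id() is only meaningful while the task cannot migrate, so CONFIG_DEBUG_PREEMPT makes it complain in preemptible context, while raw_smp_processor_id() skips the check. Since the FPU configuration is taken to be uniform across CPUs, any CPU's cpu_data entry answers the question, making the raw accessor appropriate here; the heavier alternative would be to bracket the checks with preempt_disable()/preempt_enable().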
Signed-off-by: Marcin Nowakowski Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.0+ Patchwork: https://patchwork.linux-mips.org/patch/14125/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 7429ad09fbe3..d2d061520a23 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -605,14 +605,14 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) return -EOPNOTSUPP; /* Avoid inadvertently triggering emulation */ - if ((value & PR_FP_MODE_FR) && cpu_has_fpu && - !(current_cpu_data.fpu_id & MIPS_FPIR_F64)) + if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu && + !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64)) return -EOPNOTSUPP; - if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre) + if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre) return -EOPNOTSUPP; /* FR = 0 not supported in MIPS R6 */ - if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6) + if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6) return -EOPNOTSUPP; /* Proceed with the mode switch */ From 3312eca519ba3b68b1966705e27b9dd9134f092c Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 23 Aug 2016 01:07:35 +0300 Subject: [PATCH 0387/1050] MIPS: Octeon: mangle-port: fix build failure with VDSO code Commit 1685ddbe35cd ("MIPS: Octeon: Changes to support readq()/writeq() usage.") added bitwise shift operations that assume that unsigned long is always 64 bits. This broke the build of VDSO code, as it gets compiled also in "faked" 32-bit mode. Although the failing inline functions are never executed in 32-bit mode, they still need to pass the compilation. Fix by using 64-bit types explicitly. The patch fixes the following build failure: CC arch/mips/vdso/gettimeofday-o32.o In file included from los/git/devel/linux/arch/mips/include/asm/io.h:32:0, from los/git/devel/linux/arch/mips/include/asm/page.h:194, from los/git/devel/linux/arch/mips/vdso/vdso.h:26, from los/git/devel/linux/arch/mips/vdso/gettimeofday.c:11: los/git/devel/linux/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h: In function '__should_swizzle_bits': los/git/devel/linux/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h:19:40: error: right shift count >= width of type [-Werror=shift-count-overflow] unsigned long did = ((unsigned long)a >> 40) & 0xff; ^~ Fixes: 1685ddbe35cd ("MIPS: Octeon: Changes to support readq()/writeq() usage.") Signed-off-by: Aaro Koskinen Acked-by: David Daney Cc: David Daney Cc: Steven J.
Hill Cc: Alex Smith Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14039/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-cavium-octeon/mangle-port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h index 0cf5ac1f7245..8ff2cbdf2c3e 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h +++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h @@ -15,8 +15,8 @@ static inline bool __should_swizzle_bits(volatile void *a) { extern const bool octeon_should_swizzle_table[]; + u64 did = ((u64)(uintptr_t)a >> 40) & 0xff; - unsigned long did = ((unsigned long)a >> 40) & 0xff; return octeon_should_swizzle_table[did]; } @@ -29,7 +29,7 @@ static inline bool __should_swizzle_bits(volatile void *a) #define __should_swizzle_bits(a) false -static inline bool __should_swizzle_addr(unsigned long p) +static inline bool __should_swizzle_addr(u64 p) { /* boot bus? */ return ((p >> 40) & 0xff) == 0; From 8074d7829595b7c86beba914c04ed5839cf4b3d6 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 23 Aug 2016 21:39:43 +0300 Subject: [PATCH 0388/1050] MIPS: Octeon: Fix platform bus probing Commit 44a7185c2ae6 ("of/platform: Add common method to populate default bus") added new arch_initcall of_platform_default_populate_init() that will override device_initcall octeon_publish_devices(). This broke many OCTEON boards as important devices are not getting probed anymore (e.g. on EdgeRouter Lite the USB mass storage/rootfs is missing). Fix by changing octeon_publish_devices() to arch_initcall. Fixes: 44a7185c2ae6 ("of/platform: Add common method to populate default bus") Signed-off-by: Aaro Koskinen Acked-by: Rob Herring Cc: David Daney Cc: Kefeng Wang Cc: linux-mips@linux-mips.org Cc: devicetree@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14041/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/octeon-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index b31fbc9d6eae..37a932d9148c 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -1059,7 +1059,7 @@ static int __init octeon_publish_devices(void) { return of_platform_bus_probe(NULL, octeon_ids, NULL); } -device_initcall(octeon_publish_devices); +arch_initcall(octeon_publish_devices); MODULE_AUTHOR("David Daney "); MODULE_LICENSE("GPL"); From d8feef9bd447381952a33e6284241006f394c080 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 18 Sep 2016 11:24:50 -0300 Subject: [PATCH 0389/1050] [media] cx23885/saa7134: assign q->dev to the PCI device Fix a regression caused by commit 2bc46b3ad3c1 ("[media] media/pci: convert drivers to use the new vb2_queue dev field"). Three places where q->dev should be set were missed, causing a WARN. Fixes: 2bc46b3ad3c1 ("[media] media/pci: convert drivers to use the new vb2_queue dev field"). 
Signed-off-by: Hans Verkuil Reported-by: Marton Balint Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/cx23885/cx23885-417.c | 1 + drivers/media/pci/saa7134/saa7134-dvb.c | 1 + drivers/media/pci/saa7134/saa7134-empress.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c index efec2d1a7afd..4d080da7afaf 100644 --- a/drivers/media/pci/cx23885/cx23885-417.c +++ b/drivers/media/pci/cx23885/cx23885-417.c @@ -1552,6 +1552,7 @@ int cx23885_417_register(struct cx23885_dev *dev) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) diff --git a/drivers/media/pci/saa7134/saa7134-dvb.c b/drivers/media/pci/saa7134/saa7134-dvb.c index db987e5b93eb..59a4b5f7724e 100644 --- a/drivers/media/pci/saa7134/saa7134-dvb.c +++ b/drivers/media/pci/saa7134/saa7134-dvb.c @@ -1238,6 +1238,7 @@ static int dvb_init(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; ret = vb2_queue_init(q); if (ret) { vb2_dvb_dealloc_frontends(&dev->frontends); diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c index ca417a454d67..791a5161809b 100644 --- a/drivers/media/pci/saa7134/saa7134-empress.c +++ b/drivers/media/pci/saa7134/saa7134-empress.c @@ -295,6 +295,7 @@ static int empress_init(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err) return err; From 480b6837aa579991c6acc113bccf838e6a90843c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 19 Sep 2016 20:19:00 +1000 Subject: [PATCH 0390/1050] nvdimm: fix PHYS_PFN/PFN_PHYS mixup nd_activate_region() iomaps any hint addresses required when activating a region. To prevent duplicate mappings it checks the PFN of the hint to be mapped against the PFNs of the already mapped hints. Unfortunately it doesn't convert the PFN back into a physical address before passing it to devm_nvdimm_ioremap(). Instead it applies PHYS_PFN a second time which ends about as well as you would imagine. Signed-off-by: Oliver O'Halloran Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e8d5ba7b29af..4eef88eb5144 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -57,7 +57,7 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, ndrd->flush_wpq[dimm][j] & PAGE_MASK); else flush_page = devm_nvdimm_ioremap(dev, - PHYS_PFN(pfn), PAGE_SIZE); + PFN_PHYS(pfn), PAGE_SIZE); if (!flush_page) return -ENXIO; ndrd->flush_wpq[dimm][i] = flush_page From 08bccf43627e1972035c1fed5b4f570bdbde1b1e Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Fri, 2 Sep 2016 10:13:21 +0200 Subject: [PATCH 0391/1050] MIPS: Select HAVE_REGS_AND_STACK_ACCESS_API Add lost Kconfig symbol. This should have been part of 40e084a506eb ('MIPS: Add uprobes support.'). 
Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Signed-off-by: Marcin Nowakowski Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 26388562e300..212ff92920d2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -65,6 +65,7 @@ config MIPS select ARCH_CLOCKSOURCE_DATA select HANDLE_DOMAIN_IRQ select HAVE_EXIT_THREAD + select HAVE_REGS_AND_STACK_ACCESS_API menu "Machine selection" From 53f863a66904542b03204f2b115d050b04c11ba5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Jul 2016 14:12:40 +0200 Subject: [PATCH 0392/1050] Bluetooth: Put led_trigger field behind CONFIG_BT_LEDS The led_trigger field in hci_dev should be conditional based on if CONFIG_BT_LEDS is set or not. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ee7fc47680a1..b8d43bd9c71b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -399,7 +399,9 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; +#if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; +#endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); From e64c97b53bc6727aa4385535166aaa047281e02d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Jul 2016 14:12:41 +0200 Subject: [PATCH 0393/1050] Bluetooth: Add combined LED trigger for controller power Instead of just having a LED trigger for power on a specific controller, this adds the LED trigger "bluetooth-power" that combines the power states of all controllers into a single trigger. This simplifies the trigger selection and also supports multiple controllers per host system via a single LED. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/af_bluetooth.c | 5 +++++ net/bluetooth/leds.c | 27 +++++++++++++++++++++++++++ net/bluetooth/leds.h | 10 ++++++++++ 3 files changed, 42 insertions(+) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0b5f729d08d2..1d96ff3a8d87 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,6 +31,7 @@ #include #include +#include "leds.h" #include "selftest.h" /* Bluetooth sockets */ @@ -726,6 +727,8 @@ static int __init bt_init(void) bt_debugfs = debugfs_create_dir("bluetooth", NULL); + bt_leds_init(); + err = bt_sysfs_init(); if (err < 0) return err; @@ -785,6 +788,8 @@ static void __exit bt_exit(void) bt_sysfs_cleanup(); + bt_leds_cleanup(); + debugfs_remove_recursive(bt_debugfs); } diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c index 8319c8440c89..cb670b5594eb 100644 --- a/net/bluetooth/leds.c +++ b/net/bluetooth/leds.c @@ -11,6 +11,8 @@ #include "leds.h" +DEFINE_LED_TRIGGER(bt_power_led_trigger); + struct hci_basic_led_trigger { struct led_trigger led_trigger; struct hci_dev *hdev; @@ -24,6 +26,21 @@ void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) if (hdev->power_led) led_trigger_event(hdev->power_led, enabled ? LED_FULL : LED_OFF); + + if (!enabled) { + struct hci_dev *d; + + read_lock(&hci_dev_list_lock); + + list_for_each_entry(d, &hci_dev_list, list) { + if (test_bit(HCI_UP, &d->flags)) + enabled = true; + } + + read_unlock(&hci_dev_list_lock); + } + + led_trigger_event(bt_power_led_trigger, enabled ? 
LED_FULL : LED_OFF); } static void power_activate(struct led_classdev *led_cdev) @@ -72,3 +89,13 @@ void hci_leds_init(struct hci_dev *hdev) /* initialize power_led */ hdev->power_led = led_allocate_basic(hdev, power_activate, "power"); } + +void bt_leds_init(void) +{ + led_trigger_register_simple("bluetooth-power", &bt_power_led_trigger); +} + +void bt_leds_cleanup(void) +{ + led_trigger_unregister_simple(bt_power_led_trigger); +} diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h index a9c4d6ea01cf..08725a2fbd9b 100644 --- a/net/bluetooth/leds.h +++ b/net/bluetooth/leds.h @@ -7,10 +7,20 @@ */ #if IS_ENABLED(CONFIG_BT_LEDS) + void hci_leds_update_powered(struct hci_dev *hdev, bool enabled); void hci_leds_init(struct hci_dev *hdev); + +void bt_leds_init(void); +void bt_leds_cleanup(void); + #else + static inline void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) {} static inline void hci_leds_init(struct hci_dev *hdev) {} + +static inline void bt_leds_init(void) {} +static inline void bt_leds_cleanup(void) {} + #endif From abbcc341adb16f68915cae7ef9a10e0d7b57e3c0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 24 Jul 2016 16:12:24 +0200 Subject: [PATCH 0394/1050] mac802154: set phy net namespace for new ifaces This patch sets the net namespace when creating SoftMAC interfaces. This is important if the namespace at the phy layer was switched before. Currently we lose interfaces in some namespaces and it's not possible to recover them. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 7079cd32a7ad..06019dba4b10 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -663,6 +663,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, /* TODO check this */ SET_NETDEV_DEV(ndev, &local->phy->dev); + dev_net_set(ndev, wpan_phy_net(local->hw.phy)); sdata = netdev_priv(ndev); ndev->ieee802154_ptr = &sdata->wpan_dev; memcpy(sdata->name, ndev->name, IFNAMSIZ); From 5ddedce3b7331959a6da217ed3189d020090873c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 24 Jul 2016 16:12:25 +0200 Subject: [PATCH 0395/1050] 6lowpan: ndisc: no overreact if no short address is available This patch removes the handling that clears the short address of a neighbour entry if an RS/RA/NS/NA doesn't contain a short address. If these messages don't carry any short address option, the existing short address from the ndisc cache will be used. The current behaviour marks the neighbour as no longer having a short address.
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/ndisc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c index 86450b7e2899..941df2fa4448 100644 --- a/net/6lowpan/ndisc.c +++ b/net/6lowpan/ndisc.c @@ -101,8 +101,6 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short); if (!lowpan_802154_is_valid_src_short_addr(neigh->short_addr)) neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); - } else { - neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); } write_unlock_bh(&n->lock); } From ca1de81aa262dcf48354a7c55f2558205517d06e Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 25 Jul 2016 11:46:40 -0400 Subject: [PATCH 0396/1050] mac802154: don't warn on unsupported frames Just because we don't support certain types of frames yet doesn't mean we have to flood the message log with warnings about "invalid" frames. Signed-off-by: Aristeu Rozanski Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 446e1300383e..b978da018bf8 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -101,6 +101,11 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, sdata->dev->stats.rx_bytes += skb->len; switch (mac_cb(skb)->type) { + case IEEE802154_FC_TYPE_BEACON: + case IEEE802154_FC_TYPE_ACK: + case IEEE802154_FC_TYPE_MAC_CMD: + goto fail; + case IEEE802154_FC_TYPE_DATA: return ieee802154_deliver_skb(skb); default: From bd89bb6daaca3e4a7c509bdacb53a610f432fa2c Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 25 Jul 2016 11:46:41 -0400 Subject: [PATCH 0397/1050] mac802154: use rate limited warnings for malformed frames Signed-off-by: Aristeu Rozanski Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index b978da018bf8..4dcf6e18563a 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -109,8 +109,8 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, case IEEE802154_FC_TYPE_DATA: return ieee802154_deliver_skb(skb); default: - pr_warn("ieee802154: bad frame received (type = %d)\n", - mac_cb(skb)->type); + pr_warn_ratelimited("ieee802154: bad frame received " + "(type = %d)\n", mac_cb(skb)->type); goto fail; } From 65010e68efbeda4275845240869138c0c4587422 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 17:01:27 -0700 Subject: [PATCH 0398/1050] Bluetooth: Add HCI device identifier for Qualcomm SMD This patch assigns the next free HCI device identifier to Bluetooth devices based on the Qualcomm Shared Memory channels. 
Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 003b25283407..0aac123b5eee 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -63,6 +63,7 @@ #define HCI_SDIO 6 #define HCI_SPI 7 #define HCI_I2C 8 +#define HCI_SMD 9 /* HCI controller types */ #define HCI_PRIMARY 0x00 From 1511cc750c3d9a1c402d71e3522c9cf1fad0ad9c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 17:01:28 -0700 Subject: [PATCH 0399/1050] Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver The Qualcomm WCNSS chip provides two SMD channels to the BT core; one for command and one for event packets. This driver exposes the two channels as a hci device. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 12 +++ drivers/bluetooth/Makefile | 1 + drivers/bluetooth/btqcomsmd.c | 182 ++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 drivers/bluetooth/btqcomsmd.c diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index cf50fd2e96df..2c481911f1ce 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -331,4 +331,16 @@ config BT_WILINK Say Y here to compile support for Texas Instrument's WiLink7 driver into the kernel or say M to compile it as module (btwilink). +config BT_QCOMSMD + tristate "Qualcomm SMD based HCI support" + depends on QCOM_SMD + select BT_QCA + help + Qualcomm SMD based HCI driver. + This driver is used to bridge HCI data onto the shared memory + channels to the WCNSS core. + + Say Y here to compile support for HCI over Qualcomm SMD into the + kernel or say M to compile as a module. + endmenu diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile index 9c18939fc5c9..3e92cfeb9ee2 100644 --- a/drivers/bluetooth/Makefile +++ b/drivers/bluetooth/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_BT_ATH3K) += ath3k.o obj-$(CONFIG_BT_MRVL) += btmrvl.o obj-$(CONFIG_BT_MRVL_SDIO) += btmrvl_sdio.o obj-$(CONFIG_BT_WILINK) += btwilink.o +obj-$(CONFIG_BT_QCOMSMD) += btqcomsmd.o obj-$(CONFIG_BT_BCM) += btbcm.o obj-$(CONFIG_BT_RTL) += btrtl.o obj-$(CONFIG_BT_QCA) += btqca.o diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c new file mode 100644 index 000000000000..08c2c93887c1 --- /dev/null +++ b/drivers/bluetooth/btqcomsmd.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016, Linaro Ltd. + * Copyright (c) 2015, Sony Mobile Communications Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "btqca.h" + +struct btqcomsmd { + struct hci_dev *hdev; + + struct qcom_smd_channel *acl_channel; + struct qcom_smd_channel *cmd_channel; +}; + +static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type, + const void *data, size_t count) +{ + struct sk_buff *skb; + + /* Use GFP_ATOMIC as we're in IRQ context */ + skb = bt_skb_alloc(count, GFP_ATOMIC); + if (!skb) { + hdev->stat.err_rx++; + return -ENOMEM; + } + + hci_skb_pkt_type(skb) = type; + memcpy(skb_put(skb, count), data, count); + + return hci_recv_frame(hdev, skb); +} + +static int btqcomsmd_acl_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + btq->hdev->stat.byte_rx += count; + return btqcomsmd_recv(btq->hdev, HCI_ACLDATA_PKT, data, count); +} + +static int btqcomsmd_cmd_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + return btqcomsmd_recv(btq->hdev, HCI_EVENT_PKT, data, count); +} + +static int btqcomsmd_send(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct btqcomsmd *btq = hci_get_drvdata(hdev); + int ret; + + switch (hci_skb_pkt_type(skb)) { + case HCI_ACLDATA_PKT: + ret = qcom_smd_send(btq->acl_channel, skb->data, skb->len); + hdev->stat.acl_tx++; + hdev->stat.byte_tx += skb->len; + break; + case HCI_COMMAND_PKT: + ret = qcom_smd_send(btq->cmd_channel, skb->data, skb->len); + hdev->stat.cmd_tx++; + break; + default: + ret = -EILSEQ; + break; + } + + kfree_skb(skb); + + return ret; +} + +static int btqcomsmd_open(struct hci_dev *hdev) +{ + return 0; +} + +static int btqcomsmd_close(struct hci_dev *hdev) +{ + return 0; +} + +static int btqcomsmd_probe(struct platform_device *pdev) +{ + struct btqcomsmd *btq; + struct hci_dev *hdev; + void *wcnss; + int ret; + + btq = devm_kzalloc(&pdev->dev, sizeof(*btq), GFP_KERNEL); + if (!btq) + return -ENOMEM; + + wcnss = dev_get_drvdata(pdev->dev.parent); + + btq->acl_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_ACL", + btqcomsmd_acl_callback); + if (IS_ERR(btq->acl_channel)) + return PTR_ERR(btq->acl_channel); + + btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD", + btqcomsmd_cmd_callback); + if (IS_ERR(btq->cmd_channel)) + return PTR_ERR(btq->cmd_channel); + + qcom_smd_set_drvdata(btq->acl_channel, btq); + qcom_smd_set_drvdata(btq->cmd_channel, btq); + + hdev = hci_alloc_dev(); + if (!hdev) + return -ENOMEM; + + hci_set_drvdata(hdev, btq); + btq->hdev = hdev; + SET_HCIDEV_DEV(hdev, &pdev->dev); + + hdev->bus = HCI_SMD; + hdev->open = btqcomsmd_open; + hdev->close = btqcomsmd_close; + hdev->send = btqcomsmd_send; + hdev->set_bdaddr = qca_set_bdaddr_rome; + + ret = hci_register_dev(hdev); + if (ret < 0) { + hci_free_dev(hdev); + return ret; + } + + platform_set_drvdata(pdev, btq); + + return 0; +} + +static int btqcomsmd_remove(struct platform_device *pdev) +{ + struct btqcomsmd *btq = platform_get_drvdata(pdev); + + hci_unregister_dev(btq->hdev); + hci_free_dev(btq->hdev); + + return 0; +} + +static const struct of_device_id btqcomsmd_of_match[] = { + { .compatible = "qcom,wcnss-bt", }, + { }, +}; + +static struct platform_driver btqcomsmd_driver = { + .probe = btqcomsmd_probe, + .remove = btqcomsmd_remove, + .driver = { + .name = "btqcomsmd", + .of_match_table = btqcomsmd_of_match, + }, +}; + +module_platform_driver(btqcomsmd_driver); + +MODULE_AUTHOR("Bjorn Andersson "); 
+MODULE_DESCRIPTION("Qualcomm SMD HCI driver"); +MODULE_LICENSE("GPL v2"); From f0a70a04ca10d07a383a89edea142e3cbab1f2ca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Aug 2016 16:02:44 -0700 Subject: [PATCH 0400/1050] Bluetooth: btusb, hci_intel: Fix wait_on_bit_timeout() return value checks wait_on_bit_timeout() returns one of the following three values: * 0 to indicate success. * -EINTR to indicate that a signal has been received; * -EAGAIN to indicate timeout; Make the wait_on_bit_timeout() callers check for these values. Signed-off-by: Bart Van Assche Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 5 ++--- drivers/bluetooth/hci_intel.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 811f9b97e360..c58a00cb5208 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2221,9 +2221,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Firmware loading interrupted", hdev->name); - err = -EINTR; goto done; } @@ -2275,7 +2274,7 @@ done: TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Device boot interrupted", hdev->name); return -EINTR; } diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index ed0a4201b551..9e271286c5e5 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -128,7 +128,7 @@ static int intel_wait_booting(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "Device boot interrupted"); return -EINTR; } @@ -151,7 +151,7 @@ static int intel_wait_lpm_transaction(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "LPM transaction interrupted"); return -EINTR; } @@ -813,7 +813,7 @@ static int intel_setup(struct hci_uart *hu) err = wait_on_bit_timeout(&intel->flags, STATE_DOWNLOADING, TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hdev, "Firmware loading interrupted"); err = -EINTR; goto done; From 1aabbbcefe8e62fbffaaa01ca8bdd4cd6ed1625b Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 29 Jul 2016 13:28:25 +0200 Subject: [PATCH 0401/1050] Bluetooth: add printf format attribute to hci_set_[fh]w_info() Commit 5177a83827cd ("Bluetooth: Add debugfs fields for hardware and firmware info") introduced hci_set_hw_info() and hci_set_fw_info(). These functions use kvasprintf_const() but are not marked with a __printf attribute. Adding such an attribute helps detecting issues related to printf-formatting at build time. 
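For reference, __printf() is a thin wrapper around GCC's format attribute (paraphrased from the kernel's compiler headers):

    #define __printf(a, b) __attribute__((format(printf, a, b)))

The first number is the 1-based position of the format string, the second the position of the first variadic argument. With the annotation in place, a mistake such as this hypothetical call

    hci_set_hw_info(hdev, "%s/%u", hw_name);   /* one argument short */

draws a -Wformat warning at compile time instead of undefined behaviour at run time.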
Signed-off-by: Nicolas Iooss Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b8d43bd9c71b..cc349f633570 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1028,8 +1028,8 @@ int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); -void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); -void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); From 935199348048902124d0b288788c3a45e78b69ab Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 16 Aug 2016 12:50:06 +0800 Subject: [PATCH 0402/1050] Bluetooth: btusb: Add support for 0cf3:e009 Device 0cf3:e009 is one of the QCA ROME family. T: Bus=01 Lev=01 Prnt=01 Port=07 Cnt=04 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e009 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Kai-Heng Feng Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c58a00cb5208..80ae854a0bee 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -248,6 +248,7 @@ static const struct usb_device_id blacklist_table[] = { /* QCA ROME chipset */ { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, From 7e8524591ffffe3536bd363827ff4477a5672c65 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:00:31 +0200 Subject: [PATCH 0403/1050] Bluetooth: bcm203x: don't print error when allocating urb fails kmalloc will print enough information in case of failure. 
Signed-off-by: Wolfram Sang Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bcm203x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c index 5b0ef7bbe8ac..5ce6d4176dc3 100644 --- a/drivers/bluetooth/bcm203x.c +++ b/drivers/bluetooth/bcm203x.c @@ -185,10 +185,8 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id data->state = BCM203X_LOAD_MINIDRV; data->urb = usb_alloc_urb(0, GFP_KERNEL); - if (!data->urb) { - BT_ERR("Can't allocate URB"); + if (!data->urb) return -ENOMEM; - } if (request_firmware(&firmware, "BCM2033-MD.hex", &udev->dev) < 0) { BT_ERR("Mini driver request failed"); From 47b0f573f2fa7634860e16ea31f2bc3057a1022a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:37 +0200 Subject: [PATCH 0404/1050] Bluetooth: Check SOL_HCI for raw socket options The SOL_HCI level should be enforced when using socket options on the HCI raw socket interface. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 96f04b7b9556..99dd1503ef56 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1440,6 +1440,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { @@ -1523,6 +1526,9 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + if (get_user(len, optlen)) return -EFAULT; From 70ecce91e3a2d7e332fe56fd065c67d404b8fccf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:38 +0200 Subject: [PATCH 0405/1050] Bluetooth: Store control socket cookie and comm information To further allow unique identification and tracking of control socket, store cookie and comm information when binding the socket. 
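The cookie comes from an IDA allocator. For reference, ida_simple_get(ida, start, end, gfp) returns the smallest free ID in [start, end), with end == 0 meaning no upper bound, so the pattern used by this patch is:

    id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); /* IDs start at 1 */
    ...
    ida_simple_remove(&sock_cookie_ida, id);                 /* on socket release */

Starting at 1 means 0 can never be a valid cookie, and the release path parks the field at 0xffffffff before returning the ID to the allocator.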
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/hci_sock.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bfd1590821d6..69b5174168b7 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -371,6 +371,7 @@ void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); int hci_sock_test_flag(struct sock *sk, int nr); unsigned short hci_sock_get_channel(struct sock *sk); +u32 hci_sock_get_cookie(struct sock *sk); int hci_sock_init(void); void hci_sock_cleanup(void); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 99dd1503ef56..4dce6dfdb0f2 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -38,6 +39,8 @@ static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); +static DEFINE_IDA(sock_cookie_ida); + static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ @@ -52,6 +55,8 @@ struct hci_pinfo { __u32 cmsg_mask; unsigned short channel; unsigned long flags; + __u32 cookie; + char comm[TASK_COMM_LEN]; }; void hci_sock_set_flag(struct sock *sk, int nr) @@ -74,6 +79,11 @@ unsigned short hci_sock_get_channel(struct sock *sk) return hci_pi(sk)->channel; } +u32 hci_sock_get_cookie(struct sock *sk) +{ + return hci_pi(sk)->cookie; +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -585,6 +595,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + int id; BT_DBG("sock %p sk %p", sock, sk); @@ -593,8 +604,17 @@ static int hci_sock_release(struct socket *sock) hdev = hci_pi(sk)->hdev; - if (hci_pi(sk)->channel == HCI_CHANNEL_MONITOR) + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); + break; + case HCI_CHANNEL_CONTROL: + id = hci_pi(sk)->cookie; + + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + break; + } bt_sock_unlink(&hci_sk_list, sk); @@ -957,6 +977,15 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + int id; + + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); From 03c979c4717c7fa0c058fafe76ac4d6acdd1fb0d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:39 +0200 Subject: [PATCH 0406/1050] Bluetooth: Introduce helper to pack mgmt version information The mgmt version information will be also needed for the control changell tracing feature. This provides a helper to pack them. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cc349f633570..9f181b583b96 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1451,6 +1451,7 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ +void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7639290b6de3..9071886df194 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -278,6 +278,14 @@ static u8 le_addr_type(u8 mgmt_addr_type) return ADDR_LE_DEV_RANDOM; } +void mgmt_fill_version_info(void *ver) +{ + struct mgmt_rp_read_version *rp = ver; + + rp->version = MGMT_VERSION; + rp->revision = cpu_to_le16(MGMT_REVISION); +} + static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -285,8 +293,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("sock %p", sk); - rp.version = MGMT_VERSION; - rp.revision = cpu_to_le16(MGMT_REVISION); + mgmt_fill_version_info(&rp); return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, sizeof(rp)); From 249fa1699f8642c73eb43e61b321969f0549ab2c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:40 +0200 Subject: [PATCH 0407/1050] Bluetooth: Add support for sending MGMT open and close to monitor This sends new notifications to the monitor support whenever a management channel has been opened or closed. This allows tracing of control channels really easily. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 95 +++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 587d0131b349..9640790cbbcc 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -45,6 +45,8 @@ struct hci_mon_hdr { #define HCI_MON_VENDOR_DIAG 11 #define HCI_MON_SYSTEM_NOTE 12 #define HCI_MON_USER_LOGGING 13 +#define HCI_MON_CTRL_OPEN 14 +#define HCI_MON_CTRL_CLOSE 15 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4dce6dfdb0f2..2d8725006838 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -394,6 +394,59 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + u16 format = 0x0002; + u8 ver[3]; + u32 flags; + + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); + if (!skb) + return NULL; + + mgmt_fill_version_info(ver); + flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 
0x1 : 0x0; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(format, skb_put(skb, 2)); + memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver)); + put_unaligned_le32(flags, skb_put(skb, 4)); + *skb_put(skb, 1) = TASK_COMM_LEN; + memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + +static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(4, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -468,6 +521,29 @@ static void send_monitor_replay(struct sock *sk) read_unlock(&hci_dev_list_lock); } +static void send_monitor_control_replay(struct sock *mon_sk) +{ + struct sock *sk; + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + skb = create_monitor_ctrl_open(sk); + if (!skb) + continue; + + if (sock_queue_rcv_skb(mon_sk, skb)) + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { @@ -595,6 +671,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + struct sk_buff *skb; int id; BT_DBG("sock %p sk %p", sock, sk); @@ -611,6 +688,14 @@ static int hci_sock_release(struct socket *sock) case HCI_CHANNEL_CONTROL: id = hci_pi(sk)->cookie; + /* Send event to monitor */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_pi(sk)->cookie = 0xffffffff; ida_simple_remove(&sock_cookie_ida, id); break; @@ -931,6 +1016,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Bluetooth subsystem version %s", BT_SUBSYS_VERSION); send_monitor_replay(sk); + send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; @@ -977,6 +1063,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. 
*/ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; int id; id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); @@ -986,6 +1073,14 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->cookie = id; get_task_comm(hci_pi(sk)->comm, current); + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); From 38ceaa00d02dceb22c6bdd5268f5a44d5c00e123 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:41 +0200 Subject: [PATCH 0408/1050] Bluetooth: Add support for sending MGMT commands and events to monitor This adds support for tracing all management commands and events via the monitor interface. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 3 + include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 94 ++++++++++++++++++++++++++++++++ net/bluetooth/mgmt_util.c | 66 +++++++++++++++++++++- 4 files changed, 162 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9f181b583b96..a48f71d73dc8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1406,6 +1406,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 9640790cbbcc..240786b04a46 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -47,6 +47,8 @@ struct hci_mon_hdr { #define HCI_MON_USER_LOGGING 13 #define HCI_MON_CTRL_OPEN 14 #define HCI_MON_CTRL_CLOSE 15 +#define HCI_MON_CTRL_COMMAND 16 +#define HCI_MON_CTRL_EVENT 17 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2d8725006838..576ea48631b9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -315,6 +315,60 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk) +{ + struct sock *sk; + __le16 index; + + if (hdev) + index = cpu_to_le16(hdev->id); + else + index = cpu_to_le16(MGMT_INDEX_NONE); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + /* Ignore socket without the flag set */ + if (!hci_sock_test_flag(sk, flag)) + continue; + + /* Skip the original socket */ + if (sk == skip_sk) + continue; + + skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); + if (!skb) + continue; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(event, skb_put(skb, 2)); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + skb->tstamp = tstamp; + + hdr = (void *)skb_push(skb, 
HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -447,6 +501,33 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) return skb; } +static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, + u16 opcode, u16 len, + const void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -1257,6 +1338,19 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, goto done; } + if (chan->channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; + + /* Send event to monitor */ + skb = create_monitor_ctrl_command(sk, index, opcode, len, + buf + sizeof(*hdr)); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 8c30c7eb8bef..c933bd08c1fe 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -21,12 +21,41 @@ SOFTWARE IS DISCLAIMED. 
*/ +#include + #include #include +#include #include #include "mgmt_util.h" +static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, + u16 opcode, u16 len, void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk) { @@ -52,14 +81,18 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, __net_timestamp(skb); hci_send_to_channel(channel, skb, flag, skip_sk); - kfree_skb(skb); + if (channel == HCI_CHANNEL_CONTROL) + hci_send_monitor_ctrl_event(hdev, event, data, data_len, + skb_get_ktime(skb), flag, skip_sk); + + kfree_skb(skb); return 0; } int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; int err; @@ -80,17 +113,30 @@ int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) ev->status = status; ev->opcode = cpu_to_le16(cmd); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_STATUS, sizeof(*ev), ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, void *rp, size_t rp_len) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; int err; @@ -114,10 +160,24 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, if (rp) memcpy(ev->data, rp, rp_len); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_COMPLETE, + sizeof(*ev) + rp_len, ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } From 37d3a1fab50fa07ac706787646e61c60e7c520e0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 28 Aug 2016 20:53:34 +0300 Subject: [PATCH 0409/1050] Bluetooth: mgmt: Fix sending redundant event for Advertising Instance When an Advertising Instance is removed, the Advertising Removed event shouldn't be sent to the same socket that issued the Remove Advertising command (it gets a command complete event instead). The mgmt_advertising_removed() function already has a parameter for skipping a specific socket, but there was no code to propagate the right value to this parameter. This patch fixes the issue by making sure the intermediate hci_req_clear_adv_instance() function gets the socket pointer. 
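As a rough sketch of the dispatch pattern being fixed (rp and instance are placeholders, not code from the patch): the issuing socket is answered directly and must therefore be skipped when the event is broadcast.

	/* Reply to the socket that issued Remove Advertising... */
	mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
			  MGMT_STATUS_SUCCESS, &rp, sizeof(rp));

	/* ...and notify everyone else. Passing sk instead of NULL as
	 * the skip socket is what this patch wires through
	 * hci_req_clear_adv_instance().
	 */
	mgmt_advertising_removed(sk, hdev, instance);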
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 11 ++++++----- net/bluetooth/hci_request.h | 5 +++-- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b0e23dfc5c34..9968b1c7c03a 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1194,7 +1194,7 @@ static void adv_timeout_expire(struct work_struct *work) hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, instance, false); + hci_req_clear_adv_instance(hdev, NULL, &req, instance, false); if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); @@ -1284,8 +1284,9 @@ static void cancel_adv_timeout(struct hci_dev *hdev) * setting. * - force == false: Only instances that have a timeout will be removed. */ -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force) +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force) { struct adv_info *adv_instance, *n, *next_instance = NULL; int err; @@ -1311,7 +1312,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, rem_inst = adv_instance->instance; err = hci_remove_adv_instance(hdev, rem_inst); if (!err) - mgmt_advertising_removed(NULL, hdev, rem_inst); + mgmt_advertising_removed(sk, hdev, rem_inst); } } else { adv_instance = hci_find_adv_instance(hdev, instance); @@ -1325,7 +1326,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, err = hci_remove_adv_instance(hdev, instance); if (!err) - mgmt_advertising_removed(NULL, hdev, instance); + mgmt_advertising_removed(sk, hdev, instance); } } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index b2d044bdc732..ac1e11006f38 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -73,8 +73,9 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, bool force); -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force); +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force); void __hci_req_update_class(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9071886df194..f9af5f7c2ea2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -929,7 +929,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - hci_req_clear_adv_instance(hdev, NULL, 0x00, false); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, false); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) __hci_req_disable_advertising(&req); @@ -1697,7 +1697,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) enabled = lmp_host_le_capable(hdev); if (!val) - hci_req_clear_adv_instance(hdev, NULL, 0x00, true); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -6182,7 +6182,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, cp->instance, true); + hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) 
__hci_req_disable_advertising(&req); From 5504c3a31061704512707bb23bd7835e8a5281e4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:19:46 +0200 Subject: [PATCH 0410/1050] Bluetooth: Use individual flags for certain management events Instead of hiding everything behind a general management events flag, introduce individual flags that allow fine control over which events are sent to a given management channel. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 5 ++++- net/bluetooth/hci_sock.c | 5 ++++- net/bluetooth/mgmt.c | 32 +++++++++++++------------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0aac123b5eee..ddb9accac3a5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,7 +208,10 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, - HCI_MGMT_GENERIC_EVENTS, + HCI_MGMT_OPTION_EVENTS, + HCI_MGMT_SETTING_EVENTS, + HCI_MGMT_DEV_CLASS_EVENTS, + HCI_MGMT_LOCAL_NAME_EVENTS, HCI_MGMT_OOB_DATA_EVENTS, }; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 576ea48631b9..d37c2243157b 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1164,7 +1164,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); - hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f9af5f7c2ea2..469f5cc3109b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -256,13 +256,6 @@ static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, flag, skip_sk); } -static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data, - u16 len, struct sock *skip_sk) -{ - return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, - HCI_MGMT_GENERIC_EVENTS, skip_sk); -} - static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, struct sock *skip_sk) { @@ -579,8 +572,8 @@ static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = get_missing_options(hdev); - return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, - sizeof(options), skip); + return mgmt_limited_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), HCI_MGMT_OPTION_EVENTS, skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) @@ -1007,8 +1000,8 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) { __le32 ev = cpu_to_le32(get_current_settings(hdev)); - return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, - sizeof(ev), skip); + return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); } int mgmt_new_settings(struct hci_dev *hdev) @@ -3000,8 +2993,8 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (err < 0) goto failed; - err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, - data, len, sk); + err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, + len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); goto failed; } @@ -6502,8 +6495,9 @@ void __mgmt_power_off(struct hci_dev *hdev)
mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), + HCI_MGMT_DEV_CLASS_EVENTS, NULL); new_settings(hdev, match.sk); @@ -7100,8 +7094,8 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - dev_class, 3, NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, + 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); if (match.sk) sock_put(match.sk); @@ -7130,8 +7124,8 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) return; } - mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); + mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) From 56f787c5024de7829f8cccce7569feb520829baf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:19:47 +0200 Subject: [PATCH 0411/1050] Bluetooth: Fix wrong Get Clock Information return parameters The address information of the Get Clock Information return parameters is copied from a different memory location. It uses &cmd->param while it actually needs to be cmd->param. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 469f5cc3109b..0c83dd36b7e3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4869,7 +4869,7 @@ static int clock_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) int err; memset(&rp, 0, sizeof(rp)); - memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); + memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); if (status) goto complete; From 9db5c62951871c33e4443fe433e234419cf574d2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:31:57 +0200 Subject: [PATCH 0412/1050] Bluetooth: Use command status event for Set IO Capability errors In case of failure, the Set IO Capability command is supposed to return command status and not command complete.
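For reference, a sketch of the two event payloads as defined in include/net/bluetooth/mgmt.h that makes the difference visible: command status carries no return parameters, while command complete does.

	struct mgmt_ev_cmd_status {
		__le16 opcode;	/* the failed command */
		__u8 status;	/* error code only */
	} __packed;

	struct mgmt_ev_cmd_complete {
		__le16 opcode;
		__u8 status;
		__u8 data[0];	/* command-specific return parameters */
	} __packed;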
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0c83dd36b7e3..47efdb4a669a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2513,8 +2513,8 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, - MGMT_STATUS_INVALID_PARAMS, NULL, 0); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); From 3d4e2fb64111ffb5dc737daf25f5434bf39bee5f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Aug 2016 14:36:18 +0200 Subject: [PATCH 0413/1050] Bluetooth: add WCNSS dependency for HCI driver The newly added bluetooth driver is based on the soc-specific support, but lacks the obvious compile-time dependency on that: drivers/bluetooth/btqcomsmd.o: In function `btqcomsmd_probe': btqcomsmd.c:(.text.btqcomsmd_probe+0x40): undefined reference to `qcom_wcnss_open_channel' btqcomsmd.c:(.text.btqcomsmd_probe+0x5c): undefined reference to `qcom_wcnss_open_channel' Makefile:969: recipe for target 'vmlinux' failed Fixes: 90c107dc8b2c ("Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver") Signed-off-by: Arnd Bergmann Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 2c481911f1ce..43e9f9328df8 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -333,7 +333,7 @@ config BT_WILINK config BT_QCOMSMD tristate "Qualcomm SMD based HCI support" - depends on QCOM_SMD + depends on QCOM_SMD && QCOM_WCNSS_CTRL select BT_QCA help Qualcomm SMD based HCI driver. From df1cb87af9f24527a8932e4d195d49ffab1168d2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:34 +0200 Subject: [PATCH 0414/1050] Bluetooth: Introduce helper functions for socket cookie handling Instead of manually allocating cookie information each time, use helper functions for generating and releasing cookies. 
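A hypothetical caller sketch showing the intended idempotence of the helpers introduced above (send_open_event() is a placeholder for the monitor message code, not a real function):

	/* First call: allocates an IDA-backed cookie, records the
	 * task comm and returns true, signalling that a monitor
	 * open event is due.
	 */
	if (hci_sock_gen_cookie(sk))
		send_open_event(sk);

	/* Any later call finds the cookie already set and returns
	 * false, so no duplicate open event is generated.
	 */

	/* On socket release the cookie is returned to the IDA. */
	hci_sock_free_cookie(sk);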
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d37c2243157b..804208d48368 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -84,6 +84,33 @@ u32 hci_sock_get_cookie(struct sock *sk) return hci_pi(sk)->cookie; } +static bool hci_sock_gen_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (!id) { + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + return true; + } + + return false; +} + +static void hci_sock_free_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (id) { + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + } +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -753,7 +780,6 @@ static int hci_sock_release(struct socket *sock) struct sock *sk = sock->sk; struct hci_dev *hdev; struct sk_buff *skb; - int id; BT_DBG("sock %p sk %p", sock, sk); @@ -767,8 +793,6 @@ static int hci_sock_release(struct socket *sock) atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_CONTROL: - id = hci_pi(sk)->cookie; - /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); if (skb) { @@ -777,8 +801,7 @@ static int hci_sock_release(struct socket *sock) kfree_skb(skb); } - hci_pi(sk)->cookie = 0xffffffff; - ida_simple_remove(&sock_cookie_ida, id); + hci_sock_free_cookie(sk); break; } @@ -1145,14 +1168,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { struct sk_buff *skb; - int id; - id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); - if (id < 0) - id = 0xffffffff; - - hci_pi(sk)->cookie = id; - get_task_comm(hci_pi(sk)->comm, current); + hci_sock_gen_cookie(sk); /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); From 9e8305b39bfa23a83b932007654097f4676c2ba2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:35 +0200 Subject: [PATCH 0415/1050] Bluetooth: Use numbers for subsystem version string Instead of keeping a version string around, use version and revision numbers and then stringify them for use as module parameter. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 3 ++- net/bluetooth/af_bluetooth.c | 10 +++++++--- net/bluetooth/hci_sock.c | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 69b5174168b7..d705bcf40710 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,7 +29,8 @@ #include #include -#define BT_SUBSYS_VERSION "2.21" +#define BT_SUBSYS_VERSION 2 +#define BT_SUBSYS_REVISION 21 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1d96ff3a8d87..1aff2da9bc74 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -713,13 +714,16 @@ static struct net_proto_family bt_sock_family_ops = { struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); +#define VERSION __stringify(BT_SUBSYS_VERSION) "." 
\ __stringify(BT_SUBSYS_REVISION) + static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", BT_SUBSYS_VERSION); + BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) @@ -797,7 +801,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); -MODULE_VERSION(BT_SUBSYS_VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); +MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 804208d48368..a4227c777d16 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1117,8 +1117,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); - send_monitor_note(sk, "Bluetooth subsystem version %s", - BT_SUBSYS_VERSION); + send_monitor_note(sk, "Bluetooth subsystem version %u.%u", + BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); send_monitor_control_replay(sk); From 0ef2c42f8c4e372bad16f67dc0f4b15b9be910f6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:36 +0200 Subject: [PATCH 0416/1050] Bluetooth: Send control open and close only when cookie is present Send the open and close monitor messages only when the cookie has been assigned. Also, if the socket is bound to a device, include the index in the message. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a4227c777d16..0deca758fd9e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -483,6 +483,10 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) u8 ver[3]; u32 flags; + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); if (!skb) return NULL; @@ -501,7 +505,10 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); - hdr->index = cpu_to_le16(HCI_DEV_NONE); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; @@ -512,6 +519,10 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) struct hci_mon_hdr *hdr; struct sk_buff *skb; + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; @@ -522,7 +533,10 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); - hdr->index = cpu_to_le16(HCI_DEV_NONE); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; From 5a6d2cf5f18b5afbae0b1b450070bbba50f1e3e0 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:37 +0200 Subject: [PATCH 0417/1050] Bluetooth: Assign the channel early when binding HCI sockets Assignment of the hci_pi(sk)->channel
should be done early when binding the HCI socket. This avoids confusion with the RAW channel that is used for legacy access. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 0deca758fd9e..ca13fac1c132 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1045,6 +1045,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, atomic_inc(&hdev->promisc); } + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; break; @@ -1107,9 +1108,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } } - atomic_inc(&hdev->promisc); - + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; + + atomic_inc(&hdev->promisc); break; case HCI_CHANNEL_MONITOR: @@ -1123,6 +1125,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, goto done; } + hci_pi(sk)->channel = haddr.hci_channel; + /* The monitor interface is restricted to CAP_NET_RAW * capabilities and with that implicitly trusted. */ @@ -1149,6 +1153,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = -EPERM; goto done; } + + hci_pi(sk)->channel = haddr.hci_channel; break; default: @@ -1170,6 +1176,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->channel = haddr.hci_channel; + /* At the moment the index and unconfigured index events * are enabled unconditionally. Setting them on each * socket when binding keeps this functionality. They * received by untrusted users. Example for such events * are changes to settings, class of device, name etc. */ - if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { struct sk_buff *skb; hci_sock_gen_cookie(sk); @@ -1203,8 +1211,6 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, break; } - - hci_pi(sk)->channel = haddr.hci_channel; sk->sk_state = BT_BOUND; done: From d0bef1d26fb6fdad818f3d15a178d51e2a8478ae Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:38 +0200 Subject: [PATCH 0418/1050] Bluetooth: Add extra channel checks for control open/close messages The control open and close monitoring events require special channel checks to ensure messages are only sent when the right events happen.
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 24 +++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index ca13fac1c132..b22efe272f7e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -479,7 +479,7 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; - u16 format = 0x0002; + u16 format; u8 ver[3]; u32 flags; @@ -487,11 +487,20 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) if (!hci_pi(sk)->cookie) return NULL; + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_CONTROL: + format = 0x0002; + mgmt_fill_version_info(ver); + break; + default: + /* No message for unsupported format */ + return NULL; + } + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); if (!skb) return NULL; - mgmt_fill_version_info(ver); flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); @@ -523,6 +532,14 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) if (!hci_pi(sk)->cookie) return NULL; + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_CONTROL: + break; + default: + /* No message for unsupported format */ + return NULL; + } + skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; @@ -652,9 +669,6 @@ static void send_monitor_control_replay(struct sock *mon_sk) sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *skb; - if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) - continue; - skb = create_monitor_ctrl_open(sk); if (!skb) continue; From f81f5b2db8692ff1d2d5f4db1fde58e67aa976a3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:39 +0200 Subject: [PATCH 0419/1050] Bluetooth: Send control open and close messages for HCI raw sockets When opening and closing HCI raw sockets, their main usage is for legacy userspace. To track interaction with the modern mgmt interface, send open and close monitoring messages for these actions. The HCI raw socket is special since it supports unbound ioctl operation; for that special case, delay the notification message until at least one ioctl has been executed. The difference between a bound and an unbound socket will be indicated by whether the HCI index is present or not.
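A hypothetical userspace sequence that exercises the delayed notification (HCIGETDEVLIST and the sockaddr_hci layout are the real interfaces; the program itself is only illustrative):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <bluetooth/bluetooth.h>
	#include <bluetooth/hci.h>

	int main(void)
	{
		char buf[sizeof(struct hci_dev_list_req) +
			 HCI_MAX_DEV * sizeof(struct hci_dev_req)];
		struct hci_dev_list_req *dl = (void *) buf;
		struct sockaddr_hci addr;
		int fd;

		fd = socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI);

		/* First ioctl on the unbound socket: this is where the
		 * cookie is generated and the monitor open message is
		 * sent, without an HCI index.
		 */
		dl->dev_num = HCI_MAX_DEV;
		ioctl(fd, HCIGETDEVLIST, dl);

		/* Binding afterwards announces the socket again, now
		 * with the index present (the follow-up patch turns
		 * this into an explicit close plus re-open).
		 */
		memset(&addr, 0, sizeof(addr));
		addr.hci_family = AF_BLUETOOTH;
		addr.hci_dev = 0;
		addr.hci_channel = HCI_CHANNEL_RAW;
		bind(fd, (struct sockaddr *) &addr, sizeof(addr));

		return 0;
	}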
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 48 ++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b22efe272f7e..c7772436f508 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,6 +488,11 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) return NULL; switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: + format = 0x0000; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); @@ -533,6 +538,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) return NULL; switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: case HCI_CHANNEL_CONTROL: break; default: @@ -820,6 +826,7 @@ static int hci_sock_release(struct socket *sock) case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); break; + case HCI_CHANNEL_RAW: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); @@ -958,6 +965,27 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, goto done; } + /* When calling an ioctl on an unbound raw socket, then ensure + * that the monitor gets informed. Ensure that the resulting event + * is only send once by checking if the cookie exists or not. The + * socket cookie will be only ever generated once for the lifetime + * of a given socket. + */ + if (hci_sock_gen_cookie(sk)) { + struct sk_buff *skb; + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + release_sock(sk); switch (cmd) { @@ -1061,6 +1089,26 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; + + /* Only send the event to monitor when a new cookie has + * been generated. An existing cookie means that an unbound + * socket has seen an ioctl and that triggered the cookie + * generation and sending of the monitor event. + */ + if (hci_sock_gen_cookie(sk)) { + struct sk_buff *skb; + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } break; case HCI_CHANNEL_USER: From f4cdbb3f25c15c17a952deae1f2e0db6df8f1948 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:40 +0200 Subject: [PATCH 0420/1050] Bluetooth: Handle HCI raw socket transition from unbound to bound In case an unbound HCI raw socket is later on bound, ensure that the monitor notification messages indicate a close and re-open. None of the userspace tools use the socket this way, but it is actually possible to use an ioctl on an unbound socket and then later bind it.
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 53 +++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index c7772436f508..83e9fdb712e5 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1049,6 +1049,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; + struct sk_buff *skb; int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); @@ -1088,27 +1089,34 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } hci_pi(sk)->channel = haddr.hci_channel; - hci_pi(sk)->hdev = hdev; - /* Only send the event to monitor when a new cookie has - * been generated. An existing cookie means that an unbound - * socket has seen an ioctl and that triggered the cookie - * generation and sending of the monitor event. - */ - if (hci_sock_gen_cookie(sk)) { - struct sk_buff *skb; - - if (capable(CAP_NET_ADMIN)) - hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); - - /* Send event to monitor */ - skb = create_monitor_ctrl_open(sk); + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * then there has been already an ioctl issued against + * an unbound socket and with that triggerd an open + * notification. Send a close notification first to + * allow the state transition to bounded. + */ + skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + hci_pi(sk)->hdev = hdev; + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } break; case HCI_CHANNEL_USER: @@ -1251,9 +1259,20 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { - struct sk_buff *skb; - - hci_sock_gen_cookie(sk); + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been + * assigned, this socket will transtion from + * a raw socket into a control socket. To + * allow for a clean transtion, send the + * close notification first. + */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); From fac9a6021b4e8dc8b2112a8e133936c0daf7ff94 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 30 Aug 2016 22:42:53 +0530 Subject: [PATCH 0421/1050] Bluetooth: Remove deprecated create_singlethread_workqueue The workqueue "workqueue" queues multiple work items, viz. &qca->ws_awake_rx, &qca->ws_rx_vote_off, &qca->ws_awake_device and &qca->ws_tx_vote_off, which require strict execution ordering. Hence, an ordered dedicated workqueue has been used to replace the deprecated create_singlethread_workqueue instance. WQ_MEM_RECLAIM has not been set since the driver is not being used on a memory reclaim path.
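A minimal sketch of the guarantee being relied on; the names here are illustrative, not from the driver:

	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;
	static struct work_struct first, second;

	static void run_step(struct work_struct *work)
	{
		/* placeholder handler */
	}

	static int example_init(void)
	{
		/* An ordered workqueue forces max_active to 1, so items
		 * execute one at a time in queueing order, which is the
		 * property create_singlethread_workqueue used to give.
		 */
		example_wq = alloc_ordered_workqueue("example_wq", 0);
		if (!example_wq)
			return -ENOMEM;

		INIT_WORK(&first, run_step);
		INIT_WORK(&second, run_step);

		/* "first" is guaranteed to finish before "second" runs. */
		queue_work(example_wq, &first);
		queue_work(example_wq, &second);
		return 0;
	}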
Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 683c2b642057..6c867fbc56a7 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -397,7 +397,7 @@ static int qca_open(struct hci_uart *hu) skb_queue_head_init(&qca->txq); skb_queue_head_init(&qca->tx_wait_q); spin_lock_init(&qca->hci_ibs_lock); - qca->workqueue = create_singlethread_workqueue("qca_wq"); + qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); if (!qca->workqueue) { BT_ERR("QCA Workqueue not initialized properly"); kfree(qca); From 418678b01aca849b4f86224e609610ce87a9bdc4 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 1 Sep 2016 17:22:37 +0200 Subject: [PATCH 0422/1050] Bluetooth: btusb: Mark CW6622 devices to have broken link key commands Conwise CW6622 seems to have a problem with the stored link key commands so just mark it as broken. < HCI Command: Read Local Supported Features (0x04|0x0003) plen 0 > HCI Event: Command Complete (0x0e) plen 12 Read Local Supported Features (0x04|0x0003) ncmd 1 status 0x00 Features: 0xff 0x3e 0x85 0x38 0x18 0x18 0x00 0x00 < HCI Command: Read Local Version Information (0x04|0x0001) plen 0 > HCI Event: Command Complete (0x0e) plen 12 Read Local Version Information (0x04|0x0001) ncmd 1 status 0x00 HCI Version: 2.0 (0x3) HCI Revision: 0x1f4 LMP Version: 2.0 (0x3) LMP Subversion: 0x1f4 Manufacturer: CONWISE Technology Corporation Ltd (66) ... < HCI Command: Read Local Supported Commands (0x04|0x0002) plen 0 > HCI Event: Command Complete (0x0e) plen 68 Read Local Supported Commands (0x04|0x0002) ncmd 1 status 0x00 Commands: 7fffef03cedfffffffffff1ff20ff8ff3f ... 
< HCI Command: Read Stored Link Key (0x03|0x000d) plen 7 bdaddr 00:00:00:00:00:00 all 1 > HCI Event: Command Complete (0x0e) plen 8 Read Stored Link Key (0x03|0x000d) ncmd 1 status 0x11 max 0 num 0 Error: Unsupported Feature or Parameter Value Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 80ae854a0bee..9ebd73dd7915 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -62,6 +62,7 @@ static struct usb_driver btusb_driver; #define BTUSB_REALTEK 0x20000 #define BTUSB_BCM2045 0x40000 #define BTUSB_IFNUM_2 0x80000 +#define BTUSB_CW6622 0x100000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -291,7 +292,8 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0400, 0x080a), .driver_info = BTUSB_BROKEN_ISOC }, /* CONWISE Technology based adapters with buggy SCO support */ - { USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC }, + { USB_DEVICE(0x0e5e, 0x6622), + .driver_info = BTUSB_BROKEN_ISOC | BTUSB_CW6622}, /* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */ { USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE }, @@ -2845,6 +2847,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame; hdev->notify = btusb_notify; + if (id->driver_info & BTUSB_CW6622) + set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + if (id->driver_info & BTUSB_BCM2045) set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); From 321c6feed2519a2691f65e41c4d62332d6ee3d52 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Sep 2016 16:46:23 +0200 Subject: [PATCH 0423/1050] Bluetooth: Add framework for Extended Controller Information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This command is used to retrieve the current state and basic information of a controller. It is typically used right after getting the response to the Read Controller Index List command or an Index Added event (or its extended counterparts). When any of the values in the EIR_Data field changes, the event Extended Controller Information Changed will be used to inform clients about the updated information. 
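A hypothetical client-side sketch of walking the EIR_Data field of the response; the encoding is the standard EIR format of one length byte (covering type plus data), one type byte, then the data:

	#include <stdint.h>
	#include <stdio.h>

	static void parse_eir(const uint8_t *eir, uint16_t eir_len)
	{
		uint16_t off = 0;

		while (off + 1 < eir_len) {
			uint8_t field_len = eir[off];

			/* A zero length or an entry running past the
			 * end terminates the walk.
			 */
			if (field_len == 0 || off + 1 + field_len > eir_len)
				break;

			printf("type 0x%02x, %u data bytes\n",
			       eir[off + 1], field_len - 1);

			off += field_len + 1;
		}
	}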
Signed-off-by: Marcel Holtmann Signed-off-by: Michał Narajowski --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/mgmt.h | 18 +++++++++++ net/bluetooth/mgmt.c | 62 ++++++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index ddb9accac3a5..99aa5e5e3100 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,6 +208,7 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, + HCI_MGMT_EXT_INFO_EVENTS, HCI_MGMT_OPTION_EVENTS, HCI_MGMT_SETTING_EVENTS, HCI_MGMT_DEV_CLASS_EVENTS, diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 7647964b1efa..611b243713ea 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -586,6 +586,18 @@ struct mgmt_rp_get_adv_size_info { #define MGMT_OP_START_LIMITED_DISCOVERY 0x0041 +#define MGMT_OP_READ_EXT_INFO 0x0042 +#define MGMT_READ_EXT_INFO_SIZE 0 +struct mgmt_rp_read_ext_info { + bdaddr_t bdaddr; + __u8 version; + __le16 manufacturer; + __le32 supported_settings; + __le32 current_settings; + __le16 eir_len; + __u8 eir[0]; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -800,3 +812,9 @@ struct mgmt_ev_advertising_added { struct mgmt_ev_advertising_removed { __u8 instance; } __packed; + +#define MGMT_EV_EXT_INFO_CHANGED 0x0025 +struct mgmt_ev_ext_info_changed { + __le16 eir_len; + __u8 eir[0]; +} __packed; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 47efdb4a669a..69001f415efa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -104,6 +104,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_events[] = { @@ -141,6 +142,7 @@ static const u16 mgmt_events[] = { MGMT_EV_LOCAL_OOB_DATA_UPDATED, MGMT_EV_ADVERTISING_ADDED, MGMT_EV_ADVERTISING_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; static const u16 mgmt_untrusted_commands[] = { @@ -149,6 +151,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_UNCONF_INDEX_LIST, MGMT_OP_READ_CONFIG_INFO, MGMT_OP_READ_EXT_INDEX_LIST, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_untrusted_events[] = { @@ -162,6 +165,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_NEW_CONFIG_OPTIONS, MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -862,6 +866,52 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_ext_info rp; + + BT_DBG("sock %p %s", sk, hdev->name); + + hci_dev_lock(hdev); + + memset(&rp, 0, sizeof(rp)); + + bacpy(&rp.bdaddr, &hdev->bdaddr); + + rp.version = hdev->hci_ver; + rp.manufacturer = cpu_to_le16(hdev->manufacturer); + + rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp.current_settings = cpu_to_le32(get_current_settings(hdev)); + + rp.eir_len = cpu_to_le16(0); + + hci_dev_unlock(hdev); + + /* If this command is called at least once, then the events + * for class of device and local name changes are disabled + * and only the new extended controller information event + * is used. 
+ */ + hci_sock_set_flag(sk, HCI_MGMT_EXT_INFO_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, &rp, + sizeof(rp)); +} + +static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) +{ + struct mgmt_ev_ext_info_changed ev; + + ev.eir_len = cpu_to_le16(0); + + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, &ev, + sizeof(ev), HCI_MGMT_EXT_INFO_EVENTS, skip); +} + static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); @@ -2995,6 +3045,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); + ext_info_changed(hdev, sk); goto failed; } @@ -6356,6 +6407,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, + { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, + HCI_MGMT_UNTRUSTED }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -6494,10 +6547,12 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } new_settings(hdev, match.sk); @@ -7093,9 +7148,11 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); - if (!status) + if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } if (match.sk) sock_put(match.sk); @@ -7126,6 +7183,7 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); + ext_info_changed(hdev, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) From 8a0c9f49090fe8ae122fd1bbf7260c8492289386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Thu, 1 Sep 2016 16:46:24 +0200 Subject: [PATCH 0424/1050] Bluetooth: Append local name and CoD to Extended Controller Info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds device class, complete local name and short local name to EIR data in Extended Controller Info as specified in docs. 
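As a concrete sketch of the resulting wire format (controller name and buffer size are hypothetical), appending the complete local name "hci0" with the helper defined above produces:

	u8 buff[512];
	u16 eir_len;

	eir_len = eir_append_data(buff, 0, EIR_NAME_COMPLETE,
				  (u8 *) "hci0", 4);
	/* buff[0] = 0x05        length: 1 type byte + 4 data bytes
	 * buff[1] = 0x09        EIR_NAME_COMPLETE
	 * buff[2..5] = "hci0"   the name itself
	 * eir_len == 6
	 */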
Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 63 +++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 69001f415efa..74179b92ef22 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -866,26 +866,58 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, + u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct mgmt_rp_read_ext_info rp; + struct mgmt_rp_read_ext_info *rp; + char buff[512]; + u16 eir_len = 0; + u8 name_len; BT_DBG("sock %p %s", sk, hdev->name); hci_dev_lock(hdev); - memset(&rp, 0, sizeof(rp)); + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(buff, eir_len, + EIR_CLASS_OF_DEV, + hdev->dev_class, 3); - bacpy(&rp.bdaddr, &hdev->bdaddr); + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(buff, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); - rp.version = hdev->hci_ver; - rp.manufacturer = cpu_to_le16(hdev->manufacturer); + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); - rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); - rp.current_settings = cpu_to_le32(get_current_settings(hdev)); + rp = kmalloc(sizeof(*rp) + eir_len, GFP_KERNEL); + if (!rp) + return -ENOMEM; - rp.eir_len = cpu_to_le16(0); + memset(rp, 0, sizeof(*rp) + eir_len); + + rp->eir_len = cpu_to_le16(eir_len); + memcpy(rp->eir, buff, eir_len); + + bacpy(&rp->bdaddr, &hdev->bdaddr); + + rp->version = hdev->hci_ver; + rp->manufacturer = cpu_to_le16(hdev->manufacturer); + + rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp->current_settings = cpu_to_le32(get_current_settings(hdev)); hci_dev_unlock(hdev); @@ -898,8 +930,8 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, &rp, - sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, rp, + sizeof(*rp) + eir_len); } static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) @@ -5552,17 +5584,6 @@ unlock: return err; } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { From bdca1fd9a6df745857e23c6056494b7fe062b4e6 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 1 Sep 2016 11:24:57 +0200 Subject: [PATCH 0425/1050] fakelb: fix schedule while atomic This patch changes the spinlock to a mutex for the available fakelb phy list. While holding the spinlock, ieee802154_unregister_hw is called, which takes the rtnl_mutex; in that case we get a "BUG: sleeping function called from invalid context" error.
We simply change the spinlock to a mutex, which allows holding the rtnl lock there. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/fakelb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 0becf0ac3926..ec387efb61d0 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -30,7 +30,7 @@ static int numlbs = 2; static LIST_HEAD(fakelb_phys); -static DEFINE_SPINLOCK(fakelb_phys_lock); +static DEFINE_MUTEX(fakelb_phys_lock); static LIST_HEAD(fakelb_ifup_phys); static DEFINE_RWLOCK(fakelb_ifup_phys_lock); @@ -188,9 +188,9 @@ static int fakelb_add_one(struct device *dev) if (err) goto err_reg; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_add_tail(&phy->list, &fakelb_phys); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; @@ -222,10 +222,10 @@ static int fakelb_probe(struct platform_device *pdev) return 0; err_slave: - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return err; } @@ -233,10 +233,10 @@ static int fakelb_remove(struct platform_device *pdev) { struct fakelb_phy *phy, *tmp; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; } From aa1638dde75d00e4f549902017d0df48b77e86ff Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Sep 2016 19:48:28 +0200 Subject: [PATCH 0426/1050] Bluetooth: Send control open and close messages for HCI user channels When opening and closing an HCI user channel, send monitoring messages to be able to trace its behavior. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 83e9fdb712e5..48f9471e7c85 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -493,6 +493,11 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; + case HCI_CHANNEL_USER: + format = 0x0001; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); @@ -539,6 +544,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: break; default: @@ -827,6 +833,7 @@ static int hci_sock_release(struct socket *sock) atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); @@ -1179,8 +1186,36 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } hci_pi(sk)->channel = haddr.hci_channel; + + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * this socket will transition from a raw socket into + * a user channel socket. For a clean transition, send + * the close notification first.
+ */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + + /* The user channel is restricted to CAP_NET_ADMIN + * capabilities and with that implicitly trusted. + */ + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->hdev = hdev; + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + atomic_inc(&hdev->promisc); break; From baab793225c9badf46309f56982eb1012dbaac80 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 4 Sep 2016 05:13:46 +0200 Subject: [PATCH 0427/1050] Bluetooth: Fix wrong New Settings event when closing HCI User Channel When closing the HCI User Channel, the New Settings event was sent out to inform about changed settings. However, such an event is wrong, since the exclusive HCI User Channel access is active until the Index Added event has been sent. @ USER Close: test @ MGMT Event: New Settings (0x0006) plen 4 Current settings: 0x00000ad0 Bondable Secure Simple Pairing BR/EDR Low Energy Secure Connections = Close Index: 00:14:EF:22:04:12 @ MGMT Event: Index Added (0x0004) plen 0 Calling __mgmt_power_off from hci_dev_do_close requires an extra check for an active HCI User Channel. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ddf8432fe8fb..3ac89e9ace71 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1562,6 +1562,7 @@ int hci_dev_do_close(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); if (!auto_off && hdev->dev_type == HCI_PRIMARY && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); From 0676cab47ed18c9cfa884f055a3ba0f9e6fb8e96 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Sep 2016 13:15:49 +0100 Subject: [PATCH 0428/1050] Bluetooth: btqca: remove null checks on edl->data as it is an array edl->data is an array of __u8, so the null check is unnecessary; remove it.
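Why the removed test could never fire, in a standalone sketch (struct hdr here is a stand-in for the real edl_event_hdr):

#include <stddef.h>

struct hdr {
	unsigned char rtype;
	unsigned char data[];	/* array member, not a pointer */
};

static int data_is_null(const struct hdr *h)
{
	/* h->data decays to h plus offsetof(struct hdr, data), so it is
	 * non-NULL for any non-NULL h; compilers warn that this test is
	 * always false.
	 */
	return h->data == NULL;
}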
Signed-off-by: Colin Ian King Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 4a6208168850..28afd5d585f9 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -55,8 +55,8 @@ static int rome_patch_ver_req(struct hci_dev *hdev, u32 *rome_version) } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } @@ -224,8 +224,8 @@ static int rome_tlv_send_segment(struct hci_dev *hdev, int idx, int seg_size, } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } From 3c0975a7a1087add3bf873601f0270aa695d7616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Dalleau?= Date: Thu, 8 Sep 2016 12:00:11 +0200 Subject: [PATCH 0429/1050] Bluetooth: Fix reason code used for rejecting SCO connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A comment in the code states that SCO connections should be rejected with the proper error value between 0xd-0xf. The code uses HCI_ERROR_REMOTE_LOW_RESOURCES which is 0x14. This led to the following error: < HCI Command: Reject Synchronous Co.. (0x01|0x002a) plen 7 Address: 34:51:C9:EF:02:CA (Apple, Inc.) Reason: Remote Device Terminated due to Low Resources (0x14) > HCI Event: Command Status (0x0f) plen 4 Reject Synchronous Connection Request (0x01|0x002a) ncmd 1 Status: Invalid HCI Command Parameters (0x12) Instead make use of HCI_ERROR_REJ_LIMITED_RESOURCES which is 0xd. Signed-off-by: Frédéric Dalleau Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9968b1c7c03a..9566ff8e3223 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1717,7 +1717,7 @@ void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, * function. To be safe hard-code one of the * values that's suitable for SCO. */ - rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES; + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(rej), &rej); From 4037a7747d7b5a3e5bb4d10fb9ea6e2fd8a23c3b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 8 Sep 2016 18:07:07 +0200 Subject: [PATCH 0430/1050] Bluetooth: Increase the subsystem minor version number While the subsystem version information is purely informational, increase the minor number due to the addition of user channel and management control monitoring support. It is helpful for debugging purposes to see the version numbers change.
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d705bcf40710..0a1e21d7bce1 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -30,7 +30,7 @@ #include #define BT_SUBSYS_VERSION 2 -#define BT_SUBSYS_REVISION 21 +#define BT_SUBSYS_REVISION 22 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 From 3e36ca483a642f441b8e29b4e98091f2c62bfb38 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 12:21:22 +0000 Subject: [PATCH 0431/1050] Bluetooth: Use kzalloc instead of kmalloc/memset Use kzalloc rather than kmalloc followed by memset with 0. Generated by: scripts/coccinelle/api/alloc/kzalloc-simple.cocci Signed-off-by: Wei Yongjun Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 74179b92ef22..0ac881cfc646 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -902,12 +902,10 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, hdev->short_name, name_len); - rp = kmalloc(sizeof(*rp) + eir_len, GFP_KERNEL); + rp = kzalloc(sizeof(*rp) + eir_len, GFP_KERNEL); if (!rp) return -ENOMEM; - memset(rp, 0, sizeof(*rp) + eir_len); - rp->eir_len = cpu_to_le16(eir_len); memcpy(rp->eir, buff, eir_len); From 83ebb9ec734e9e768a9fae469e4a7ed1762ef43a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 9 Sep 2016 20:24:40 +0200 Subject: [PATCH 0432/1050] Bluetooth: Fix not registering BR/EDR SMP channel with force_bredr flag If force_bredr is set, the SMP BR/EDR channel should also be registered for non-SC capable controllers. Since the hcidev flag is persistent across power toggles, it can already be set when calling smp_register(). This resulted in the SMP BR/EDR channel not being registered even if the HCI_FORCE_BREDR_SMP flag was set. This also fixes a NULL pointer dereference when trying to disable force_bredr after a power cycle. BUG: unable to handle kernel NULL pointer dereference at 0000000000000388 IP: [] smp_del_chan+0x18/0x80 [bluetooth] Call Trace: [] force_bredr_smp_write+0xba/0x100 [bluetooth] [] full_proxy_write+0x54/0x90 [] __vfs_write+0x37/0x160 [] ? selinux_file_permission+0xd7/0x110 [] ? security_file_permission+0x3d/0xc0 [] ?
percpu_down_read+0x12/0x50 [] vfs_write+0xb5/0x1a0 [] SyS_write+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 Code: 48 8b 45 f0 eb c1 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 05 c6 3b 02 00 04 55 48 89 e5 41 54 53 49 89 fc 75 4b <49> 8b 9c 24 88 03 00 00 48 85 db 74 31 49 c7 84 24 88 03 00 00 RIP [] smp_del_chan+0x18/0x80 [bluetooth] RSP CR2: 0000000000000388 Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4c1a16a96ae5..43faf2aea2ab 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3387,7 +3387,10 @@ int smp_register(struct hci_dev *hdev) if (!lmp_sc_capable(hdev)) { debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, hdev, &force_bredr_smp_fops); - return 0; + + /* Flag can be already set here (due to power toggle) */ + if (!hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) + return 0; } if (WARN_ON(hdev->smp_bredr_data)) { From 1110a2dbe69831abdcf119c3a9a4c4ef2d0905f8 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 9 Sep 2016 10:02:05 -0500 Subject: [PATCH 0433/1050] Bluetooth: btrtl: Add RTL8822BE Bluetooth device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RTL8822BE is a new Realtek wifi and BT device. Support for the BT part is hereby added. As this device is similar to most of the other Realtek BT devices, the changes are minimal. The main difference is that the 8822BE needs a configuration file for enabling and disabling features. Thus code is added to select and load this configuration file. Although not needed at the moment, hooks are added for the other devices that might need such configuration files. One additional change is to the routine that tests that the project ID contained in the firmware matches the hardware. As the project IDs are not sequential, continuing to use the position in the array as the expected value of the ID would require adding extra unused entries in the table, and any subsequent rearrangement of the array would break the code. To fix these problems, the array elements now contain both the hardware ID and the expected value for the project ID.
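The lookup pattern adopted here, as a standalone sketch (table abridged from the patch below; lookup_lmp_subver is illustrative, not a driver function):

#include <stddef.h>

struct id_map {
	unsigned short lmp_subver;
	unsigned char id;	/* project IDs are sparse: 0..3, then 8 */
};

static const struct id_map project_ids[] = {
	{ 0x8723, 1 }, { 0x8821, 2 }, { 0x8761, 3 }, { 0x8822, 8 },
};

/* return the expected lmp_subver for a firmware project id, or -1 */
static int lookup_lmp_subver(unsigned char project_id)
{
	size_t i;

	for (i = 0; i < sizeof(project_ids) / sizeof(project_ids[0]); i++)
		if (project_ids[i].id == project_id)
			return project_ids[i].lmp_subver;
	return -1;
}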
Signed-off-by: 陆朱伟 Signed-off-by: Larry Finger Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btrtl.c | 107 +++++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 12 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 84288938f7f2..fc9b25703c67 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -33,6 +33,7 @@ #define RTL_ROM_LMP_8723B 0x8723 #define RTL_ROM_LMP_8821A 0x8821 #define RTL_ROM_LMP_8761A 0x8761 +#define RTL_ROM_LMP_8822B 0x8822 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version) { @@ -78,11 +79,15 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, const unsigned char *patch_length_base, *patch_offset_base; u32 patch_offset = 0; u16 patch_length, num_patches; - const u16 project_id_to_lmp_subver[] = { - RTL_ROM_LMP_8723A, - RTL_ROM_LMP_8723B, - RTL_ROM_LMP_8821A, - RTL_ROM_LMP_8761A + static const struct { + __u16 lmp_subver; + __u8 id; + } project_id_to_lmp_subver[] = { + { RTL_ROM_LMP_8723A, 0 }, + { RTL_ROM_LMP_8723B, 1 }, + { RTL_ROM_LMP_8821A, 2 }, + { RTL_ROM_LMP_8761A, 3 }, + { RTL_ROM_LMP_8822B, 8 }, }; ret = rtl_read_rom_version(hdev, &rom_version); @@ -134,14 +139,20 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, return -EINVAL; } - if (project_id >= ARRAY_SIZE(project_id_to_lmp_subver)) { + /* Find project_id in table */ + for (i = 0; i < ARRAY_SIZE(project_id_to_lmp_subver); i++) { + if (project_id == project_id_to_lmp_subver[i].id) + break; + } + + if (i >= ARRAY_SIZE(project_id_to_lmp_subver)) { BT_ERR("%s: unknown project id %d", hdev->name, project_id); return -EINVAL; } - if (lmp_subver != project_id_to_lmp_subver[project_id]) { + if (lmp_subver != project_id_to_lmp_subver[i].lmp_subver) { BT_ERR("%s: firmware is for %x but this is a %x", hdev->name, - project_id_to_lmp_subver[project_id], lmp_subver); + project_id_to_lmp_subver[i].lmp_subver, lmp_subver); return -EINVAL; } @@ -257,6 +268,26 @@ out: return ret; } +static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff) +{ + const struct firmware *fw; + int ret; + + BT_INFO("%s: rtl: loading %s", hdev->name, name); + ret = request_firmware(&fw, name, &hdev->dev); + if (ret < 0) { + BT_ERR("%s: Failed to load %s", hdev->name, name); + return ret; + } + + ret = fw->size; + *buff = kmemdup(fw->data, ret, GFP_KERNEL); + + release_firmware(fw); + + return ret; +} + static int btrtl_setup_rtl8723a(struct hci_dev *hdev) { const struct firmware *fw; @@ -296,25 +327,74 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, unsigned char *fw_data = NULL; const struct firmware *fw; int ret; + int cfg_sz; + u8 *cfg_buff = NULL; + u8 *tbuff; + char *cfg_name = NULL; + + switch (lmp_subver) { + case RTL_ROM_LMP_8723B: + cfg_name = "rtl_bt/rtl8723b_config.bin"; + break; + case RTL_ROM_LMP_8821A: + cfg_name = "rtl_bt/rtl8821a_config.bin"; + break; + case RTL_ROM_LMP_8761A: + cfg_name = "rtl_bt/rtl8761a_config.bin"; + break; + case RTL_ROM_LMP_8822B: + cfg_name = "rtl_bt/rtl8822b_config.bin"; + break; + default: + BT_ERR("%s: rtl: no config according to lmp_subver %04x", + hdev->name, lmp_subver); + break; + } + + if (cfg_name) { + cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff); + if (cfg_sz < 0) + cfg_sz = 0; + } else + cfg_sz = 0; BT_INFO("%s: rtl: loading %s", hdev->name, fw_name); ret = request_firmware(&fw, fw_name, &hdev->dev); if (ret < 0) { BT_ERR("%s: Failed to load %s", hdev->name, fw_name); - return ret; + goto err_req_fw; } 
ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data); if (ret < 0) goto out; + if (cfg_sz) { + tbuff = kzalloc(ret + cfg_sz, GFP_KERNEL); + if (!tbuff) { + ret = -ENOMEM; + goto out; + } + + memcpy(tbuff, fw_data, ret); + kfree(fw_data); + + memcpy(tbuff + ret, cfg_buff, cfg_sz); + ret += cfg_sz; + + fw_data = tbuff; + } + + BT_INFO("cfg_sz %d, total size %d", cfg_sz, ret); + ret = rtl_download_firmware(hdev, fw_data, ret); - kfree(fw_data); - if (ret < 0) - goto out; out: release_firmware(fw); + kfree(fw_data); +err_req_fw: + if (cfg_sz) + kfree(cfg_buff); return ret; } @@ -377,6 +457,9 @@ int btrtl_setup_realtek(struct hci_dev *hdev) case RTL_ROM_LMP_8761A: return btrtl_setup_rtl8723b(hdev, lmp_subver, "rtl_bt/rtl8761a_fw.bin"); + case RTL_ROM_LMP_8822B: + return btrtl_setup_rtl8723b(hdev, lmp_subver, + "rtl_bt/rtl8822b_fw.bin"); default: BT_INFO("rtl: assuming no firmware upload needed."); return 0; From 7c295c4801b2de24fc25687eb0cb73cf0c99d114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Sun, 18 Sep 2016 12:50:02 +0200 Subject: [PATCH 0434/1050] Bluetooth: Add support for local name in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables appending local name to scan response data. If currently advertised instance has name flag set it is expired immediately. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 28 +++++++++++++++------- net/bluetooth/mgmt.c | 46 +++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9566ff8e3223..0ce6cdd278b2 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -971,14 +971,14 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - u8 ad_len = 0; size_t name_len; + int max_len; + max_len = HCI_MAX_AD_LENGTH - ad_len - 2; name_len = strlen(hdev->dev_name); - if (name_len > 0) { - size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + if (name_len > 0 && max_len > 0) { if (name_len > max_len) { name_len = max_len; @@ -997,22 +997,34 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) return ad_len; } +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + return append_local_name(hdev, ptr, 0); +} + static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { struct adv_info *adv_instance; + u32 instance_flags; + u8 scan_rsp_len = 0; adv_instance = hci_find_adv_instance(hdev, instance); if (!adv_instance) return 0; - /* TODO: Set the appropriate entries based on advertising instance flags - * here once flags other than 0 are supported. 
- */ + instance_flags = adv_instance->flags; + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); - return adv_instance->scan_rsp_len; + scan_rsp_len += adv_instance->scan_rsp_len; + ptr += adv_instance->scan_rsp_len; + + if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) + scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); + + return scan_rsp_len; } void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0ac881cfc646..89954bb19222 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3012,6 +3012,35 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } +static void adv_expire(struct hci_dev *hdev, u32 flags) +{ + struct adv_info *adv_instance; + struct hci_request req; + int err; + + adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); + if (!adv_instance) + return; + + /* stop if current instance doesn't need to be changed */ + if (!(adv_instance->flags & flags)) + return; + + cancel_adv_timeout(hdev); + + adv_instance = hci_get_next_instance(hdev, adv_instance->instance); + if (!adv_instance) + return; + + hci_req_init(&req, hdev); + err = __hci_req_schedule_adv_instance(&req, adv_instance->instance, + true); + if (err) + return; + + hci_req_run(&req, NULL); +} + static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; @@ -3027,13 +3056,17 @@ static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) cp = cmd->param; - if (status) + if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, mgmt_status(status)); - else + } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, cp, sizeof(*cp)); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_LOCAL_NAME); + } + mgmt_pending_remove(cmd); unlock: @@ -5885,6 +5918,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) flags |= MGMT_ADV_FLAG_TX_POWER; @@ -5961,6 +5995,10 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, tx_power_managed = true; max_len -= 3; } + } else { + /* at least 1 byte of name should fit in */ + if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) + max_len -= 3; } if (len > max_len) @@ -6293,6 +6331,10 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; + } else { + /* at least 1 byte of name should fit in */ + if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) + max_len -= 3; } return max_len; From c4960ecf2b09210930964ef2c05ce2590802ccf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Sun, 18 Sep 2016 12:50:03 +0200 Subject: [PATCH 0435/1050] Bluetooth: Add support for appearance in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables prepending the appearance value to scan response data. It also adds support for setting the appearance value through a mgmt command. If the currently advertised instance has the appearance flag set, it is expired immediately.
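The prepended appearance entry is always four bytes; as a sketch, with 0x03C1 ("Keyboard" from the Bluetooth assigned numbers) used purely as an example value:

	ptr[0] = 0x03;	/* length: type byte plus 2-byte value */
	ptr[1] = 0x19;	/* EIR_APPEARANCE */
	ptr[2] = 0xC1;	/* appearance value, little endian: low byte */
	ptr[3] = 0x03;	/* high byte */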
Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 6 ++++++ net/bluetooth/hci_request.c | 8 +++++++ net/bluetooth/mgmt.c | 37 ++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a48f71d73dc8..f00bf667ec33 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -211,6 +211,7 @@ struct hci_dev { __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; + __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 minor_class; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 611b243713ea..72a456bbbcd5 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -598,6 +598,12 @@ struct mgmt_rp_read_ext_info { __u8 eir[0]; } __packed; +#define MGMT_OP_SET_APPEARANCE 0x0043 +struct mgmt_cp_set_appearance { + __u16 appearance; +} __packed; +#define MGMT_SET_APPEARANCE_SIZE 2 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0ce6cdd278b2..c8135680c43e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1015,6 +1015,14 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; + if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { + ptr[0] = 3; + ptr[1] = EIR_APPEARANCE; + put_unaligned_le16(hdev->appearance, ptr + 2); + scan_rsp_len += 4; + ptr += 4; + } + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 89954bb19222..78d708851208 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -105,6 +105,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, MGMT_OP_READ_EXT_INFO, + MGMT_OP_SET_APPEARANCE, }; static const u16 mgmt_events[] = { @@ -3143,6 +3144,34 @@ failed: return err; } +static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_set_appearance *cp = data; + u16 apperance; + int err; + + BT_DBG(""); + + apperance = le16_to_cpu(cp->appearance); + + hci_dev_lock(hdev); + + if (hdev->appearance != apperance) { + hdev->appearance = apperance; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + } + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, + 0); + + hci_dev_unlock(hdev); + + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -5918,6 +5947,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_APPEARANCE; flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) @@ -5999,6 +6029,9 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & MGMT_ADV_FLAG_APPEARANCE) + max_len -= 4; } if (len > max_len) @@ -6335,6 +6368,9 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) /* at least 1 byte of name 
should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) + max_len -= 4; } return max_len; @@ -6470,6 +6506,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, HCI_MGMT_UNTRUSTED }, + { set_appearance, MGMT_SET_APPEARANCE_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) From 5e2c59e84b633e4f7719fdc6a2930f2a311da83a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:04 +0200 Subject: [PATCH 0436/1050] Bluetooth: Remove unused parameter from tlv_data_is_valid function hdev parameter is not used in function. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 78d708851208..97f70b7fb7b1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6005,8 +6005,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, - u8 len, bool is_adv_data) +static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; int i, cur_len; @@ -6168,8 +6167,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || - !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, + if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); From 2bb36870e8cb29949ef9acec37129cd8e70f1857 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:05 +0200 Subject: [PATCH 0437/1050] Bluetooth: Unify advertising instance flags check This unifies max length and TLV validity checks. 
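The validity check walks the buffer as length-prefixed TLV records; the same traversal as a standalone sketch (tlv_ok is illustrative, not the kernel function):

#include <stdint.h>

/* return 1 if every length-prefixed field fits inside the buffer */
static int tlv_ok(const uint8_t *data, uint8_t len)
{
	uint16_t i = 0;

	while (i < len) {
		uint8_t cur_len = data[i];	/* covers type byte + payload */

		if (i + cur_len >= len)
			return 0;		/* field would overrun the buffer */
		/* data[i + 1] is the field type inspected by the checks below */
		i += cur_len + 1;
	}
	return 1;
}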
Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 85 +++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 97f70b7fb7b1..c96b0adc4971 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6005,34 +6005,59 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; - int i, cur_len; - bool flags_managed = false; - bool tx_power_managed = false; if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) { - flags_managed = true; + MGMT_ADV_FLAG_MANAGED_FLAGS)) max_len -= 3; - } - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) { - tx_power_managed = true; + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; - } } else { /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; - if (adv_flags & MGMT_ADV_FLAG_APPEARANCE) + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) max_len -= 4; } + return max_len; +} + +static bool flags_managed(u32 adv_flags) +{ + return adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS); +} + +static bool tx_power_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_TX_POWER; +} + +static bool name_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_LOCAL_NAME; +} + +static bool appearance_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_APPEARANCE; +} + +static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +{ + int i, cur_len; + u8 max_len; + + max_len = tlv_data_max_len(adv_flags, is_adv_data); + if (len > max_len) return false; @@ -6040,10 +6065,20 @@ static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; - if (flags_managed && data[i + 1] == EIR_FLAGS) + if (data[i + 1] == EIR_FLAGS && flags_managed(adv_flags)) return false; - if (tx_power_managed && data[i + 1] == EIR_TX_POWER) + if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_COMPLETE && name_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_SHORT && name_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_APPEARANCE && + appearance_managed(adv_flags)) return false; /* If the current field length would exceed the total data @@ -6351,30 +6386,6 @@ unlock: return err; } -static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) -{ - u8 max_len = HCI_MAX_AD_LENGTH; - - if (is_adv_data) { - if (adv_flags & (MGMT_ADV_FLAG_DISCOV | - MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) - max_len -= 3; - - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) - max_len -= 3; - } else { - /* at least 1 byte of name should fit in */ - if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) - max_len -= 3; - - if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) - max_len -= 4; - } - - return max_len; -} - static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { From 9c9db78dc0fbbd95177fefdad008e46ffaa777f2 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:06 +0200 Subject: [PATCH 0438/1050] Bluetooth: Fix advertising instance validity check for flags 
Flags are not allowed in Scan Response. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c96b0adc4971..2758c6a4425c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6065,7 +6065,8 @@ static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; - if (data[i + 1] == EIR_FLAGS && flags_managed(adv_flags)) + if (data[i + 1] == EIR_FLAGS && + (!is_adv_data || flags_managed(adv_flags))) return false; if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) From 3310230c5dddfafe3d1ef87f1257812011681aca Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:07 +0200 Subject: [PATCH 0439/1050] Bluetooth: Increment management interface revision Increment the mgmt revision due to the recently added Read Extended Controller Information and Set Appearance commands. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2758c6a4425c..54dd218d06f7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 13 +#define MGMT_REVISION 14 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, From 143f0a28ff7ebcc74144ed29bc66da6fbcce0dc7 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 12:05:12 +0200 Subject: [PATCH 0440/1050] Bluetooth: hci_bcm: Change protocol name Use full name instead of abbreviation. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 1c97eda8bae3..5ccb90ef0146 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -798,7 +798,7 @@ static int bcm_remove(struct platform_device *pdev) static const struct hci_uart_proto bcm_proto = { .id = HCI_UART_BCM, - .name = "BCM", + .name = "Broadcom", .manufacturer = 15, .init_speed = 115200, .oper_speed = 4000000, From 9e69130c4efc61ce0a8fb3b9eea0188f8d41f779 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 11:32:35 +0200 Subject: [PATCH 0441/1050] Bluetooth: hci_uart: Add Nokia Protocol identifier Will be used by hci_nokia extra protocol. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_uart.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 839bad1d8152..22b7c58ae837 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -35,7 +35,7 @@ #define HCIUARTGETFLAGS _IOR('U', 204, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 10 +#define HCI_UART_MAX_PROTO 11 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 @@ -47,6 +47,7 @@ #define HCI_UART_BCM 7 #define HCI_UART_QCA 8 #define HCI_UART_AG6XX 9 +#define HCI_UART_NOKIA 10 #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 From 162f812f23bab583f5d514ca0e4df67797ac9cdf Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 16:29:27 +0200 Subject: [PATCH 0442/1050] Bluetooth: hci_uart: Add Marvell support This patch introduces support for Marvell Bluetooth controller over UART (8897 for now). 
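The vendor packets used during this download carry a four-byte header whose two halves must be bitwise complements of each other; a standalone sketch of the integrity check (mirroring the hci_mrvl_pkt check in the driver below):

#include <stdint.h>

struct mrvl_pkt {
	uint16_t lhs;	/* value (little endian on the wire) */
	uint16_t rhs;	/* must be the bitwise complement of lhs */
};

static int mrvl_hdr_ok(const struct mrvl_pkt *pkt)
{
	return (pkt->lhs ^ pkt->rhs) == 0xffff;
}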
In order to send the final firmware at full speed, a helper firmware is firstly sent. Firmware download is driven by the controller which sends request firmware packets (including expected size). This driver is a global rework of the one proposed by Amitkumar Karwar . Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 11 + drivers/bluetooth/Makefile | 1 + drivers/bluetooth/hci_ldisc.c | 6 + drivers/bluetooth/hci_mrvl.c | 387 ++++++++++++++++++++++++++++++++++ drivers/bluetooth/hci_uart.h | 8 +- 5 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 drivers/bluetooth/hci_mrvl.c diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 43e9f9328df8..3cc9bff9d99d 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -180,6 +180,17 @@ config BT_HCIUART_AG6XX Say Y here to compile support for Intel AG6XX protocol. +config BT_HCIUART_MRVL + bool "Marvell protocol support" + depends on BT_HCIUART + select BT_HCIUART_H4 + help + Marvell is serial protocol for communication between Bluetooth + device and host. This protocol is required for most Marvell Bluetooth + devices with UART interface. + + Say Y here to compile support for HCI MRVL protocol. + config BT_HCIBCM203X tristate "HCI BCM203x USB driver" depends on USB diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile index 3e92cfeb9ee2..b1fc29a697b7 100644 --- a/drivers/bluetooth/Makefile +++ b/drivers/bluetooth/Makefile @@ -38,6 +38,7 @@ hci_uart-$(CONFIG_BT_HCIUART_INTEL) += hci_intel.o hci_uart-$(CONFIG_BT_HCIUART_BCM) += hci_bcm.o hci_uart-$(CONFIG_BT_HCIUART_QCA) += hci_qca.o hci_uart-$(CONFIG_BT_HCIUART_AG6XX) += hci_ag6xx.o +hci_uart-$(CONFIG_BT_HCIUART_MRVL) += hci_mrvl.o hci_uart-objs := $(hci_uart-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index dda97398c59a..9a3aab67b6bb 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -810,6 +810,9 @@ static int __init hci_uart_init(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_init(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_init(); +#endif return 0; } @@ -845,6 +848,9 @@ static void __exit hci_uart_exit(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_deinit(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_deinit(); +#endif /* Release tty registration of line discipline */ err = tty_unregister_ldisc(N_HCI); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c new file mode 100644 index 000000000000..bbc4b39b1dbf --- /dev/null +++ b/drivers/bluetooth/hci_mrvl.c @@ -0,0 +1,387 @@ +/* + * + * Bluetooth HCI UART driver for marvell devices + * + * Copyright (C) 2016 Marvell International Ltd. + * Copyright (C) 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hci_uart.h" + +#define HCI_FW_REQ_PKT 0xA5 +#define HCI_CHIP_VER_PKT 0xAA + +#define MRVL_ACK 0x5A +#define MRVL_NAK 0xBF +#define MRVL_RAW_DATA 0x1F + +enum { + STATE_CHIP_VER_PENDING, + STATE_FW_REQ_PENDING, +}; + +struct mrvl_data { + struct sk_buff *rx_skb; + struct sk_buff_head txq; + struct sk_buff_head rawq; + unsigned long flags; + unsigned int tx_len; + u8 id, rev; +}; + +struct hci_mrvl_pkt { + __le16 lhs; + __le16 rhs; +} __packed; +#define HCI_MRVL_PKT_SIZE 4 + +static int mrvl_open(struct hci_uart *hu) +{ + struct mrvl_data *mrvl; + + BT_DBG("hu %p", hu); + + mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); + if (!mrvl) + return -ENOMEM; + + skb_queue_head_init(&mrvl->txq); + skb_queue_head_init(&mrvl->rawq); + + set_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + + hu->priv = mrvl; + return 0; +} + +static int mrvl_close(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + kfree_skb(mrvl->rx_skb); + kfree(mrvl); + + hu->priv = NULL; + return 0; +} + +static int mrvl_flush(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + + return 0; +} + +static struct sk_buff *mrvl_dequeue(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + skb = skb_dequeue(&mrvl->txq); + if (!skb) { + /* Any raw data ? */ + skb = skb_dequeue(&mrvl->rawq); + } else { + /* Prepend skb with frame type */ + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); + } + + return skb; +} + +static int mrvl_enqueue(struct hci_uart *hu, struct sk_buff *skb) +{ + struct mrvl_data *mrvl = hu->priv; + + skb_queue_tail(&mrvl->txq, skb); + return 0; +} + +static void mrvl_send_ack(struct hci_uart *hu, unsigned char type) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + /* No H4 payload, only 1 byte header */ + skb = bt_skb_alloc(0, GFP_ATOMIC); + if (!skb) { + bt_dev_err(hu->hdev, "Unable to alloc ack/nak packet"); + return; + } + hci_skb_pkt_type(skb) = type; + + skb_queue_tail(&mrvl->txq, skb); + hci_uart_tx_wakeup(hu); +} + +static int mrvl_recv_fw_req(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + int ret = 0; + + if ((pkt->lhs ^ pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_FW_REQ_PENDING, &mrvl->flags)) { + bt_dev_err(hdev, "Received unexpected firmware request"); + ret = -EINVAL; + goto done; + } + + mrvl->tx_len = le16_to_cpu(pkt->lhs); + + clear_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_FW_REQ_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +static int mrvl_recv_chip_ver(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + u16 version = le16_to_cpu(pkt->lhs); + int ret = 0; + + if ((pkt->lhs ^ 
pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_CHIP_VER_PENDING, &mrvl->flags)) { + bt_dev_err(hdev, "Received unexpected chip version"); + goto done; + } + + mrvl->id = version; + mrvl->rev = version >> 8; + + bt_dev_info(hdev, "Controller id = %x, rev = %x", mrvl->id, mrvl->rev); + + clear_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_CHIP_VER_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +#define HCI_RECV_CHIP_VER \ + .type = HCI_CHIP_VER_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +#define HCI_RECV_FW_REQ \ + .type = HCI_FW_REQ_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +static const struct h4_recv_pkt mrvl_recv_pkts[] = { + { H4_RECV_ACL, .recv = hci_recv_frame }, + { H4_RECV_SCO, .recv = hci_recv_frame }, + { H4_RECV_EVENT, .recv = hci_recv_frame }, + { HCI_RECV_FW_REQ, .recv = mrvl_recv_fw_req }, + { HCI_RECV_CHIP_VER, .recv = mrvl_recv_chip_ver }, +}; + +static int mrvl_recv(struct hci_uart *hu, const void *data, int count) +{ + struct mrvl_data *mrvl = hu->priv; + + if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) + return -EUNATCH; + + mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); + if (IS_ERR(mrvl->rx_skb)) { + int err = PTR_ERR(mrvl->rx_skb); + bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); + mrvl->rx_skb = NULL; + return err; + } + + return count; +} + +static int mrvl_load_firmware(struct hci_dev *hdev, const char *name) +{ + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + const struct firmware *fw = NULL; + const u8 *fw_ptr, *fw_max; + int err; + + err = request_firmware(&fw, name, &hdev->dev); + if (err < 0) { + bt_dev_err(hdev, "Failed to load firmware file %s", name); + return err; + } + + fw_ptr = fw->data; + fw_max = fw->data + fw->size; + + bt_dev_info(hdev, "Loading %s", name); + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + while (fw_ptr <= fw_max) { + struct sk_buff *skb; + + /* Controller drives the firmware load by sending firmware + * request packets containing the expected fragment size. + */ + err = wait_on_bit_timeout(&mrvl->flags, STATE_FW_REQ_PENDING, + TASK_INTERRUPTIBLE, + msecs_to_jiffies(2000)); + if (err == 1) { + bt_dev_err(hdev, "Firmware load interrupted"); + err = -EINTR; + break; + } else if (err) { + bt_dev_err(hdev, "Firmware request timeout"); + err = -ETIMEDOUT; + break; + } + + bt_dev_dbg(hdev, "Firmware request, expecting %d bytes", + mrvl->tx_len); + + if (fw_ptr == fw_max) { + /* Controller requests a null size once firmware is + * fully loaded. If controller expects more data, there + * is an issue. 
+ */ + if (!mrvl->tx_len) { + bt_dev_info(hdev, "Firmware loading complete"); + } else { + bt_dev_err(hdev, "Firmware loading failure"); + err = -EINVAL; + } + break; + } + + if (fw_ptr + mrvl->tx_len > fw_max) { + mrvl->tx_len = fw_max - fw_ptr; + bt_dev_dbg(hdev, "Adjusting tx_len to %d", + mrvl->tx_len); + } + + skb = bt_skb_alloc(mrvl->tx_len, GFP_KERNEL); + if (!skb) { + bt_dev_err(hdev, "Failed to alloc mem for FW packet"); + err = -ENOMEM; + break; + } + bt_cb(skb)->pkt_type = MRVL_RAW_DATA; + + memcpy(skb_put(skb, mrvl->tx_len), fw_ptr, mrvl->tx_len); + fw_ptr += mrvl->tx_len; + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + skb_queue_tail(&mrvl->rawq, skb); + hci_uart_tx_wakeup(hu); + } + + release_firmware(fw); + return err; +} + +static int mrvl_setup(struct hci_uart *hu) +{ + int err; + + hci_uart_set_flow_control(hu, true); + + err = mrvl_load_firmware(hu->hdev, "mrvl/helper_uart_3000000.bin"); + if (err) { + bt_dev_err(hu->hdev, "Unable to download firmware helper"); + return -EINVAL; + } + + hci_uart_set_baudrate(hu, 3000000); + hci_uart_set_flow_control(hu, false); + + err = mrvl_load_firmware(hu->hdev, "mrvl/uart8897_bt.bin"); + if (err) + return err; + + return 0; +} + +static const struct hci_uart_proto mrvl_proto = { + .id = HCI_UART_MRVL, + .name = "Marvell", + .init_speed = 115200, + .open = mrvl_open, + .close = mrvl_close, + .flush = mrvl_flush, + .setup = mrvl_setup, + .recv = mrvl_recv, + .enqueue = mrvl_enqueue, + .dequeue = mrvl_dequeue, +}; + +int __init mrvl_init(void) +{ + return hci_uart_register_proto(&mrvl_proto); +} + +int __exit mrvl_deinit(void) +{ + return hci_uart_unregister_proto(&mrvl_proto); +} diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 22b7c58ae837..070139513e65 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -35,7 +35,7 @@ #define HCIUARTGETFLAGS _IOR('U', 204, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 11 +#define HCI_UART_MAX_PROTO 12 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 @@ -48,6 +48,7 @@ #define HCI_UART_QCA 8 #define HCI_UART_AG6XX 9 #define HCI_UART_NOKIA 10 +#define HCI_UART_MRVL 11 #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 @@ -190,3 +191,8 @@ int qca_deinit(void); int ag6xx_init(void); int ag6xx_deinit(void); #endif + +#ifdef CONFIG_BT_HCIUART_MRVL +int mrvl_init(void); +int mrvl_deinit(void); +#endif From 7d5c11da1ff6389511c42448f59456373edfc103 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 19 Sep 2016 20:25:52 +0200 Subject: [PATCH 0443/1050] Bluetooth: Refactor read_ext_controller_info handler There is no need to allocate heap memory for the reply only to copy stack data into it. This also fixes an rp memory leak and a missing hdev unlock in case kmalloc failed.
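The two defects, condensed into a userspace sketch (leaky_reply, dev_lock and the transport comment are stand-ins, not the mgmt code):

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;

static int leaky_reply(size_t n)
{
	char *rp;

	pthread_mutex_lock(&dev_lock);
	rp = malloc(n);
	if (!rp)
		return -ENOMEM;		/* bug: returns with dev_lock held */
	/* ... fill rp and hand a copy of it to the transport ... */
	pthread_mutex_unlock(&dev_lock);
	return 0;			/* bug: rp is never freed */
}

Building the reply in a stack buffer, as the patch below does, removes both the failing allocation path and the ownership question.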
Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 54dd218d06f7..604c48142848 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -881,35 +881,17 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct mgmt_rp_read_ext_info *rp; - char buff[512]; + char buf[512]; + struct mgmt_rp_read_ext_info *rp = (void *)buf; u16 eir_len = 0; - u8 name_len; + size_t name_len; BT_DBG("sock %p %s", sk, hdev->name); + memset(&buf, 0, sizeof(buf)); + hci_dev_lock(hdev); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - eir_len = eir_append_data(buff, eir_len, - EIR_CLASS_OF_DEV, - hdev->dev_class, 3); - - name_len = strlen(hdev->dev_name); - eir_len = eir_append_data(buff, eir_len, EIR_NAME_COMPLETE, - hdev->dev_name, name_len); - - name_len = strlen(hdev->short_name); - eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, - hdev->short_name, name_len); - - rp = kzalloc(sizeof(*rp) + eir_len, GFP_KERNEL); - if (!rp) - return -ENOMEM; - - rp->eir_len = cpu_to_le16(eir_len); - memcpy(rp->eir, buff, eir_len); - bacpy(&rp->bdaddr, &hdev->bdaddr); rp->version = hdev->hci_ver; @@ -918,6 +900,20 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); rp->current_settings = cpu_to_le32(get_current_settings(hdev)); + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); + + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); + + rp->eir_len = cpu_to_le16(eir_len); + hci_dev_unlock(hdev); /* If this command is called at least once, then the events From cde7a863d36a4a629c111f37edc2297d6b822a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:53 +0200 Subject: [PATCH 0444/1050] Bluetooth: Factor appending EIR to separate helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will also be used for Extended Information Event handling. 
Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 604c48142848..2b6fe10256b9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -878,13 +878,32 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, return eir_len; } +static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) +{ + u16 eir_len = 0; + size_t name_len; + + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); + + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); + + return eir_len; +} + static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { char buf[512]; struct mgmt_rp_read_ext_info *rp = (void *)buf; - u16 eir_len = 0; - size_t name_len; + u16 eir_len; BT_DBG("sock %p %s", sk, hdev->name); @@ -900,18 +919,8 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); rp->current_settings = cpu_to_le32(get_current_settings(hdev)); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, - hdev->dev_class, 3); - - name_len = strlen(hdev->dev_name); - eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_COMPLETE, - hdev->dev_name, name_len); - - name_len = strlen(hdev->short_name); - eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_SHORT, - hdev->short_name, name_len); + eir_len = append_eir_data_to_buf(hdev, rp->eir); rp->eir_len = cpu_to_le16(eir_len); hci_dev_unlock(hdev); From 6a9e90bff9cfb33d5939c29e5bf2674c9176365d Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 19 Sep 2016 20:25:54 +0200 Subject: [PATCH 0445/1050] Bluetooth: Add appearance to Read Ext Controller Info command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If LE is enabled appearance is added to EIR data. 
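A userspace analogue of the eir_append_le16() helper added below (illustrative; it is meant to mirror the kernel version byte for byte):

#include <stdint.h>

static uint16_t eir_append_le16(uint8_t *eir, uint16_t eir_len,
				uint8_t type, uint16_t data)
{
	eir[eir_len++] = 1 + sizeof(data);	/* type byte + 2 data bytes */
	eir[eir_len++] = type;
	eir[eir_len++] = data & 0xff;		/* value stored little endian */
	eir[eir_len++] = data >> 8;
	return eir_len;
}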
Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2b6fe10256b9..d3837e0633af 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -878,6 +878,16 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, return eir_len; } +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} + static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) { u16 eir_len = 0; @@ -887,6 +897,10 @@ static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, hdev->dev_class, 3); + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE, + hdev->appearance); + name_len = strlen(hdev->dev_name); eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, hdev->dev_name, name_len); From 5e9fae48f800b973e45887ce0b8d717d54c0bb11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:55 +0200 Subject: [PATCH 0446/1050] Bluetooth: Add supported data types to ext info changed event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds EIR data to extended info changed event. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d3837e0633af..29e5ce95c50c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -954,12 +954,18 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) { - struct mgmt_ev_ext_info_changed ev; + char buf[512]; + struct mgmt_ev_ext_info_changed *ev = (void *)buf; + u16 eir_len; - ev.eir_len = cpu_to_le16(0); + memset(buf, 0, sizeof(buf)); - return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, &ev, - sizeof(ev), HCI_MGMT_EXT_INFO_EVENTS, skip); + eir_len = append_eir_data_to_buf(hdev, ev->eir); + ev->eir_len = cpu_to_le16(eir_len); + + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, ev, + sizeof(*ev) + eir_len, + HCI_MGMT_EXT_INFO_EVENTS, skip); } static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) From e74317f43f5ce2d13cddaab867c59d42934d9585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:56 +0200 Subject: [PATCH 0447/1050] Bluetooth: Fix missing ext info event when setting appearance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing event when setting appearance, just like in the set local name command. 
Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 29e5ce95c50c..cd9f345894e0 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3187,6 +3187,8 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, if (hci_dev_test_flag(hdev, HCI_LE_ADV)) adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + + ext_info_changed(hdev, sk); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, From af4168c5a925dc3b11b0246c2b91124327919f47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 14:33:33 +0200 Subject: [PATCH 0448/1050] Bluetooth: Set appearance only for LE capable controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting appearance on controllers without LE support will result in a Not Supported error. Signed-off-by: Michał Narajowski Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cd9f345894e0..7b2bac492fb1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3178,6 +3178,10 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE, + MGMT_STATUS_NOT_SUPPORTED); + apperance = le16_to_cpu(cp->appearance); hci_dev_lock(hdev); From 67326666e2d45ebea7db3ed8e3e735f15e60dd91 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 19 Sep 2016 10:52:14 -0500 Subject: [PATCH 0449/1050] scripts: add script for translating stack dump function offsets addr2line doesn't work with KASLR addresses. Add a basic addr2line wrapper script which takes the 'func+offset/size' format as input. Signed-off-by: Josh Poimboeuf Signed-off-by: Linus Torvalds --- scripts/faddr2line | 177 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100755 scripts/faddr2line diff --git a/scripts/faddr2line b/scripts/faddr2line new file mode 100755 index 000000000000..4fbfe8305fe3 --- /dev/null +++ b/scripts/faddr2line @@ -0,0 +1,177 @@ +#!/bin/bash +# +# Translate stack dump function offsets. +# +# addr2line doesn't work with KASLR addresses. This works similarly to +# addr2line, but instead takes the 'func+0x123' format as input: +# +# $ ./scripts/faddr2line ~/k/vmlinux meminfo_proc_show+0x5/0x568 +# meminfo_proc_show+0x5/0x568: +# meminfo_proc_show at fs/proc/meminfo.c:27 +# +# If the address is part of an inlined function, the full inline call chain is +# printed: +# +# $ ./scripts/faddr2line ~/k/vmlinux native_write_msr+0x6/0x27 +# native_write_msr+0x6/0x27: +# arch_static_branch at arch/x86/include/asm/msr.h:121 +# (inlined by) static_key_false at include/linux/jump_label.h:125 +# (inlined by) native_write_msr at arch/x86/include/asm/msr.h:125 +# +# The function size after the '/' in the input is optional, but recommended. +# It's used to help disambiguate any duplicate symbol names, which can occur +# rarely.
If the size is omitted for a duplicate symbol then it's possible for +# multiple code sites to be printed: +# +# $ ./scripts/faddr2line ~/k/vmlinux raw_ioctl+0x5 +# raw_ioctl+0x5/0x20: +# raw_ioctl at drivers/char/raw.c:122 +# +# raw_ioctl+0x5/0xb1: +# raw_ioctl at net/ipv4/raw.c:876 +# +# Multiple addresses can be specified on a single command line: +# +# $ ./scripts/faddr2line ~/k/vmlinux type_show+0x10/45 free_reserved_area+0x90 +# type_show+0x10/0x2d: +# type_show at drivers/video/backlight/backlight.c:213 +# +# free_reserved_area+0x90/0x123: +# free_reserved_area at mm/page_alloc.c:6429 (discriminator 2) + + +set -o errexit +set -o nounset + +command -v awk >/dev/null 2>&1 || die "awk isn't installed" +command -v readelf >/dev/null 2>&1 || die "readelf isn't installed" +command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed" + +usage() { + echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2 + exit 1 +} + +warn() { + echo "$1" >&2 +} + +die() { + echo "ERROR: $1" >&2 + exit 1 +} + +# Try to figure out the source directory prefix so we can remove it from the +# addr2line output. HACK ALERT: This assumes that start_kernel() is in +# init/main.c! This only works for vmlinux. Otherwise it falls back to +# printing the absolute path. +find_dir_prefix() { + local objfile=$1 + + local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + [[ -z $start_kernel_addr ]] && return + + local file_line=$(addr2line -e $objfile $start_kernel_addr) + [[ -z $file_line ]] && return + + local prefix=${file_line%init/main.c:*} + if [[ -z $prefix ]] || [[ $prefix = $file_line ]]; then + return + fi + + DIR_PREFIX=$prefix + return 0 +} + +__faddr2line() { + local objfile=$1 + local func_addr=$2 + local dir_prefix=$3 + local print_warnings=$4 + + local func=${func_addr%+*} + local offset=${func_addr#*+} + offset=${offset%/*} + local size= + [[ $func_addr =~ "/" ]] && size=${func_addr#*/} + + if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then + warn "bad func+offset $func_addr" + DONE=1 + return + fi + + # Go through each of the object's symbols which match the func name. + # In rare cases there might be duplicates.
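+	# Each matching line printed by `readelf -sW` has the form
+	# "Num: Value Size Type Bind Vis Ndx Name", e.g. (values
+	# illustrative only):
+	#   "4402: ffffffff8106a010 1384 FUNC GLOBAL DEFAULT 1 meminfo_proc_show"
+	# so fields[1] below is the symbol address, fields[2] its size and
+	# fields[3] its type; the awk filter feeding this loop already
+	# matched the symbol name in field 8.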
+ while read symbol; do + local fields=($symbol) + local sym_base=0x${fields[1]} + local sym_size=${fields[2]} + local sym_type=${fields[3]} + + # calculate the address + local addr=$(($sym_base + $offset)) + if [[ -z $addr ]] || [[ $addr = 0 ]]; then + warn "bad address: $sym_base + $offset" + DONE=1 + return + fi + local hexaddr=0x$(printf %x $addr) + + # weed out non-function symbols + if [[ $sym_type != "FUNC" ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to non-function symbol" + continue + fi + + # if the user provided a size, make sure it matches the symbol's size + if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)" + continue; + fi + + # make sure the provided offset is within the symbol's range + if [[ $offset -gt $sym_size ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)" + continue + fi + + # separate multiple entries with a blank line + [[ $FIRST = 0 ]] && echo + FIRST=0 + + local hexsize=0x$(printf %x $sym_size) + echo "$func+$offset/$hexsize:" + addr2line -fpie $objfile $hexaddr | sed "s;$dir_prefix;;" + DONE=1 + + done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}') +} + +[[ $# -lt 2 ]] && usage + +objfile=$1 +[[ ! -f $objfile ]] && die "can't find objfile $objfile" +shift + +DIR_PREFIX=supercalifragilisticexpialidocious +find_dir_prefix $objfile + +FIRST=1 +while [[ $# -gt 0 ]]; do + func_addr=$1 + shift + + # print any matches found + DONE=0 + __faddr2line $objfile $func_addr $DIR_PREFIX 0 + + # if no match was found, print warnings + if [[ $DONE = 0 ]]; then + __faddr2line $objfile $func_addr $DIR_PREFIX 1 + warn "no match for $func_addr" + fi +done From 9d15ce9caaf9ecbec74e3be156a4a57451ed16c2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Sep 2016 13:49:48 -0700 Subject: [PATCH 0450/1050] tools/testing/nvdimm: fix allocation range for mock flush hint tables Commit 480b6837aa57 "nvdimm: fix PHYS_PFN/PFN_PHYS mixup" identified that we were passing an invalid address to devm_nvdimm_ioremap(). With that fixed, it exposed a bug in the memory reservation size for flush hint tables. Since we map a full page, we need to mock a full page of memory to back the flush hint table entries. Cc: Oliver O'Halloran Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index dd48f421844c..f64c57bf1d4b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -603,7 +603,8 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; sprintf(t->label[i], "label%d", i); - t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS, + t->flush[i] = test_alloc(t, max(PAGE_SIZE, + sizeof(u64) * NUM_HINTS), &t->flush_dma[i]); if (!t->flush[i]) return -ENOMEM; From 164c80ed84a7669114869d9347c0f3ea7f56ea89 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 19 Sep 2016 15:12:41 -0600 Subject: [PATCH 0451/1050] blk-throttle: Extend slice if throttle group is not empty Right now, if the slice has expired, we start a new slice. If a bio is queued, we keep on extending the slice by the throtl_slice interval (100ms). This worked well as long as the pending timer function got executed within a few milliseconds of the scheduled time.
But it looks like, with recent changes in the timer subsystem (commit 500462a9de65, "timers: Switch to a non-cascading wheel"), the slack can be much longer, depending on the expiry time of the scheduled timer. This means that by the time the timer function gets executed, the delay from the scheduled time can be more than 100ms. The current code will then conclude that the existing slice has expired and a new one needs to be started. The new slice will be 100ms by default, which will not be sufficient to meet the rate requirement of the group given the bio size, so the bio will not be dispatched and we will start a new timer to wait. And when that timer expires, the same process will repeat and we will wait again; this can easily turn into an infinite loop. Solve this issue by starting a new slice only if the throttle group is empty. If it is not empty, that means there should be an active slice going on. Ideally it should not have expired, but given the slack, it is possible that it has. Reported-by: Hou Tao Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f1aba26f4719..a3ea8260c94c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -780,9 +780,11 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, /* * If previous slice expired, start a new one otherwise renew/extend * existing slice to make sure it is at least throtl_slice interval - * long since now. + * long since now. New slice is started only for empty throttle group. + * If there is queued bio, that means there should be an active + * slice and it should be extended instead. */ - if (throtl_slice_used(tg, rw)) + if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw])) throtl_start_new_slice(tg, rw); else { if (time_before(tg->slice_end[rw], jiffies + throtl_slice)) From 7fadce0d60d09427e0027d3d468781b08ca0b3d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 19 Sep 2016 14:49:08 -0700 Subject: [PATCH 0452/1050] scripts/faddr2line: improve on base path filtering a bit Due to our compiler include directives, the build pathnames for header files often end up being of the form "$srcdir/./include/linux/xyz.h", which ends up having that extra "." path component after the build base in it. Teach faddr2line to skip that too, to make code generated in inline functions in header files match the filename for the regular C files. Rabin Vincent pointed out that I can't make a stricter regexp match by using the " at " prefix for the pathname, because that ends up being locale-dependent.
But this does require that the path match be preceded by a space, to make it a bit more strict (that matters mainly if we didn't find any base_dir at all, and we only end up with the "./" part of the match). Acked-by: Josh Poimboeuf Cc: Rabin Vincent Signed-off-by: Linus Torvalds --- scripts/faddr2line | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 4fbfe8305fe3..450b33257339 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -145,7 +145,7 @@ __faddr2line() { local hexsize=0x$(printf %x $sym_size) echo "$func+$offset/$hexsize:" - addr2line -fpie $objfile $hexaddr | sed "s;$dir_prefix;;" + addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}') From 9bb627be47a574b764e162e8513d5db78d49e7f5 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 19 Sep 2016 14:43:52 -0700 Subject: [PATCH 0453/1050] mem-hotplug: don't clear the only node in new_node_page() Commit 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") introduced new_node_page() for memory hotplug. In new_node_page(), the nid is cleared before calling __alloc_pages_nodemask(). But if it is the only node of the system, and the first round allocation fails, it will not be able to get memory from an empty nodemask, and will trigger an OOM. The patch checks whether it is the last node on the system, and if it is, doesn't clear the nid in the nodemask. Fixes: 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") Link: http://lkml.kernel.org/r/1473044391.4250.19.camel@TP420 Signed-off-by: Li Zhong Reported-by: John Allen Acked-by: Vlastimil Babka Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 41266dc29f33..b58906b6215c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1567,7 +1567,9 @@ static struct page *new_node_page(struct page *page, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(nid, nmask)); - node_clear(nid, nmask); + if (nid != next_node_in(nid, nmask)) + node_clear(nid, nmask); + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; From e6f0c6e6170fec175fe676495f29029aecdf486c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 19 Sep 2016 14:43:55 -0700 Subject: [PATCH 0454/1050] ocfs2/dlm: fix race between convert and migration Commit ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery") checks if the lockres master has changed to identify whether the new master has finished recovery or not. This introduces a race: right after the old master does umount (meaning the master will change), a new convert request comes in. In this case, it will reset the lockres state to DLM_RECOVERING and then retry the convert, which then fails with lockres->l_action set to OCFS2_AST_INVALID, causing an inconsistent lock level between ocfs2 and dlm and finally a BUG. Since dlm recovery will clear lock->convert_pending in dlm_move_lockres_to_recovery_list, we can use it to correctly identify the race case between convert and recovery. So fix it.
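In outline, the detection after the remote convert request becomes the following (a simplified sketch of the idea, locking elided; see the diff below for the real change):

	} else if (!lock->convert_pending) {
		/* recovery cleared convert_pending while we waited,
		 * so the lock was moved back to the granted queue:
		 * report DLM_RECOVERING so the convert is retried */
		status = DLM_RECOVERING;
	}
	lock->convert_pending = 0;

replacing the old comparison of res->owner against a saved old_owner.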
Fixes: ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery") Link: http://lkml.kernel.org/r/57CE1569.8010704@huawei.com Signed-off-by: Joseph Qi Signed-off-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmconvert.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index cdeafb4e7ed6..0bb128659d4b 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, struct dlm_lock *lock, int flags, int type) { enum dlm_status status; - u8 old_owner = res->owner; mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); @@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - lock->convert_pending = 0; /* if it failed, move it back to granted queue. * if master returns DLM_NORMAL and then down before sending ast, * it may have already been moved to granted queue, reset to @@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, if (status != DLM_NOTQUEUED) dlm_error(status); dlm_revert_pending_convert(res, lock); - } else if ((res->state & DLM_LOCK_RES_RECOVERING) || - (old_owner != res->owner)) { - mlog(0, "res %.*s is in recovering or has been recovered.\n", - res->lockname.len, res->lockname.name); + } else if (!lock->convert_pending) { + mlog(0, "%s: res %.*s, owner died and lock has been moved back " + "to granted list, retry convert.\n", + dlm->name, res->lockname.len, res->lockname.name); status = DLM_RECOVERING; } + + lock->convert_pending = 0; bail: spin_unlock(&res->spinlock); From d8e3875431956c1f78e142d531f490f76c760ce3 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 19 Sep 2016 14:43:58 -0700 Subject: [PATCH 0455/1050] MAINTAINERS: Maik has moved Maik is no longer using the plusserver.de email; update with his current email. Link: http://lkml.kernel.org/r/1473007794-27960-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: Maik Broemme Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 644ff65d336d..2551f6e2fb43 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6103,7 +6103,7 @@ S: Supported F: drivers/cpufreq/intel_pstate.c INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) -M: Maik Broemme <mbroemme@plusserver.de> +M: Maik Broemme <mbroemme@libmpq.org> L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/fb/intelfb.txt From c131f751ab1a852d4dd4b490b3a7fbba7d738de5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:01 -0700 Subject: [PATCH 0456/1050] khugepaged: fix use-after-free in collapse_huge_page() hugepage_vma_revalidate() tries to re-check if we still should try to collapse small pages into a huge one after re-acquiring mmap_sem. The problem Dmitry Vyukov reported[1] is that the vma found by hugepage_vma_revalidate() can be suitable for huge pages, but not the same vma we had before dropping mmap_sem. And dereferencing the original vma can lead to fun results. Let's use the vma hugepage_vma_revalidate() found instead of assuming it's the same as what we had before the lock was dropped.
[1] http://lkml.kernel.org/r/CACT4Y+Z3gigBvhca9kRJFcjX0G70V_nRhbwKBU+yGoESBDKi9Q@mail.gmail.com Link: http://lkml.kernel.org/r/20160907122559.GA6542@black.fi.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Reviewed-by: Andrea Arcangeli Cc: Ebru Akagunduz Cc: Vlastimil Babka Cc: Mel Gorman Cc: Johannes Weiner Cc: Vegard Nossum Cc: Sasha Levin Cc: Konstantin Khlebnikov Cc: Andrey Ryabinin Cc: Greg Thelen Cc: Suleiman Souhlal Cc: Hugh Dickins Cc: David Rientjes Cc: syzkaller Cc: Kostya Serebryany Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 79c52d0061af..62339bf3c726 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) * value (scan code). */ -static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) +static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, + struct vm_area_struct **vmap) { struct vm_area_struct *vma; unsigned long hstart, hend; @@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) if (unlikely(khugepaged_test_exit(mm))) return SCAN_ANY_PROCESS; - vma = find_vma(mm, address); + *vmap = vma = find_vma(mm, address); if (!vma) return SCAN_VMA_NULL; @@ -898,7 +899,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ if (ret & VM_FAULT_RETRY) { down_read(&mm->mmap_sem); - if (hugepage_vma_revalidate(mm, address)) { + if (hugepage_vma_revalidate(mm, address, &fe.vma)) { /* vma is no longer available, don't continue to swapin */ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; @@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, static void collapse_huge_page(struct mm_struct *mm, unsigned long address, struct page **hpage, - struct vm_area_struct *vma, int node, int referenced) { pmd_t *pmd, _pmd; @@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm, spinlock_t *pmd_ptl, *pte_ptl; int isolated = 0, result = 0; struct mem_cgroup *memcg; + struct vm_area_struct *vma; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ gfp_t gfp; @@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm, } down_read(&mm->mmap_sem); - result = hugepage_vma_revalidate(mm, address); + result = hugepage_vma_revalidate(mm, address, &vma); if (result) { mem_cgroup_cancel_charge(new_page, memcg, true); up_read(&mm->mmap_sem); @@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm, * handled by the anon_vma lock + PG_lock. 
*/ down_write(&mm->mmap_sem); - result = hugepage_vma_revalidate(mm, address); + result = hugepage_vma_revalidate(mm, address, &vma); if (result) goto out; /* check if the pmd is still valid */ @@ -1202,7 +1203,7 @@ out_unmap: if (ret) { node = khugepaged_find_target_node(); /* collapse_huge_page will return with the mmap_sem released */ - collapse_huge_page(mm, address, hpage, vma, node, referenced); + collapse_huge_page(mm, address, hpage, node, referenced); } out: trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, From 982785c6b05a82c01e90687b7e25ee87c8970b2e Mon Sep 17 00:00:00 2001 From: Ebru Akagunduz Date: Mon, 19 Sep 2016 14:44:04 -0700 Subject: [PATCH 0457/1050] mm, thp: fix leaking mapped pte in __collapse_huge_page_swapin() Currently, khugepaged does not permit swapin unless there are enough young pages in a THP. The problem is that when a THP does not have enough young pages, khugepaged bails out after the ptes have already been mapped, leaking them. This patch prevents the leak by doing the young-pages check before the ptes are mapped. Link: http://lkml.kernel.org/r/1472820276-7831-1-git-send-email-ebru.akagunduz@gmail.com Signed-off-by: Ebru Akagunduz Suggested-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Reviewed-by: Rik van Riel Cc: Vlastimil Babka Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 62339bf3c726..728d7790dc2d 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -882,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, .pmd = pmd, }; + /* we only decide to swapin, if there is enough young ptes */ + if (referenced < HPAGE_PMD_NR/2) { + trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); + return false; + } fe.pte = pte_offset_map(pmd, address); for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE; fe.pte++, fe.address += PAGE_SIZE) { @@ -889,11 +894,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, if (!is_swap_pte(pteval)) continue; swapped_in++; - /* we only decide to swapin, if there is enough young ptes */ - if (referenced < HPAGE_PMD_NR/2) { - trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); - return false; - } ret = do_swap_page(&fe, pteval); /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ From 4d35427ad7641cba08ea0deffae1a78147ad41c0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:07 -0700 Subject: [PATCH 0458/1050] mm: avoid endless recursion in dump_page() dump_page() uses page_mapcount() to get the mapcount of the page. page_mapcount() has VM_BUG_ON_PAGE(PageSlab(page)), as mapcount doesn't make sense for slab pages and the field in struct page is used for other information. This leads to recursion if dump_page() is called for a slub page and DEBUG_VM is enabled: dump_page() -> page_mapcount() -> VM_BUG_ON_PAGE() -> dump_page() -> ... Let's avoid calling page_mapcount() for slab pages in dump_page(). Link: http://lkml.kernel.org/r/20160908082137.131076-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index 8865bfb41b0b..74c7cae4f683 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = { void __dump_page(struct page *page, const char *reason) { + int mapcount = PageSlab(page) ? 
0 : page_mapcount(page); + pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", - page, page_ref_count(page), page_mapcount(page), - page->mapping, page->index); + page, page_ref_count(page), mapcount, + page->mapping, page_to_pgoff(page)); if (PageCompound(page)) pr_cont(" compound_mapcount: %d", compound_mapcount(page)); pr_cont("\n"); From 08eeb3061e44661afb4cb9eb08780e2fff8bfbc5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 19 Sep 2016 14:44:09 -0700 Subject: [PATCH 0459/1050] MAINTAINERS: update email for VLYNQ bus entry Link: http://lkml.kernel.org/r/1473218738-21836-1-git-send-email-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2551f6e2fb43..a0ce40f4c66c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12569,7 +12569,7 @@ F: include/linux/if_*vlan.h F: net/8021q/ VLYNQ BUS -M: Florian Fainelli <florian@openwrt.org> +M: Florian Fainelli <f.fainelli@gmail.com> L: openwrt-devel@lists.openwrt.org (subscribers-only) S: Maintained F: drivers/vlynq/vlynq.c From 7cbdb4a286a60c5d519cb9223fe2134d26870d39 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 19 Sep 2016 14:44:12 -0700 Subject: [PATCH 0460/1050] autofs: use dentry flags to block walks during expire Somewhere along the way the autofs expire operation has changed to hold a spin lock over expired dentry selection. The autofs indirect mount expired dentry selection is complicated and quite lengthy, so it isn't appropriate to hold a spin lock over the operation. Commit 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") added a might_sleep() to dput(), causing a WARN_ONCE() about this usage to be issued. But the spin lock doesn't need to be held over this check; the autofs dentry info flags are enough to block walks into dentries during the expire. I've left the direct mount expire as it is (for now) because it is much simpler and quicker than the indirect mount expire, and adding spin lock release and re-acquires would do nothing more than add overhead.
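The waiter side in autofs4_expire_wait() then polls the flags instead, roughly like this (a simplified sketch with locking elided; the diff below has the real code):

retry:
	state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
	if (state == AUTOFS_INF_WANT_EXPIRE) {
		/* possibly being selected for expire: wait briefly,
		 * then re-check until selection completes or is dropped */
		schedule_timeout_uninterruptible(HZ/10);
		goto retry;
	}

so walks into the dentry block on the dentry info flags rather than on sbi->fs_lock.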
Fixes: 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") Link: http://lkml.kernel.org/r/20160912014017.1773.73060.stgit@pluto.themaw.net Signed-off-by: Ian Kent Reported-by: Takashi Iwai Tested-by: Takashi Iwai Cc: Takashi Iwai Cc: NeilBrown Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 55 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index b493909e7492..d8e6d421c27f 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry, } return NULL; } + /* * Find an eligible tree to time-out * A tree is eligible if :- @@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, struct dentry *root = sb->s_root; struct dentry *dentry; struct dentry *expired; + struct dentry *found; struct autofs_info *ino; if (!root) @@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, dentry = NULL; while ((dentry = get_next_positive_subdir(dentry, root))) { + int flags = how; + spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); - if (ino->flags & AUTOFS_INF_WANT_EXPIRE) - expired = NULL; - else - expired = should_expire(dentry, mnt, timeout, how); - if (!expired) { + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { spin_unlock(&sbi->fs_lock); continue; } + spin_unlock(&sbi->fs_lock); + + expired = should_expire(dentry, mnt, timeout, flags); + if (!expired) + continue; + + spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(expired); ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); - spin_lock(&sbi->fs_lock); - if (should_expire(expired, mnt, timeout, how)) { - if (expired != dentry) - dput(dentry); - goto found; - } + /* Make sure a reference is not taken on found if + * things have changed. + */ + flags &= ~AUTOFS_EXP_LEAVES; + found = should_expire(expired, mnt, timeout, how); + if (!found || found != expired) + /* Something has changed, continue */ + goto next; + + if (expired != dentry) + dput(dentry); + + spin_lock(&sbi->fs_lock); + goto found; +next: + spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; + spin_unlock(&sbi->fs_lock); if (expired != dentry) dput(expired); - spin_unlock(&sbi->fs_lock); } return NULL; @@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); int status; + int state; /* Block on any pending expire */ if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) @@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) if (rcu_walk) return -ECHILD; +retry: spin_lock(&sbi->fs_lock); - if (ino->flags & AUTOFS_INF_EXPIRING) { + state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING); + if (state == AUTOFS_INF_WANT_EXPIRE) { + spin_unlock(&sbi->fs_lock); + /* + * Possibly being selected for expire, wait until + * it's selected or not. 
+ */ + schedule_timeout_uninterruptible(HZ/10); + goto retry; + } + if (state & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); From c8de641b1e9c5489aa6ca57b7836acd68e7563f1 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 19 Sep 2016 14:44:15 -0700 Subject: [PATCH 0461/1050] mm: fix the page_swap_info() BUG_ON check Commit 62c230bc1790 ("mm: add support for a filesystem to activate swap files and use direct_IO for writing swap pages") replaced the swap_aops dirty hook from __set_page_dirty_no_writeback() with swap_set_page_dirty(). For normal cases without these special SWP flags, the code path falls back to __set_page_dirty_no_writeback(), so the behaviour is expected to be the same as before. But swap_set_page_dirty() makes use of the page_swap_info() helper to get the swap_info_struct to check for flags like SWP_FILE, SWP_BLKDEV, etc. as desired for those features. This helper has BUG_ON(!PageSwapCache(page)), which is racy and safe only for the set_page_dirty_lock() path. For the set_page_dirty() path, which often needs to be callable from irq context, kswapd() can toggle the flag behind our back while the call is executing, when the system is low on memory and heavy swapping is ongoing. This ends up in an undesired kernel panic. This patch just moves the check outside the helper to its users appropriately to fix the kernel panic for the described path. A couple of users of the helper already take care of the SwapCache condition, so I skipped them. Link: http://lkml.kernel.org/r/1473460718-31013-1-git-send-email-santosh.shilimkar@oracle.com Signed-off-by: Santosh Shilimkar Cc: Mel Gorman Cc: Joe Perches Cc: Peter Zijlstra Cc: Rik van Riel Cc: David S. Miller Cc: Jens Axboe Cc: Michal Hocko Cc: Hugh Dickins Cc: Al Viro Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 3 +++ mm/swapfile.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_io.c b/mm/page_io.c index 16bd82fad38c..eafe5ddc2b54 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, int ret; struct swap_info_struct *sis = page_swap_info(page); + BUG_ON(!PageSwapCache(page)); if (sis->flags & SWP_FILE) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; @@ -337,6 +338,7 @@ int swap_readpage(struct page *page) int ret = 0; struct swap_info_struct *sis = page_swap_info(page); + BUG_ON(!PageSwapCache(page)); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageUptodate(page), page); if (frontswap_load(page) == 0) { @@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page) if (sis->flags & SWP_FILE) { struct address_space *mapping = sis->swap_file->f_mapping; + BUG_ON(!PageSwapCache(page)); return mapping->a_ops->set_page_dirty(page); } else { return __set_page_dirty_no_writeback(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 78cfa292a29a..2657accc6e2b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry) struct swap_info_struct *page_swap_info(struct page *page) { swp_entry_t swap = { .val = page_private(page) }; - BUG_ON(!PageSwapCache(page)); return swap_info[swp_type(swap)]; } From 31b4beb473e3bdee1bf79db849502dcb24b5c202 Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Mon, 19 Sep 2016 14:44:18 -0700 Subject: [PATCH 0462/1050] ipc/shm: fix crash if CONFIG_SHMEM is not set Commit c01d5b300774 ("shmem: get_unmapped_area align huge page") makes use of shm_get_unmapped_area() in shm_file_operations() unconditional to CONFIG_MMU. As Tony Battersby pointed this can lead NULL-pointer dereference on machine with CONFIG_MMU=y and CONFIG_SHMEM=n. In this case ipc/shm is backed by ramfs which doesn't provide f_op->get_unmapped_area for configurations with MMU. The solution is to provide dummy f_op->get_unmapped_area for ramfs when CONFIG_MMU=y, which just call current->mm->get_unmapped_area(). Fixes: c01d5b300774 ("shmem: get_unmapped_area align huge page") Link: http://lkml.kernel.org/r/20160912102704.140442-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Tony Battersby Tested-by: Tony Battersby Cc: Hugh Dickins Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-mmu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 183a212694bf..12af0490322f 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -27,9 +27,17 @@ #include #include #include +#include #include "internal.h" +static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + const struct file_operations ramfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, @@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, + .get_unmapped_area = ramfs_mmu_get_unmapped_area, }; const struct inode_operations ramfs_file_inode_operations = { From 2b0ad0085aa47ace4756aa501274a7de0325c09c Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:21 -0700 Subject: [PATCH 0463/1050] ocfs2: fix trans extend while flush truncate log Every time, ocfs2_extend_trans() included a credit for truncate log inode, but as that inode had been managed by jbd2 running transaction first time, it will not consume that credit until jbd2_journal_restart(). Since total credits to extend always included the un-consumed ones, there will be more and more un-consumed credit, at last jbd2_journal_restart() will fail due to credit number over the half of max transction credit. 
The following error was caught when unlinking a large file with many extents: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 13626 at fs/jbd2/transaction.c:269 start_this_handle+0x4c3/0x510 [jbd2]() Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea parport_pc parport pcspkr i2c_piix4 i2c_core acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 13626 Comm: unlink Tainted: G W 4.1.12-37.6.3.el6uek.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 02/11/2016 Call Trace: dump_stack+0x48/0x5c warn_slowpath_common+0x95/0xe0 warn_slowpath_null+0x1a/0x20 start_this_handle+0x4c3/0x510 [jbd2] jbd2__journal_restart+0x161/0x1b0 [jbd2] jbd2_journal_restart+0x13/0x20 [jbd2] ocfs2_extend_trans+0x74/0x220 [ocfs2] ocfs2_replay_truncate_records+0x93/0x360 [ocfs2] __ocfs2_flush_truncate_log+0x13e/0x3a0 [ocfs2] ocfs2_remove_btree_range+0x458/0x7f0 [ocfs2] ocfs2_commit_truncate+0x1b3/0x6f0 [ocfs2] ocfs2_truncate_for_delete+0xbd/0x380 [ocfs2] ocfs2_wipe_inode+0x136/0x6a0 [ocfs2] ocfs2_delete_inode+0x2a2/0x3e0 [ocfs2] ocfs2_evict_inode+0x28/0x60 [ocfs2] evict+0xab/0x1a0 iput_final+0xf6/0x190 iput+0xc8/0xe0 do_unlinkat+0x1b7/0x310 SyS_unlink+0x16/0x20 system_call_fastpath+0x12/0x71 ---[ end trace 28aa7410e69369cf ]--- JBD2: unlink wants too many credits (251 > 128) Link: http://lkml.kernel.org/r/1473674623-11810-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 7dabbc31060e..51128789a661 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5922,7 +5922,6 @@ bail: } static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, - handle_t *handle, struct inode *data_alloc_inode, struct buffer_head *data_alloc_bh) { @@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, struct ocfs2_truncate_log *tl; struct inode *tl_inode = osb->osb_tl_inode; struct buffer_head *tl_bh = osb->osb_tl_bh; + handle_t *handle; di = (struct ocfs2_dinode *) tl_bh->b_data; tl = &di->id2.i_dealloc; i = le16_to_cpu(tl->tl_used) - 1; while (i >= 0) { + handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail; + } + /* Caller has given us at least enough credits to * update the truncate log dinode */ status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, @@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, } } - status = ocfs2_extend_trans(handle, - OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_commit_trans(osb, handle); i--; } @@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) { int status; unsigned int 
num_to_flush; - handle_t *handle; struct inode *tl_inode = osb->osb_tl_inode; struct inode *data_alloc_inode = NULL; struct buffer_head *tl_bh = osb->osb_tl_bh; @@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) goto out_mutex; } - handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto out_unlock; - } - - status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, + status = ocfs2_replay_truncate_records(osb, data_alloc_inode, data_alloc_bh); if (status < 0) mlog_errno(status); - ocfs2_commit_trans(osb, handle); - -out_unlock: brelse(data_alloc_bh); ocfs2_inode_unlock(data_alloc_inode, 1); From d5bf141893880f7283fe97e1812c58ff22c8f9a5 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:24 -0700 Subject: [PATCH 0464/1050] ocfs2: fix trans extend while free cached blocks The root cause of this issue is the same as the one fixed by the last patch, but this time the credits for the allocator inode and group descriptor may not be consumed before the transaction is extended. The following error was caught: WARNING: CPU: 0 PID: 2037 at fs/jbd2/transaction.c:269 start_this_handle+0x4c3/0x510 [jbd2]() Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_netfront parport_pc parport pcspkr i2c_piix4 i2c_core acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 2037 Comm: rm Tainted: G W 4.1.12-37.6.3.el6uek.bug24573128v2.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 02/11/2016 Call Trace: dump_stack+0x48/0x5c warn_slowpath_common+0x95/0xe0 warn_slowpath_null+0x1a/0x20 start_this_handle+0x4c3/0x510 [jbd2] jbd2__journal_restart+0x161/0x1b0 [jbd2] jbd2_journal_restart+0x13/0x20 [jbd2] ocfs2_extend_trans+0x74/0x220 [ocfs2] ocfs2_free_cached_blocks+0x16b/0x4e0 [ocfs2] ocfs2_run_deallocs+0x70/0x270 [ocfs2] ocfs2_commit_truncate+0x474/0x6f0 [ocfs2] ocfs2_truncate_for_delete+0xbd/0x380 [ocfs2] ocfs2_wipe_inode+0x136/0x6a0 [ocfs2] ocfs2_delete_inode+0x2a2/0x3e0 [ocfs2] ocfs2_evict_inode+0x28/0x60 [ocfs2] evict+0xab/0x1a0 iput_final+0xf6/0x190 iput+0xc8/0xe0 do_unlinkat+0x1b7/0x310 SyS_unlinkat+0x22/0x40 system_call_fastpath+0x12/0x71 ---[ end trace a62437cb060baa71 ]--- JBD2: rm wants too many credits (149 > 128) Link: http://lkml.kernel.org/r/1473674623-11810-2-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 51128789a661..f165f867f332 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6404,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb, goto out_mutex; } - handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out_unlock; - } - 
while (head) { if (head->free_bg) bg_blkno = head->free_bg; else bg_blkno = ocfs2_which_suballoc_group(head->free_blk, head->free_bit); + handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_unlock; + } + trace_ocfs2_free_cached_blocks( (unsigned long long)head->free_blk, head->free_bit); ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, head->free_bit, bg_blkno, 1); - if (ret) { + if (ret) mlog_errno(ret); - goto out_journal; - } - ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); - if (ret) { - mlog_errno(ret); - goto out_journal; - } + ocfs2_commit_trans(osb, handle); tmp = head; head = head->free_next; kfree(tmp); } -out_journal: - ocfs2_commit_trans(osb, handle); - out_unlock: ocfs2_inode_unlock(inode, 1); brelse(di_bh); From 12703dbfeb15402260e7554d32a34ac40c233990 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:27 -0700 Subject: [PATCH 0465/1050] fsnotify: add a way to stop queueing events on group shutdown Implement a function that can be called when a group is being shut down to stop queueing new events to the group. Fanotify will use this. Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-2-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Miklos Szeredi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/group.c | 19 +++++++++++++++++++ fs/notify/notification.c | 8 +++++++- include/linux/fsnotify_backend.h | 3 +++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/fs/notify/group.c b/fs/notify/group.c index 3e2dd85be5dd..b47f7cfdcaa4 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -39,6 +39,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) kfree(group); } +/* + * Stop queueing new events for this group. Once this function returns + * fsnotify_add_event() will not add any new events to the group's queue. + */ +void fsnotify_group_stop_queueing(struct fsnotify_group *group) +{ + mutex_lock(&group->notification_mutex); + group->shutdown = true; + mutex_unlock(&group->notification_mutex); +} + /* * Trying to get rid of a group. Remove all marks, flush all events and release * the group reference. */ void fsnotify_destroy_group(struct fsnotify_group *group) { + /* + * Stop queueing new events. The code below is careful enough to not + * require this but fanotify needs to stop queuing events even before + * fsnotify_destroy_group() is called and this makes the other callers + * of fsnotify_destroy_group() to see the same behavior. + */ + fsnotify_group_stop_queueing(group); + /* clear all inode marks for this group, attach them to destroy_list */ fsnotify_detach_group_marks(group); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index a95d8e037aeb..3d76e65ff84f 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, * Add an event to the group notification queue. The group can later pull this * event off the queue to deal with. The function returns 0 if the event was * added to the queue, 1 if the event was merged with some other queued event, - * 2 if the queue of events has overflown. + * 2 if the event was not queued - either the queue of events has overflown + * or the group is shutting down. 
*/ int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)) { @@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group, mutex_lock(&group->notification_mutex); + if (group->shutdown) { + mutex_unlock(&group->notification_mutex); + return 2; + } + if (group->q_len >= group->max_events) { ret = 2; /* Queue overflow event only if it isn't already queued */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 58205f33af02..40a9e99de703 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -148,6 +148,7 @@ struct fsnotify_group { #define FS_PRIO_1 1 /* fanotify content based access control */ #define FS_PRIO_2 2 /* fanotify pre-content access */ unsigned int priority; + bool shutdown; /* group is being shut down, don't queue more events */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ @@ -292,6 +293,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); +/* group destruction begins, stop queuing new events */ +extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); /* fasync handler function */ From 96d41019e3ac55f6f0115b0ce97e4f24a3d636d2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:30 -0700 Subject: [PATCH 0466/1050] fanotify: fix list corruption in fanotify_get_response() fanotify_get_response() calls fsnotify_remove_event() when it finds that the group is being released from fanotify_release() (bypass_perm is set). However, the event it removes need not be in the group's notification queue only; it can have already moved to access_list (userspace read the event before closing the fanotify instance fd), which is protected by a different lock. Thus, when fsnotify_remove_event() races with fanotify_release() operating on access_list, the list can get corrupted. Fix the problem by moving all the logic removing permission events from the lists to one place - fanotify_release(). Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-3-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Miklos Szeredi Tested-by: Miklos Szeredi Reviewed-by: Miklos Szeredi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/fanotify/fanotify.c | 13 +---------- fs/notify/fanotify/fanotify_user.c | 36 ++++++++++++++++++++---------- fs/notify/notification.c | 15 ------------- include/linux/fsnotify_backend.h | 3 --- 4 files changed, 25 insertions(+), 42 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d2f97ecca6a5..e0e5f7c3c99f 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response || - atomic_read(&group->fanotify_data.bypass_perm)); - - if (!event->response) { /* bypass_perm set */ - /* - * Event was canceled because group is being destroyed. 
Remove - * it from group's event list because we are responsible for - * freeing the permission event. - */ - fsnotify_remove_event(group, &event->fae.fse); - return 0; - } + wait_event(group->fanotify_data.access_waitq, event->response); /* userspace responded, convert to something usable */ switch (event->response) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 8e8e6bcd1d43..a64313868d3a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_perm_event_info *event, *next; + struct fsnotify_event *fsn_event; /* - * There may be still new events arriving in the notification queue - * but since userspace cannot use fanotify fd anymore, no event can - * enter or leave access_list by now. + * Stop new events from arriving in the notification queue. since + * userspace cannot use fanotify fd anymore, no event can enter or + * leave access_list by now either. + */ + fsnotify_group_stop_queueing(group); + + /* + * Process all permission events on access_list and notification queue + * and simulate reply from userspace. */ spin_lock(&group->fanotify_data.access_lock); - - atomic_inc(&group->fanotify_data.bypass_perm); - list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, fae.fse.list) { pr_debug("%s: found group=%p event=%p\n", __func__, group, @@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file) spin_unlock(&group->fanotify_data.access_lock); /* - * Since bypass_perm is set, newly queued events will not wait for - * access response. Wake up the already sleeping ones now. - * synchronize_srcu() in fsnotify_destroy_group() will wait for all - * processes sleeping in fanotify_handle_event() waiting for access - * response and thus also for all permission events to be freed. + * Destroy all non-permission events. For permission events just + * dequeue them and set the response. They will be freed once the + * response is consumed and fanotify_get_response() returns. */ + mutex_lock(&group->notification_mutex); + while (!fsnotify_notify_queue_is_empty(group)) { + fsn_event = fsnotify_remove_first_event(group); + if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) + fsnotify_destroy_event(group, fsn_event); + else + FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; + } + mutex_unlock(&group->notification_mutex); + + /* Response for all permission events it set, wakeup waiters */ wake_up(&group->fanotify_data.access_waitq); #endif @@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) spin_lock_init(&group->fanotify_data.access_lock); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); - atomic_set(&group->fanotify_data.bypass_perm, 0); #endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 3d76e65ff84f..e455e83ceeeb 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -131,21 +131,6 @@ queue: return ret; } -/* - * Remove @event from group's notification queue. It is the responsibility of - * the caller to destroy the event. 
- */ -void fsnotify_remove_event(struct fsnotify_group *group, - struct fsnotify_event *event) -{ - mutex_lock(&group->notification_mutex); - if (!list_empty(&event->list)) { - list_del_init(&event->list); - group->q_len--; - } - mutex_unlock(&group->notification_mutex); -} - /* * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 40a9e99de703..7268ed076be8 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -180,7 +180,6 @@ struct fsnotify_group { spinlock_t access_lock; struct list_head access_list; wait_queue_head_t access_waitq; - atomic_t bypass_perm; #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; @@ -307,8 +306,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); -/* Remove passed event from groups notification queue */ -extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ From 3bb8b653c86f6b1d2cc05aa1744fed4b18f99485 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 19 Sep 2016 14:44:33 -0700 Subject: [PATCH 0467/1050] ocfs2: fix double unlock in case retry after free truncate log If ocfs2_reserve_cluster_bitmap_bits() fails with ENOSPC, it will try to free the truncate log and then retry. Since ocfs2_try_to_free_truncate_log() will lock/unlock the global bitmap inode, we have to unlock it before calling this function. But when the retried reserve fails with no global bitmap inode lock taken, it will unlock again in the error handling branch and BUG. This issue also exists if no retry is needed and ocfs2_inode_lock() then fails. So fix it. Fixes: 2070ad1aebff ("ocfs2: retry on ENOSPC if sufficient space in truncate log") Link: http://lkml.kernel.org/r/57D91939.6030809@huawei.com Signed-off-by: Joseph Qi Signed-off-by: Jiufei Xue Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/suballoc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ea47120a85ff..6ad3533940ba 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1199,14 +1199,24 @@ retry: inode_unlock((*ac)->ac_inode); ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted); - if (ret == 1) + if (ret == 1) { + iput((*ac)->ac_inode); + (*ac)->ac_inode = NULL; goto retry; + } if (ret < 0) mlog_errno(ret); inode_lock((*ac)->ac_inode); - ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); + ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); + if (ret < 0) { + mlog_errno(ret); + inode_unlock((*ac)->ac_inode); + iput((*ac)->ac_inode); + (*ac)->ac_inode = NULL; + goto bail; + } } if (status < 0) { if (status != -ENOSPC) From db2ba40c277dc545bab531671c3f45ac0afea6f8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 19 Sep 2016 14:44:36 -0700 Subject: [PATCH 0468/1050] mm: memcontrol: make per-cpu charge cache IRQ-safe for socket accounting During cgroup2 rollout into production, we started encountering css refcount underflows and css access crashes in the memory controller. 
Splitting the heavily shared css reference counter into logical users narrowed the imbalance down to the cgroup2 socket memory accounting. The problem turns out to be the per-cpu charge cache. Cgroup1 had a separate socket counter, but the new cgroup2 socket accounting goes through the common charge path that uses a shared per-cpu cache for all memory that is being tracked. Those caches are safe against scheduling preemption, but not against interrupts - such as the newly added packet receive path. When cache draining is interrupted by network RX taking pages out of the cache, the resuming drain operation will put references of in-use pages, thus causing the imbalance. Disable IRQs during all per-cpu charge cache operations. Fixes: f7e1cb6ec51b ("mm: memcontrol: account socket memory in unified hierarchy memory controller") Link: http://lkml.kernel.org/r/20160914194846.11153-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Tejun Heo Cc: "David S. Miller" Cc: Michal Hocko Cc: Vladimir Davydov Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9a6a51a7c416..4be518d4e68a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex); static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { struct memcg_stock_pcp *stock; + unsigned long flags; bool ret = false; if (nr_pages > CHARGE_BATCH) return ret; - stock = &get_cpu_var(memcg_stock); + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { stock->nr_pages -= nr_pages; ret = true; } - put_cpu_var(memcg_stock); + + local_irq_restore(flags); + return ret; } @@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock) stock->cached = NULL; } -/* - * This must be called under preempt disabled or must be called by - * a thread which is pinned to local cpu. - */ static void drain_local_stock(struct work_struct *dummy) { - struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock); + struct memcg_stock_pcp *stock; + unsigned long flags; + + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); + + local_irq_restore(flags); } /* @@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy) */ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); + struct memcg_stock_pcp *stock; + unsigned long flags; + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ drain_stock(stock); stock->cached = memcg; } stock->nr_pages += nr_pages; - put_cpu_var(memcg_stock); + + local_irq_restore(flags); } /* From d979a39d7242e0601bf9b60e89628fb8ac577179 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 19 Sep 2016 14:44:38 -0700 Subject: [PATCH 0469/1050] cgroup: duplicate cgroup reference when cloning sockets When a socket is cloned, the associated sock_cgroup_data is duplicated but not its reference on the cgroup. As a result, the cgroup reference count will underflow when both sockets are destroyed later on. 
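In outline, the fix takes the extra reference on the clone path (a simplified sketch of the change; the diff below is authoritative):

	/* in sk_clone_lock(), once the new sock is set up: */
	cgroup_sk_alloc(&newsk->sk_cgrp_data);

where cgroup_sk_alloc() recognizes the clone case by sock_cgroup_data already carrying a cgroup pointer (skcd->val), and then only takes an additional reference via cgroup_get() instead of looking up the current cgroup.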
Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Link: http://lkml.kernel.org/r/20160914194846.11153-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Tejun Heo Cc: Michal Hocko Cc: Vladimir Davydov Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 6 ++++++ net/core/sock.c | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7f5221..5e8dab5bf9ad 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; + /* Socket clone path */ + if (skcd->val) { + cgroup_get(sock_cgroup_ptr(skcd)); + return; + } + rcu_read_lock(); while (true) { diff --git a/net/core/sock.c b/net/core/sock.c index 25dab8b60223..fd7b41edf1ce 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1362,7 +1362,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; sk_tx_queue_clear(sk); - cgroup_sk_alloc(&sk->sk_cgrp_data); } return sk; @@ -1422,6 +1421,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); + cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); } @@ -1566,6 +1566,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + + cgroup_sk_alloc(&newsk->sk_cgrp_data); + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) From d21c353d5e99c56cdd5b5c1183ffbcaf23b8b960 Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Mon, 19 Sep 2016 14:44:42 -0700 Subject: [PATCH 0470/1050] ocfs2: fix start offset to ocfs2_zero_range_for_truncate() If we punch a hole on a reflink such that the following conditions are met: 1. start offset is on a cluster boundary 2. end offset is not on a cluster boundary 3. (end offset is somewhere in another extent) or (hole range > MAX_CONTIG_BYTES(1MB)), we don't COW the first cluster starting at the start offset. But in this case, we were wrongly passing this cluster to ocfs2_zero_range_for_truncate() to zero out. This will modify the cluster in place and zero it in the source too. Fix this by skipping this cluster in such a scenario. To reproduce: 1. Create a random file of say 10 MB xfs_io -c 'pwrite -b 4k 0 10M' -f 10MBfile 2. Reflink it reflink -f 10MBfile reflnktest 3. Punch a hole starting at a cluster boundary with a range greater than 1MB. You can also use a range that will put the end offset in another extent. fallocate -p -o 0 -l 1048615 reflnktest 4. sync 5. Check the first cluster in the source file. (It will be zeroed out).
dd if=10MBfile iflag=direct bs= count=1 | hexdump -C Link: http://lkml.kernel.org/r/1470957147-14185-1-git-send-email-ashish.samant@oracle.com Signed-off-by: Ashish Samant Reported-by: Saar Maoz Reviewed-by: Srinivas Eeda Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Eric Ren Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4e7b0dc22450..0b055bfb8e86 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, u64 start, u64 len) { int ret = 0; - u64 tmpend, end = start + len; + u64 tmpend = 0; + u64 end = start + len; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int csize = osb->s_clustersize; handle_t *handle; @@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, } /* - * We want to get the byte offset of the end of the 1st cluster. + * If start is on a cluster boundary and end is somewhere in another + * cluster, we have not COWed the cluster starting at start, unless + * end is also within the same cluster. So, in this case, we skip this + * first call to ocfs2_zero_range_for_truncate() truncate and move on + * to the next one. */ - tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); - if (tmpend > end) - tmpend = end; + if ((start & (csize - 1)) != 0) { + /* + * We want to get the byte offset of the end of the 1st + * cluster. + */ + tmpend = (u64)osb->s_clustersize + + (start & ~(osb->s_clustersize - 1)); + if (tmpend > end) + tmpend = end; - trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, - (unsigned long long)tmpend); + trace_ocfs2_zero_partial_clusters_range1( + (unsigned long long)start, + (unsigned long long)tmpend); - ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); - if (ret) - mlog_errno(ret); + ret = ocfs2_zero_range_for_truncate(inode, handle, start, + tmpend); + if (ret) + mlog_errno(ret); + } if (tmpend < end) { /* From 63b52c4936a2e679639c38ef51a50aa8ca1c5c07 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:44 -0700 Subject: [PATCH 0471/1050] Revert "ocfs2: bump up o2cb network protocol version" This reverts commit 38b52efd218b ("ocfs2: bump up o2cb network protocol version"). This commit made rolling upgrade fail. When one node is upgraded to a new version with this commit, the remaining nodes fail to establish connections to it, so applications such as VMs on the remaining nodes can't be live migrated to the upgraded one. This will cause an outage. Since the negotiate-hb-timeout behavior didn't change without this commit, it is safe to revert. Fixes: 38b52efd218bf ("ocfs2: bump up o2cb network protocol version") Link: http://lkml.kernel.org/r/1471396924-10375-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/tcp_internal.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 94b18369b1cc..b95e7df5b76a 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -44,9 +44,6 @@ * version here in tcp_internal.h should not need to be bumped for * filesystem locking changes.
* - * New in version 12 - * - Negotiate hb timeout when storage is down. - * * New in version 11 * - Negotiation of filesystem locking in the dlm join. * @@ -78,7 +75,7 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 12ULL +#define O2NET_PROTOCOL_VERSION 11ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; From b92ae139c308c5223521ed6ec022148b81312809 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Mon, 19 Sep 2016 14:44:47 -0700 Subject: [PATCH 0472/1050] rapidio/rio_cm: avoid GFP_KERNEL in atomic context As reported by Alexey Khoroshilov (https://lkml.org/lkml/2016/9/9/737): riocm_send_close() is called from rio_cm_shutdown() under spin_lock_bh(idr_lock), but riocm_send_close() uses a GFP_KERNEL allocation. Fix by taking riocm_send_close() outside of spinlock protected code. [akpm@linux-foundation.org: remove unneeded `if (!list_empty())'] Link: http://lkml.kernel.org/r/20160915175402.10122-1-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Reported-by: Alexey Khoroshilov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio_cm.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 3fa17ac8df54..cebc296463ad 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -2247,17 +2247,30 @@ static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code, { struct rio_channel *ch; unsigned int i; + LIST_HEAD(list); riocm_debug(EXIT, "."); + /* + * If there are any channels left in connected state send + * close notification to the connection partner. + * First build a list of channels that require a closing + * notification because function riocm_send_close() should + * be called outside of spinlock protected code. + */ spin_lock_bh(&idr_lock); idr_for_each_entry(&ch_idr, ch, i) { - riocm_debug(EXIT, "close ch %d", ch->id); - if (ch->state == RIO_CM_CONNECTED) - riocm_send_close(ch); + if (ch->state == RIO_CM_CONNECTED) { + riocm_debug(EXIT, "close ch %d", ch->id); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } } spin_unlock_bh(&idr_lock); + list_for_each_entry(ch, &list, ch_node) + riocm_send_close(ch); + return NOTIFY_DONE; } From 727653d6ce7103b245eb8041f55dd5885f4c3289 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 19 Sep 2016 18:29:15 +0100 Subject: [PATCH 0473/1050] irqchip/gicv3: Silence noisy DEBUG_PER_CPU_MAPS warning gic_raise_softirq() walks the list of CPUs using for_each_cpu() and calls gic_compute_target_list(), which advances the iterator by the number of CPUs in the cluster. If gic_compute_target_list() reaches the last CPU, it leaves the iterator pointing at the last CPU. This means that on the next pass round the for_each_cpu() loop, cpumask_next() will be called with an invalid CPU.
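The remedy, visible in the irq-gic-v3.c hunk further below, is to peek at the advanced iterator and commit it only while it still names a valid CPU, so the value handed back to for_each_cpu() always stays in range. A self-contained sketch of that peek-then-commit shape, in plain user-space C over a toy bitmask (mask_next() and walk() are illustrative stand-ins for cpumask_next() and gic_compute_target_list(), and each pass consumes a single CPU rather than a whole cluster):

#include <stdio.h>

#define NR_CPUS 4	/* stand-in for nr_cpu_ids */

/* Stand-in for cpumask_next(): first set bit after 'cpu', or NR_CPUS
 * when none is left. Like the kernel helper, it expects -1..NR_CPUS-1. */
static int mask_next(int cpu, unsigned int mask)
{
	for (cpu++; cpu < NR_CPUS; cpu++)
		if (mask & (1u << cpu))
			return cpu;
	return NR_CPUS;
}

/* Advance the iterator only after checking that the peeked value still
 * names a valid CPU, so the caller's loop never restarts from an
 * out-of-range position. */
static void walk(unsigned int mask)
{
	int cpu = mask_next(-1, mask);

	while (cpu < NR_CPUS) {
		int next_cpu;

		printf("targeting cpu %d\n", cpu);

		next_cpu = mask_next(cpu, mask);
		if (next_cpu >= NR_CPUS)
			break;		/* leave 'cpu' pointing at a valid CPU */
		cpu = next_cpu;
	}
}

int main(void)
{
	walk(0xb);	/* CPUs 0, 1 and 3 */
	return 0;
}

The extra next_cpu variable exists purely to keep the committed iterator valid; the loop's exit condition is unchanged.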
Calling cpumask_next() with an invalid CPU triggers a warning when built with CONFIG_DEBUG_PER_CPU_MAPS: [ 3.077738] GICv3: CPU1: found redistributor 1 region 0:0x000000002f120000 [ 3.077943] CPU1: Booted secondary processor [410fd0f0] [ 3.078542] ------------[ cut here ]------------ [ 3.078746] WARNING: CPU: 1 PID: 0 at ../include/linux/cpumask.h:121 gic_raise_softirq+0x12c/0x170 [ 3.078812] Modules linked in: [ 3.078869] [ 3.078930] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-rc5+ #5188 [ 3.078994] Hardware name: Foundation-v8A (DT) [ 3.079059] task: ffff80087a1a0080 task.stack: ffff80087a19c000 [ 3.079145] PC is at gic_raise_softirq+0x12c/0x170 [ 3.079226] LR is at gic_raise_softirq+0xa4/0x170 [ 3.079296] pc : [] lr : [] pstate: 200001c9 [ 3.081139] Call trace: [ 3.081202] Exception stack(0xffff80087a19fbe0 to 0xffff80087a19fd10) [ 3.082269] [] gic_raise_softirq+0x12c/0x170 [ 3.082354] [] smp_send_reschedule+0x34/0x40 [ 3.082433] [] resched_curr+0x50/0x88 [ 3.082512] [] check_preempt_curr+0x60/0xd0 [ 3.082593] [] ttwu_do_wakeup+0x20/0xe8 [ 3.082672] [] ttwu_do_activate+0x90/0xc0 [ 3.082753] [] try_to_wake_up+0x224/0x370 [ 3.082836] [] default_wake_function+0x10/0x18 [ 3.082920] [] __wake_up_common+0x5c/0xa0 [ 3.083003] [] __wake_up_locked+0x14/0x20 [ 3.083086] [] complete+0x40/0x60 [ 3.083168] [] secondary_start_kernel+0x15c/0x1d0 [ 3.083240] [<00000000808911a4>] 0x808911a4 [ 3.113401] Detected PIPT I-cache on CPU2 Avoid updating the iterator if the next call to cpumask_next() would cause the for_each_cpu() loop to exit. There is no change to gic_raise_softirq()'s behaviour (cpumask_next()'s eventual call to _find_next_bit() will return early since start >= nbits); this patch just silences the warning. Fixes: 021f653791ad ("irqchip: gic-v3: Initial support for GICv3") Signed-off-by: James Morse Acked-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Jason Cooper Link: http://lkml.kernel.org/r/1474306155-3303-1-git-send-email-james.morse@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index ede5672ab34d..da6c0ba61d4f 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -548,7 +548,7 @@ static int gic_starting_cpu(unsigned int cpu) static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, unsigned long cluster_id) { - int cpu = *base_cpu; + int next_cpu, cpu = *base_cpu; unsigned long mpidr = cpu_logical_map(cpu); u16 tlist = 0; @@ -562,9 +562,10 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, tlist |= 1 << (mpidr & 0xf); - cpu = cpumask_next(cpu, mask); - if (cpu >= nr_cpu_ids) + next_cpu = cpumask_next(cpu, mask); + if (next_cpu >= nr_cpu_ids) goto out; + cpu = next_cpu; mpidr = cpu_logical_map(cpu); From 07b26c9454a2a19fff86d6fcf2aba6bc801eb8d8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 Sep 2016 12:58:47 +0200 Subject: [PATCH 0474/1050] gso: Support partial splitting at the frag_list pointer Since commit 8a29111c7 ("net: gro: allow to build full sized skb") gro may build buffers with a frag_list. This can hurt forwarding because most NICs can't offload such packets; they need to be segmented in software. This patch splits buffers with a frag_list at the frag_list pointer into buffers that can be TSO offloaded. Signed-off-by: Steffen Klassert Acked-by: Alexander Duyck Signed-off-by: David S.
Miller --- net/core/skbuff.c | 51 +++++++++++++++++++++++++++++++++--------- net/ipv4/af_inet.c | 14 ++++++++---- net/ipv4/gre_offload.c | 6 +++-- net/ipv4/tcp_offload.c | 13 ++++++----- net/ipv4/udp_offload.c | 6 +++-- net/ipv6/ip6_offload.c | 5 ++++- 6 files changed, 69 insertions(+), 26 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e329d411242..7bf82a28e10a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3097,11 +3097,31 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); - /* GSO partial only requires that we trim off any excess that - * doesn't fit into an MSS sized block, so take care of that - * now. - */ - if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) { + if (sg && csum && (mss != GSO_BY_FRAGS)) { + if (!(features & NETIF_F_GSO_PARTIAL)) { + struct sk_buff *iter; + + if (!list_skb || + !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) + goto normal; + + /* Split the buffer at the frag_list pointer. + * This is based on the assumption that all + * buffers in the chain excluding the last + * containing the same amount of data. + */ + skb_walk_frags(head_skb, iter) { + if (skb_headlen(iter)) + goto normal; + + len -= iter->len; + } + } + + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ partial_segs = len / mss; if (partial_segs > 1) mss *= partial_segs; @@ -3109,6 +3129,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, partial_segs = 0; } +normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3300,21 +3321,29 @@ perform_csum_check: */ segs->prev = tail; - /* Update GSO info on first skb in partial sequence. */ if (partial_segs) { + struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; + unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ - type |= SKB_GSO_PARTIAL; + type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. 
*/ - skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; - skb_shinfo(segs)->gso_segs = partial_segs; - skb_shinfo(segs)->gso_type = type; - SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + for (iter = segs; iter; iter = iter->next) { + skb_shinfo(iter)->gso_size = gso_size; + skb_shinfo(iter)->gso_segs = partial_segs; + skb_shinfo(iter)->gso_type = type; + SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; + } + + if (tail->len - doffset <= gso_size) + skb_shinfo(tail)->gso_size = 0; + else if (tail != segs) + skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e94b47be0019..1effc986739e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1192,7 +1192,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, encap; + bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; @@ -1245,6 +1245,8 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (IS_ERR_OR_NULL(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); @@ -1259,9 +1261,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; } - tot_len = skb_shinfo(skb)->gso_size + - SKB_GSO_CB(skb)->data_offset + - skb->head - (unsigned char *)iph; + + if (gso_partial) + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; + else + tot_len = skb->len - nhoff; } else { if (!fixedid) iph->id = htons(id++); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ecd1e09dbbf1..96e0efecefa6 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo; + bool need_csum, ufo, gso_partial; if (!skb->encapsulation) goto out; @@ -69,6 +69,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); gre_offset = outer_hlen - tnl_hlen; skb = segs; @@ -96,7 +98,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (skb_is_gso(skb)) { + if (gso_partial) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 5c5964962d0c..bc68da38ea86 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -90,12 +90,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } - /* GSO partial only requires splitting the frame into an MSS - * multiple and possibly a remainder. So update the mss now. 
- */ - if (features & NETIF_F_GSO_PARTIAL) - mss = skb->len - (skb->len % mss); - copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -108,6 +102,13 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, /* Only first segment might have ooo_okay set */ segs->ooo_okay = ooo_okay; + /* GSO partial and frag_list segmentation only requires splitting + * the frame into an MSS multiple and possibly a remainder, both + * cases return a GSO skb. So update the mss now. + */ + if (skb_is_gso(segs)) + mss *= skb_shinfo(segs)->gso_segs; + delta = htonl(oldlen + (thlen + mss)); skb = segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 81f253b6ff36..f9333c963607 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo; + bool remcsum, need_csum, offload_csum, ufo, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -88,6 +88,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); udp_offset = outer_hlen - tnl_hlen; skb = segs; @@ -117,7 +119,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (skb_is_gso(skb)) { + if (gso_partial) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 22e90e56b5a9..e7bfd55899a3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,6 +69,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int offset = 0; bool encap, udpfrag; int nhoff; + bool gso_partial; skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); @@ -101,9 +102,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (IS_ERR(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (skb_is_gso(skb)) + if (gso_partial) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); From 8d6be8b627389c6dc7e0ea2455a7542c8a2a16a7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:00 -0400 Subject: [PATCH 0475/1050] bnxt_en: Use RSS flags defined in the bnxt_hsi.h file. And remove redundant definitions of the same flags. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 228c964e709a..cee0e8db7cc1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3419,10 +3419,10 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1); if (set_rss) { - vnic->hash_type = BNXT_RSS_HASH_TYPE_FLAG_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_IPV6 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6; + vnic->hash_type = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6; req.hash_type = cpu_to_le32(vnic->hash_type); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 23e04a6142fb..db4814e927f7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -389,11 +389,6 @@ struct rx_tpa_end_cmp_ext { #define INVALID_HW_RING_ID ((u16)-1) -#define BNXT_RSS_HASH_TYPE_FLAG_IPV4 0x01 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 0x02 -#define BNXT_RSS_HASH_TYPE_FLAG_IPV6 0x04 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6 0x08 - /* The hardware supports certain page sizes. Use the supported page sizes * to allocate the rings. */ From adbc830545003c4b7494c903654bea22e5a66bb4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:01 -0400 Subject: [PATCH 0476/1050] bnxt_en: Simplify PCI device names and add additional PCI IDs. Remove "Single-port/Dual-port" from the device names. Dual-port devices will appear as 2 separate devices, so there is no need to call each a dual-port device. Use a more generic name for VF devices belonging to the same chip family. Add some remaining NPAR device IDs. Signed-off-by: David Christensen Signed-off-by: Michael Chan Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 68 ++++++++++++----------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cee0e8db7cc1..d9b4cd1694ff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -93,50 +93,49 @@ enum board_idx { BCM57404_NPAR, BCM57406_NPAR, BCM57407_SFP, + BCM57407_NPAR, BCM57414_NPAR, BCM57416_NPAR, - BCM57304_VF, - BCM57404_VF, - BCM57414_VF, - BCM57314_VF, + NETXTREME_E_VF, + NETXTREME_C_VF, }; /* indexed by enum above */ static const struct { char *name; } board_info[] = { - { "Broadcom BCM57301 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57302 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57304 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM58700 Nitro 4-port 1Gb/2.5Gb/10Gb Ethernet" }, - { "Broadcom BCM57311 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57312 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57402 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57404 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57406 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" }, + { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57402 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57412 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57414 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57314 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10Gb Ethernet" }, + { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" }, { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57304 NetXtreme-C Ethernet Virtual Function" }, - { "Broadcom BCM57404 NetXtreme-E Ethernet Virtual Function" }, - { "Broadcom BCM57414 NetXtreme-E Ethernet Virtual 
Function" }, - { "Broadcom BCM57314 NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-C Ethernet Virtual Function" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { + { PCI_VDEVICE(BROADCOM, 0x16c0), .driver_data = BCM57417_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 }, { PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 }, { PCI_VDEVICE(BROADCOM, 0x16ca), .driver_data = BCM57304 }, @@ -160,13 +159,19 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16e7), .driver_data = BCM57404_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e8), .driver_data = BCM57406_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e9), .driver_data = BCM57407_SFP }, + { PCI_VDEVICE(BROADCOM, 0x16ea), .driver_data = BCM57407_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16eb), .driver_data = BCM57412_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ec), .driver_data = BCM57414_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ed), .driver_data = BCM57414_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ee), .driver_data = BCM57416_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR }, #ifdef CONFIG_BNXT_SRIOV - { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = BCM57304_VF }, - { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = BCM57404_VF }, - { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = BCM57414_VF }, - { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = BCM57314_VF }, + { PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF }, #endif { 0 } }; @@ -189,8 +194,7 @@ static const u16 bnxt_async_events_arr[] = { static bool bnxt_vf_pciid(enum board_idx idx) { - return (idx == BCM57304_VF || idx == BCM57404_VF || - idx == BCM57314_VF || idx == BCM57414_VF); + return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) From 441cabbbf1bd0b99e283c9116fe430e53ee67a4a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:02 -0400 Subject: [PATCH 0477/1050] bnxt_en: Update to firmware interface spec 1.5.1. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 14 +- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 1251 ++++++++++------- 3 files changed, 760 insertions(+), 508 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d9b4cd1694ff..f6b4f342d7b4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4253,6 +4253,9 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) if (bp->max_tc > BNXT_MAX_QUEUE) bp->max_tc = BNXT_MAX_QUEUE; + if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG) + bp->max_tc = 1; + qptr = &resp->queue_id0; for (i = 0; i < bp->max_tc; i++) { bp->q_info[i].queue_id = *qptr++; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index db4814e927f7..012cc51440b7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -11,10 +11,10 @@ #define BNXT_H #define DRV_MODULE_NAME "bnxt_en" -#define DRV_MODULE_VERSION "1.3.0" +#define DRV_MODULE_VERSION "1.5.0" #define DRV_VER_MAJ 1 -#define DRV_VER_MIN 3 +#define DRV_VER_MIN 5 #define DRV_VER_UPD 0 struct tx_bd { @@ -106,11 +106,11 @@ struct tx_cmp { #define CMP_TYPE_REMOTE_DRIVER_REQ 34 #define CMP_TYPE_REMOTE_DRIVER_RESP 36 #define CMP_TYPE_ERROR_STATUS 48 - #define CMPL_BASE_TYPE_STAT_EJECT (0x1aUL << 0) - #define CMPL_BASE_TYPE_HWRM_DONE (0x20UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_REQ (0x22UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_RESP (0x24UL << 0) - #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define CMPL_BASE_TYPE_STAT_EJECT 0x1aUL + #define CMPL_BASE_TYPE_HWRM_DONE 0x20UL + #define CMPL_BASE_TYPE_HWRM_FWD_REQ 0x22UL + #define CMPL_BASE_TYPE_HWRM_FWD_RESP 0x24UL + #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT 0x2eUL #define TX_CMP_FLAGS_ERROR (1 << 6) #define TX_CMP_FLAGS_PUSH (1 << 7) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 517567f6d651..04a96cc3498a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -39,7 +39,7 @@ struct eject_cmpl { __le16 type; #define EJECT_CMPL_TYPE_MASK 0x3fUL #define EJECT_CMPL_TYPE_SFT 0 - #define EJECT_CMPL_TYPE_STAT_EJECT (0x1aUL << 0) + #define EJECT_CMPL_TYPE_STAT_EJECT 0x1aUL __le16 len; __le32 opaque; __le32 v; @@ -52,7 +52,7 @@ struct hwrm_cmpl { __le16 type; #define HWRM_CMPL_TYPE_MASK 0x3fUL #define HWRM_CMPL_TYPE_SFT 0 - #define HWRM_CMPL_TYPE_HWRM_DONE (0x20UL << 0) + #define HWRM_CMPL_TYPE_HWRM_DONE 0x20UL __le16 sequence_id; __le32 unused_1; __le32 v; @@ -65,7 +65,7 @@ struct hwrm_fwd_req_cmpl { __le16 req_len_type; #define HWRM_FWD_REQ_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_REQ_CMPL_TYPE_SFT 0 - #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ (0x22UL << 0) + #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ 0x22UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_MASK 0xffc0UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_SFT 6 __le16 source_id; @@ -81,7 +81,7 @@ struct hwrm_fwd_resp_cmpl { __le16 type; #define HWRM_FWD_RESP_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_RESP_CMPL_TYPE_SFT 0 - #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP (0x24UL << 0) + #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP 0x24UL __le16 source_id; __le16 resp_len; __le16 unused_1; @@ -96,25 +96,26 @@ struct hwrm_async_event_cmpl { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_TYPE_SFT 
0 - #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR (0x30UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE 0x2UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD 0x11UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD 0x20UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD 0x21UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR 0x30UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_V 0x1UL @@ -130,9 +131,9 @@ struct hwrm_async_event_cmpl_link_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V 0x1UL @@ -156,9 +157,9 @@ struct hwrm_async_event_cmpl_link_mtu_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 
(0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE 0x1UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_V 0x1UL @@ -176,9 +177,9 @@ struct hwrm_async_event_cmpl_link_speed_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE 0x2UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_V 0x1UL @@ -200,8 +201,7 @@ struct hwrm_async_event_cmpl_link_speed_change { #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_40GB (0x190UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_50GB (0x1f4UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB (0x3e8UL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB (0xffffUL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0000UL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_SFT 16 }; @@ -211,9 +211,9 @@ struct hwrm_async_event_cmpl_dcb_config_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V 0x1UL @@ -231,9 +231,9 @@ struct hwrm_async_event_cmpl_port_conn_not_allowed { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V 0x1UL @@ -258,9 +258,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_not_allowed { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_MASK 0x3fUL #define 
HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_V 0x1UL @@ -278,9 +278,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V 0x1UL @@ -300,9 +300,9 @@ struct hwrm_async_event_cmpl_func_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_V 0x1UL @@ -320,9 +320,9 @@ struct hwrm_async_event_cmpl_func_drvr_load { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD 0x11UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_V 0x1UL @@ -340,9 +340,9 @@ struct hwrm_async_event_cmpl_pf_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V 0x1UL @@ -362,9 +362,9 @@ struct hwrm_async_event_cmpl_pf_drvr_load { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define 
HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD 0x21UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_V 0x1UL @@ -384,9 +384,9 @@ struct hwrm_async_event_cmpl_vf_flr { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR (0x30UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR 0x30UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_V 0x1UL @@ -404,9 +404,9 @@ struct hwrm_async_event_cmpl_vf_mac_addr_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_V 0x1UL @@ -424,9 +424,9 @@ struct hwrm_async_event_cmpl_pf_vf_comm_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_V 0x1UL @@ -443,9 +443,9 @@ struct hwrm_async_event_cmpl_vf_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V 0x1UL @@ -465,15 +465,15 @@ struct hwrm_async_event_cmpl_hwrm_error { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING (0x0UL << 0) - #define 
HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL (0x1UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_V 0x1UL @@ -485,12 +485,12 @@ struct hwrm_async_event_cmpl_hwrm_error { #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL }; -/* HW Resource Manager Specification 1.3.0 */ +/* HW Resource Manager Specification 1.5.1 */ #define HWRM_VERSION_MAJOR 1 -#define HWRM_VERSION_MINOR 3 -#define HWRM_VERSION_UPDATE 0 +#define HWRM_VERSION_MINOR 5 +#define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_STR "1.3.0" +#define HWRM_VERSION_STR "1.5.1" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. @@ -556,8 +556,8 @@ struct cmd_nums { #define HWRM_QUEUE_QPORTCFG (0x30UL) #define HWRM_QUEUE_QCFG (0x31UL) #define HWRM_QUEUE_CFG (0x32UL) - #define HWRM_QUEUE_BUFFERS_QCFG (0x33UL) - #define HWRM_QUEUE_BUFFERS_CFG (0x34UL) + #define RESERVED2 (0x33UL) + #define RESERVED3 (0x34UL) #define HWRM_QUEUE_PFCENABLE_QCFG (0x35UL) #define HWRM_QUEUE_PFCENABLE_CFG (0x36UL) #define HWRM_QUEUE_PRI2COS_QCFG (0x37UL) @@ -574,6 +574,7 @@ struct cmd_nums { #define HWRM_VNIC_RSS_QCFG (0x47UL) #define HWRM_VNIC_PLCMODES_CFG (0x48UL) #define HWRM_VNIC_PLCMODES_QCFG (0x49UL) + #define HWRM_VNIC_QCAPS (0x4aUL) #define HWRM_RING_ALLOC (0x50UL) #define HWRM_RING_FREE (0x51UL) #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS (0x52UL) @@ -581,13 +582,15 @@ struct cmd_nums { #define HWRM_RING_RESET (0x5eUL) #define HWRM_RING_GRP_ALLOC (0x60UL) #define HWRM_RING_GRP_FREE (0x61UL) + #define RESERVED5 (0x64UL) + #define RESERVED6 (0x65UL) #define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC (0x70UL) #define HWRM_VNIC_RSS_COS_LB_CTX_FREE (0x71UL) #define HWRM_CFA_L2_FILTER_ALLOC (0x90UL) #define HWRM_CFA_L2_FILTER_FREE (0x91UL) #define HWRM_CFA_L2_FILTER_CFG (0x92UL) #define HWRM_CFA_L2_SET_RX_MASK (0x93UL) - #define RESERVED3 (0x94UL) + #define RESERVED4 (0x94UL) #define HWRM_CFA_TUNNEL_FILTER_ALLOC (0x95UL) #define HWRM_CFA_TUNNEL_FILTER_FREE (0x96UL) #define HWRM_CFA_ENCAP_RECORD_ALLOC (0x97UL) @@ -607,6 +610,8 @@ struct cmd_nums { #define HWRM_STAT_CTX_CLR_STATS (0xb3UL) #define HWRM_FW_RESET (0xc0UL) #define HWRM_FW_QSTATUS (0xc1UL) + #define HWRM_FW_SET_TIME (0xc8UL) + #define HWRM_FW_GET_TIME (0xc9UL) #define HWRM_EXEC_FWD_RESP (0xd0UL) #define HWRM_REJECT_FWD_RESP (0xd1UL) #define HWRM_FWD_RESP (0xd2UL) @@ -615,11 +620,13 @@ struct cmd_nums { #define HWRM_WOL_FILTER_ALLOC (0xf0UL) #define HWRM_WOL_FILTER_FREE (0xf1UL) #define HWRM_WOL_FILTER_QCFG (0xf2UL) + #define HWRM_WOL_REASON_QCFG (0xf3UL) #define HWRM_DBG_READ_DIRECT (0xff10UL) #define HWRM_DBG_READ_INDIRECT (0xff11UL) #define HWRM_DBG_WRITE_DIRECT (0xff12UL) #define HWRM_DBG_WRITE_INDIRECT (0xff13UL) #define HWRM_DBG_DUMP (0xff14UL) + #define HWRM_NVM_INSTALL_UPDATE (0xfff3UL) #define HWRM_NVM_MODIFY (0xfff4UL) #define HWRM_NVM_VERIFY_UPDATE (0xfff5UL) #define HWRM_NVM_GET_DEV_INFO (0xfff6UL) @@ -824,7 +831,9 @@ struct hwrm_ver_get_output { u8 netctrl_fw_min; u8 netctrl_fw_bld; u8 netctrl_fw_rsvd; - __le32 reserved1; 
+ __le32 dev_caps_cfg; + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED 0x1UL + #define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED 0x2UL u8 roce_fw_maj; u8 roce_fw_min; u8 roce_fw_bld; @@ -839,9 +848,9 @@ struct hwrm_ver_get_output { u8 chip_metal; u8 chip_bond_id; u8 chip_platform_type; - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC (0x0UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA (0x1UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM (0x2UL << 0) + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC 0x0UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA 0x1UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM 0x2UL __le16 max_req_win_len; __le16 max_resp_len; __le16 def_req_timeout; @@ -863,10 +872,10 @@ struct hwrm_func_reset_input { #define FUNC_RESET_REQ_ENABLES_VF_ID_VALID 0x1UL __le16 vf_id; u8 func_reset_level; - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL (0x0UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME (0x1UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN (0x2UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF (0x3UL << 0) + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL 0x0UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME 0x1UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN 0x2UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF 0x3UL u8 unused_0; }; @@ -1028,6 +1037,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED 0x20UL #define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED 0x400UL u8 mac_address[6]; __le16 max_rsscos_ctx; __le16 max_cmpl_rings; @@ -1047,9 +1060,8 @@ struct hwrm_func_qcaps_output { __le32 max_mcast_filters; __le32 max_flow_id; __le32 max_hw_ring_grps; + __le16 max_sp_tx_rings; u8 unused_0; - u8 unused_1; - u8 unused_2; u8 valid; }; @@ -1077,6 +1089,7 @@ struct hwrm_func_qcfg_output { __le16 flags; #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED 0x1UL #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED 0x2UL + #define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED 0x4UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1089,29 +1102,46 @@ struct hwrm_func_qcfg_output { __le16 mru; __le16 stat_ctx_id; u8 port_partition_type; - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF (0x0UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS (0x1UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 (0x2UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 (0x3UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 (0x4UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN (0xffUL << 0) + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF 0x0UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS 0x1UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 0x2UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 0x3UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 0x4UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN 0xffUL u8 unused_0; __le16 dflt_vnic_id; u8 unused_1; u8 unused_2; __le32 min_bw; + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MIN_BW_RSVD 0x10000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + 
#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 max_bw;
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_SFT 0
+ #define FUNC_QCFG_RESP_MAX_BW_RSVD 0x10000000UL
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 evb_mode;
- #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB (0x0UL << 0)
- #define FUNC_QCFG_RESP_EVB_MODE_VEB (0x1UL << 0)
- #define FUNC_QCFG_RESP_EVB_MODE_VEPA (0x2UL << 0)
+ #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL
+ #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL
+ #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL
  u8 unused_3;
- __le16 unused_4;
+ __le16 alloc_vfs;
  __le32 alloc_mcast_filters;
  __le32 alloc_hw_ring_grps;
- u8 unused_5;
- u8 unused_6;
- u8 unused_7;
+ __le16 alloc_sp_tx_rings;
+ u8 unused_4;
  u8 valid;
 };
@@ -1171,18 +1201,36 @@ struct hwrm_func_cfg_input {
  __le16 dflt_vlan;
  __be32 dflt_ip_addr[4];
  __le32 min_bw;
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_SFT 0
+ #define FUNC_CFG_REQ_MIN_BW_RSVD 0x10000000UL
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 max_bw;
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_SFT 0
+ #define FUNC_CFG_REQ_MAX_BW_RSVD 0x10000000UL
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID
  __le16 async_event_cr;
  u8 vlan_antispoof_mode;
- #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK (0x0UL << 0)
- #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN (0x1UL << 0)
- #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE (0x2UL << 0)
- #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN (0x3UL << 0)
+ #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK 0x0UL
+ #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN 0x1UL
+ #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE 0x2UL
+ #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN 0x3UL
  u8 allowed_vlan_pris;
  u8 evb_mode;
- #define FUNC_CFG_REQ_EVB_MODE_NO_EVB (0x0UL << 0)
- #define FUNC_CFG_REQ_EVB_MODE_VEB (0x1UL << 0)
- #define FUNC_CFG_REQ_EVB_MODE_VEPA (0x2UL << 0)
+ #define FUNC_CFG_REQ_EVB_MODE_NO_EVB 0x0UL
+ #define FUNC_CFG_REQ_EVB_MODE_VEB 0x1UL
+ #define FUNC_CFG_REQ_EVB_MODE_VEPA 0x2UL
  u8 unused_2;
  __le16 num_mcast_filters;
 };
@@ -1341,16 +1389,16 @@ struct hwrm_func_drv_rgtr_input {
  #define FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD 0x8UL
  #define FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD 0x10UL
  __le16 os_type;
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN (0x0UL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER (0x1UL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS (0xeUL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS (0x12UL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS (0x1dUL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX (0x24UL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD (0x2aUL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI (0x68UL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 (0x73UL << 0)
- #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 (0x74UL << 0)
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN 0x0UL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER 0x1UL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS 0xeUL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS 0x12UL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS 0x1dUL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX 0x24UL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD 0x2aUL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI 0x68UL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 0x73UL
+ #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 0x74UL
  u8 ver_maj;
  u8 ver_min;
  u8 ver_upd;
@@ -1415,13 +1463,13 @@ struct hwrm_func_buf_rgtr_input {
  __le16 vf_id;
  __le16 req_buf_num_pages;
  __le16 req_buf_page_size;
- #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B (0x4UL << 0)
- #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K (0xcUL << 0)
- #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K (0xdUL << 0)
- #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K (0x10UL << 0)
- #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M (0x15UL << 0)
- #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M (0x16UL << 0)
- #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G (0x1eUL << 0)
+ #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B 0x4UL
+ #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K 0xcUL
+ #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K 0xdUL
+ #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K 0x10UL
+ #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M 0x15UL
+ #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M 0x16UL
+ #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G 0x1eUL
  __le16 req_buf_len;
  __le16 resp_buf_len;
  u8 unused_0;
@@ -1473,16 +1521,16 @@ struct hwrm_func_drv_qver_output {
  __le16 seq_id;
  __le16 resp_len;
  __le16 os_type;
- #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN (0x0UL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER (0x1UL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS (0xeUL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS (0x12UL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS (0x1dUL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX (0x24UL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD (0x2aUL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI (0x68UL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 (0x73UL << 0)
- #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 (0x74UL << 0)
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN 0x0UL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER 0x1UL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS 0xeUL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS 0x12UL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS 0x1dUL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX 0x24UL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD 0x2aUL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI 0x68UL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 0x73UL
+ #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 0x74UL
  u8 ver_maj;
  u8 ver_min;
  u8 ver_upd;
@@ -1528,44 +1576,44 @@ struct hwrm_port_phy_cfg_input {
  #define PORT_PHY_CFG_REQ_ENABLES_TX_LPI_TIMER 0x400UL
  __le16 port_id;
  __le16 force_link_speed;
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB (0x1UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB (0xaUL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB (0x14UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB (0x19UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB (0x64UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB (0xc8UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB (0xfaUL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB (0x190UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB (0x1f4UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB (0x3e8UL << 0)
- #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB (0xffffUL << 0)
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB 0x1UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB 0xaUL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB 0x14UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB 0x19UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB 0x64UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB 0xc8UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB 0xfaUL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB 0x190UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB 0x1f4UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB 0x3e8UL
+ #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB 0xffffUL
  u8 auto_mode;
- #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE (0x0UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS (0x1UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED (0x2UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK (0x4UL << 0)
+ #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE 0x0UL
+ #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS 0x1UL
+ #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED 0x2UL
+ #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW 0x3UL
+ #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK 0x4UL
  u8 auto_duplex;
- #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF (0x0UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL (0x1UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH (0x2UL << 0)
+ #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF 0x0UL
+ #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL 0x1UL
+ #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH 0x2UL
  u8 auto_pause;
  #define PORT_PHY_CFG_REQ_AUTO_PAUSE_TX 0x1UL
  #define PORT_PHY_CFG_REQ_AUTO_PAUSE_RX 0x2UL
  #define PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL
  u8 unused_0;
  __le16 auto_link_speed;
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB (0x1UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB (0xaUL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB (0x14UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB (0x19UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB (0x64UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB (0xc8UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB (0xfaUL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB (0x190UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB (0x1f4UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB (0x3e8UL << 0)
- #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB (0xffffUL << 0)
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB 0x1UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB 0xaUL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB 0x14UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB 0x19UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB 0x64UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB 0xc8UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB 0xfaUL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB 0x190UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB 0x1f4UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB 0x3e8UL
+ #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB 0xffffUL
  __le16 auto_link_speed_mask;
  #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL
  #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MB 0x2UL
@@ -1582,12 +1630,12 @@ struct hwrm_port_phy_cfg_input {
  #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL
  #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MB 0x2000UL
  u8 wirespeed;
- #define PORT_PHY_CFG_REQ_WIRESPEED_OFF (0x0UL << 0)
- #define PORT_PHY_CFG_REQ_WIRESPEED_ON (0x1UL << 0)
+ #define PORT_PHY_CFG_REQ_WIRESPEED_OFF 0x0UL
+ #define PORT_PHY_CFG_REQ_WIRESPEED_ON 0x1UL
  u8 lpbk;
- #define PORT_PHY_CFG_REQ_LPBK_NONE (0x0UL << 0)
- #define PORT_PHY_CFG_REQ_LPBK_LOCAL (0x1UL << 0)
- #define PORT_PHY_CFG_REQ_LPBK_REMOTE (0x2UL << 0)
+ #define PORT_PHY_CFG_REQ_LPBK_NONE 0x0UL
+ #define PORT_PHY_CFG_REQ_LPBK_LOCAL 0x1UL
+ #define PORT_PHY_CFG_REQ_LPBK_REMOTE 0x2UL
  u8 force_pause;
  #define PORT_PHY_CFG_REQ_FORCE_PAUSE_TX 0x1UL
  #define PORT_PHY_CFG_REQ_FORCE_PAUSE_RX 0x2UL
@@ -1641,25 +1689,25 @@ struct hwrm_port_phy_qcfg_output {
  __le16 seq_id;
  __le16 resp_len;
  u8 link;
- #define PORT_PHY_QCFG_RESP_LINK_NO_LINK (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SIGNAL (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_LINK (0x2UL << 0)
+ #define PORT_PHY_QCFG_RESP_LINK_NO_LINK 0x0UL
+ #define PORT_PHY_QCFG_RESP_LINK_SIGNAL 0x1UL
+ #define PORT_PHY_QCFG_RESP_LINK_LINK 0x2UL
  u8 unused_0;
  __le16 link_speed;
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB (0xaUL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB (0x14UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB (0x19UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB (0x64UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB (0xc8UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB (0xfaUL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB (0x190UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB (0x1f4UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB (0x3e8UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB (0xffffUL << 0)
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB 0x1UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB 0xaUL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB 0x14UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB 0x19UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB 0x64UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB 0xc8UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB 0xfaUL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB 0x190UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB 0x1f4UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL
+ #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL
  u8 duplex;
- #define PORT_PHY_QCFG_RESP_DUPLEX_HALF (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_DUPLEX_FULL (0x1UL << 0)
+ #define PORT_PHY_QCFG_RESP_DUPLEX_HALF 0x0UL
+ #define PORT_PHY_QCFG_RESP_DUPLEX_FULL 0x1UL
  u8 pause;
  #define PORT_PHY_QCFG_RESP_PAUSE_TX 0x1UL
  #define PORT_PHY_QCFG_RESP_PAUSE_RX 0x2UL
@@ -1679,39 +1727,39 @@ struct hwrm_port_phy_qcfg_output {
  #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MBHD 0x1000UL
  #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MB 0x2000UL
  __le16 force_link_speed;
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB (0xaUL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB (0x14UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB (0x19UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB (0x64UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB (0xc8UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB (0xfaUL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB (0x190UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB (0x1f4UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB (0x3e8UL << 0)
- #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB (0xffffUL << 0)
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB 0x1UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB 0xaUL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB 0x14UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB 0x19UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB 0x64UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB 0xc8UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB 0xfaUL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB 0x190UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB 0x1f4UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB 0x3e8UL
+ #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB 0xffffUL
  u8 auto_mode;
- #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED (0x2UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK (0x4UL << 0)
+ #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE 0x0UL
+ #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS 0x1UL
+ #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED 0x2UL
+ #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW 0x3UL
+ #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK 0x4UL
  u8 auto_pause;
  #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_TX 0x1UL
  #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_RX 0x2UL
  #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL
  __le16 auto_link_speed;
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB (0xaUL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB (0x14UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB (0x19UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB (0x64UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB (0xc8UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB (0xfaUL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB (0x190UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB (0x1f4UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB (0x3e8UL << 0)
- #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB (0xffffUL << 0)
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB 0x1UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB 0xaUL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB 0x14UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB 0x19UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB 0x64UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB 0xc8UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB 0xfaUL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB 0x190UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB 0x1f4UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB 0x3e8UL
+ #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB 0xffffUL
  __le16 auto_link_speed_mask;
  #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL
  #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MB 0x2UL
@@ -1728,46 +1776,46 @@ struct hwrm_port_phy_qcfg_output {
  #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL
  #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MB 0x2000UL
  u8 wirespeed;
- #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_WIRESPEED_ON (0x1UL << 0)
+ #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF 0x0UL
+ #define PORT_PHY_QCFG_RESP_WIRESPEED_ON 0x1UL
  u8 lpbk;
- #define PORT_PHY_QCFG_RESP_LPBK_NONE (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_LPBK_LOCAL (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_LPBK_REMOTE (0x2UL << 0)
+ #define PORT_PHY_QCFG_RESP_LPBK_NONE 0x0UL
+ #define PORT_PHY_QCFG_RESP_LPBK_LOCAL 0x1UL
+ #define PORT_PHY_QCFG_RESP_LPBK_REMOTE 0x2UL
  u8 force_pause;
  #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_TX 0x1UL
  #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_RX 0x2UL
  u8 module_status;
- #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG (0x2UL << 0)
- #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN (0x3UL << 0)
- #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED (0x4UL << 0)
- #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE (0xffUL << 0)
+ #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE 0x0UL
+ #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX 0x1UL
+ #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG 0x2UL
+ #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN 0x3UL
+ #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED 0x4UL
+ #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE 0xffUL
  __le32 preemphasis;
  u8 phy_maj;
  u8 phy_min;
  u8 phy_bld;
  u8 phy_type;
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 (0x2UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR (0x3UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR (0x4UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 (0x5UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX (0x6UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR (0x7UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET (0x8UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE (0x9UL << 0)
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY (0xaUL << 0)
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN 0x0UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR 0x1UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 0x2UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR 0x3UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR 0x4UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 0x5UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX 0x6UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR 0x7UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET 0x8UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE 0x9UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY 0xaUL
  u8 media_type;
- #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC (0x2UL << 0)
- #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE (0x3UL << 0)
+ #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL
+ #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL
+ #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC 0x2UL
+ #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE 0x3UL
  u8 xcvr_pkg_type;
- #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL (0x2UL << 0)
+ #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL 0x1UL
+ #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL 0x2UL
  u8 eee_config_phy_addr;
  #define PORT_PHY_QCFG_RESP_PHY_ADDR_MASK 0x1fUL
  #define PORT_PHY_QCFG_RESP_PHY_ADDR_SFT 0
@@ -1796,11 +1844,11 @@ struct hwrm_port_phy_qcfg_output {
  #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MBHD 0x1000UL
  #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MB 0x2000UL
  u8 link_partner_adv_auto_mode;
- #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE (0x0UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS (0x1UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED (0x2UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0)
- #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK (0x4UL << 0)
+ #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE 0x0UL
+ #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS 0x1UL
+ #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED 0x2UL
+ #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW 0x3UL
+ #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK 0x4UL
  u8 link_partner_adv_pause;
  #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_TX 0x1UL
  #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_RX 0x2UL
@@ -1859,7 +1907,7 @@ struct hwrm_port_mac_cfg_input {
  __le64 resp_addr;
  __le32 flags;
  #define PORT_MAC_CFG_REQ_FLAGS_MATCH_LINK 0x1UL
- #define PORT_MAC_CFG_REQ_FLAGS_COS_ASSIGNMENT_ENABLE 0x2UL
+ #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_ENABLE 0x2UL
  #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_ENABLE 0x4UL
  #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_ENABLE 0x8UL
  #define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE 0x10UL
@@ -1868,28 +1916,50 @@ struct hwrm_port_mac_cfg_input {
  #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE 0x80UL
  #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE 0x100UL
  #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE 0x200UL
+ #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_DISABLE 0x400UL
+ #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_DISABLE 0x800UL
+ #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_DISABLE 0x1000UL
  __le32 enables;
  #define PORT_MAC_CFG_REQ_ENABLES_IPG 0x1UL
  #define PORT_MAC_CFG_REQ_ENABLES_LPBK 0x2UL
- #define PORT_MAC_CFG_REQ_ENABLES_IVLAN_PRI2COS_MAP_PRI 0x4UL
- #define PORT_MAC_CFG_REQ_ENABLES_LCOS_MAP_PRI 0x8UL
+ #define PORT_MAC_CFG_REQ_ENABLES_VLAN_PRI2COS_MAP_PRI 0x4UL
+ #define PORT_MAC_CFG_REQ_ENABLES_RESERVED1 0x8UL
  #define PORT_MAC_CFG_REQ_ENABLES_TUNNEL_PRI2COS_MAP_PRI 0x10UL
  #define PORT_MAC_CFG_REQ_ENABLES_DSCP2COS_MAP_PRI 0x20UL
  #define PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE 0x40UL
  #define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE 0x80UL
+ #define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG 0x100UL
  __le16 port_id;
  u8 ipg;
  u8 lpbk;
- #define PORT_MAC_CFG_REQ_LPBK_NONE (0x0UL << 0)
- #define PORT_MAC_CFG_REQ_LPBK_LOCAL (0x1UL << 0)
- #define PORT_MAC_CFG_REQ_LPBK_REMOTE (0x2UL << 0)
- u8 ivlan_pri2cos_map_pri;
- u8 lcos_map_pri;
+ #define PORT_MAC_CFG_REQ_LPBK_NONE 0x0UL
+ #define PORT_MAC_CFG_REQ_LPBK_LOCAL 0x1UL
+ #define PORT_MAC_CFG_REQ_LPBK_REMOTE 0x2UL
+ u8 vlan_pri2cos_map_pri;
+ u8 reserved1;
  u8 tunnel_pri2cos_map_pri;
  u8 dscp2pri_map_pri;
  __le16 rx_ts_capture_ptp_msg_type;
  __le16 tx_ts_capture_ptp_msg_type;
- __le32 unused_0;
+ u8 cos_field_cfg;
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_RSVD1 0x1UL
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_MASK 0x6UL
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_SFT 1
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST (0x0UL << 1)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER (0x1UL << 1)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST (0x2UL << 1)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 1)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK 0x18UL
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT 3
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST (0x0UL << 3)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER (0x1UL << 3)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST (0x2UL << 3)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 3)
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK 0xe0UL
+ #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5
+ u8 unused_0[3];
 };
 /* Output (16 bytes) */
@@ -1902,9 +1972,9 @@ struct hwrm_port_mac_cfg_output {
  __le16 mtu;
  u8 ipg;
  u8 lpbk;
- #define PORT_MAC_CFG_RESP_LPBK_NONE (0x0UL << 0)
- #define PORT_MAC_CFG_RESP_LPBK_LOCAL (0x1UL << 0)
- #define PORT_MAC_CFG_RESP_LPBK_REMOTE (0x2UL << 0)
+ #define PORT_MAC_CFG_RESP_LPBK_NONE 0x0UL
+ #define PORT_MAC_CFG_RESP_LPBK_LOCAL 0x1UL
+ #define PORT_MAC_CFG_RESP_LPBK_REMOTE 0x2UL
  u8 unused_0;
  u8 valid;
 };
@@ -2163,8 +2233,8 @@ struct hwrm_queue_qportcfg_input {
  __le64 resp_addr;
  __le32 flags;
  #define QUEUE_QPORTCFG_REQ_FLAGS_PATH 0x1UL
- #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX (0x0UL << 0)
- #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX (0x1UL << 0)
+ #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX 0x0UL
+ #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX 0x1UL
  #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_LAST QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX
  __le16 port_id;
  __le16 unused_0;
@@ -2179,50 +2249,51 @@ struct hwrm_queue_qportcfg_output {
  u8 max_configurable_queues;
  u8 max_configurable_lossless_queues;
  u8 queue_cfg_allowed;
- u8 queue_buffers_cfg_allowed;
+ u8 queue_cfg_info;
+ #define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG 0x1UL
  u8 queue_pfcenable_cfg_allowed;
  u8 queue_pri2cos_cfg_allowed;
  u8 queue_cos2bw_cfg_allowed;
  u8 queue_id0;
  u8 queue_id0_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 queue_id1;
  u8 queue_id1_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 queue_id2;
  u8 queue_id2_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 queue_id3;
  u8 queue_id3_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 queue_id4;
  u8 queue_id4_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 queue_id5;
  u8 queue_id5_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 queue_id6;
  u8 queue_id6_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 queue_id7;
  u8 queue_id7_service_profile;
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 valid;
 };
@@ -2235,19 +2306,21 @@ struct hwrm_queue_cfg_input {
  __le16 target_id;
  __le64 resp_addr;
  __le32 flags;
- #define QUEUE_CFG_REQ_FLAGS_PATH 0x1UL
- #define QUEUE_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0)
- #define QUEUE_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0)
- #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_RX
+ #define QUEUE_CFG_REQ_FLAGS_PATH_MASK 0x3UL
+ #define QUEUE_CFG_REQ_FLAGS_PATH_SFT 0
+ #define QUEUE_CFG_REQ_FLAGS_PATH_TX 0x0UL
+ #define QUEUE_CFG_REQ_FLAGS_PATH_RX 0x1UL
+ #define QUEUE_CFG_REQ_FLAGS_PATH_BIDIR 0x2UL
+ #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_BIDIR
  __le32 enables;
  #define QUEUE_CFG_REQ_ENABLES_DFLT_LEN 0x1UL
  #define QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE 0x2UL
  __le32 queue_id;
  __le32 dflt_len;
  u8 service_profile;
- #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY (0x0UL << 0)
- #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
- #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+ #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY 0x0UL
+ #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS 0x1UL
+ #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN 0xffUL
  u8 unused_0[7];
 };
@@ -2264,50 +2337,6 @@ struct hwrm_queue_cfg_output {
  u8 valid;
 };
-/* hwrm_queue_buffers_cfg */
-/* Input (56 bytes) */
-struct hwrm_queue_buffers_cfg_input {
- __le16 req_type;
- __le16 cmpl_ring;
- __le16 seq_id;
- __le16 target_id;
- __le64 resp_addr;
- __le32 flags;
- #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH 0x1UL
- #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0)
- #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0)
- #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_LAST QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX
- __le32 enables;
- #define QUEUE_BUFFERS_CFG_REQ_ENABLES_RESERVED 0x1UL
- #define QUEUE_BUFFERS_CFG_REQ_ENABLES_SHARED 0x2UL
- #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XOFF 0x4UL
- #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XON 0x8UL
- #define QUEUE_BUFFERS_CFG_REQ_ENABLES_FULL 0x10UL
- #define QUEUE_BUFFERS_CFG_REQ_ENABLES_NOTFULL 0x20UL
- #define QUEUE_BUFFERS_CFG_REQ_ENABLES_MAX 0x40UL
- __le32 queue_id;
- __le32 reserved;
- __le32 shared;
- __le32 xoff;
- __le32 xon;
- __le32 full;
- __le32 notfull;
- __le32 max;
-};
-
-/* Output (16 bytes) */
-struct hwrm_queue_buffers_cfg_output {
- __le16 error_code;
- __le16 req_type;
- __le16 seq_id;
- __le16 resp_len;
- __le32 unused_0;
- u8 unused_1;
- u8 unused_2;
- u8 unused_3;
- u8 valid;
-};
-
 /* hwrm_queue_pfcenable_cfg */
 /* Input (24 bytes) */
 struct hwrm_queue_pfcenable_cfg_input {
@@ -2351,12 +2380,22 @@ struct hwrm_queue_pri2cos_cfg_input {
  __le16 target_id;
  __le64 resp_addr;
  __le32 flags;
- #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH 0x1UL
+ #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_MASK 0x3UL
+ #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_SFT 0
  #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0)
  #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0)
- #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX
- #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x2UL
+ #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR (0x2UL << 0)
+ #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR
+ #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x4UL
  __le32 enables;
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID 0x1UL
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI1_COS_QUEUE_ID 0x2UL
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI2_COS_QUEUE_ID 0x4UL
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI3_COS_QUEUE_ID 0x8UL
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI4_COS_QUEUE_ID 0x10UL
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI5_COS_QUEUE_ID 0x20UL
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI6_COS_QUEUE_ID 0x40UL
+ #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI7_COS_QUEUE_ID 0x80UL
  u8 port_id;
  u8 pri0_cos_queue_id;
  u8 pri1_cos_queue_id;
@@ -2404,82 +2443,226 @@ struct hwrm_queue_cos2bw_cfg_input {
  u8 queue_id0;
  u8 unused_0;
  __le32 queue_id0_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id0_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id0_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id0_pri_lvl;
  u8 queue_id0_bw_weight;
  u8 queue_id1;
  __le32 queue_id1_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id1_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id1_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id1_pri_lvl;
  u8 queue_id1_bw_weight;
  u8 queue_id2;
  __le32 queue_id2_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id2_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id2_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id2_pri_lvl;
  u8 queue_id2_bw_weight;
  u8 queue_id3;
  __le32 queue_id3_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id3_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id3_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id3_pri_lvl;
  u8 queue_id3_bw_weight;
  u8 queue_id4;
  __le32 queue_id4_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id4_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id4_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id4_pri_lvl;
  u8 queue_id4_bw_weight;
  u8 queue_id5;
  __le32 queue_id5_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id5_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id5_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id5_pri_lvl;
  u8 queue_id5_bw_weight;
  u8 queue_id6;
  __le32 queue_id6_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id6_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id6_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id6_pri_lvl;
  u8 queue_id6_bw_weight;
  u8 queue_id7;
  __le32 queue_id7_min_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID
  __le32 queue_id7_max_bw;
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_SFT 0
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_RSVD 0x10000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 queue_id7_tsa_assign;
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP (0x0UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS (0x1UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
- #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP 0x0UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS 0x1UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST 0xffUL
  u8 queue_id7_pri_lvl;
  u8 queue_id7_bw_weight;
  u8 unused_1[5];
@@ -2563,6 +2746,7 @@ struct hwrm_vnic_cfg_input {
  #define VNIC_CFG_REQ_FLAGS_BD_STALL_MODE 0x4UL
  #define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE 0x8UL
  #define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE 0x10UL
+ #define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE 0x20UL
  __le32 enables;
  #define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP 0x1UL
  #define VNIC_CFG_REQ_ENABLES_RSS_RULE 0x2UL
@@ -2615,18 +2799,18 @@ struct hwrm_vnic_tpa_cfg_input {
  #define VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN 0x8UL
  __le16 vnic_id;
  __le16 max_agg_segs;
- #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 (0x0UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 (0x1UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 (0x2UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 (0x3UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX (0x1fUL << 0)
+ #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 0x0UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 0x1UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 0x2UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 0x3UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX 0x1fUL
  __le16 max_aggs;
- #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 (0x0UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 (0x1UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 (0x2UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 (0x3UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 (0x4UL << 0)
- #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX (0x7UL << 0)
+ #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 0x0UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 0x1UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 0x2UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 0x3UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 0x4UL
+ #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX 0x7UL
  u8 unused_0;
  u8 unused_1;
  __le32 max_agg_timer;
@@ -2780,15 +2964,15 @@ struct hwrm_ring_alloc_input {
  __le64 resp_addr;
  __le32 enables;
  #define RING_ALLOC_REQ_ENABLES_RESERVED1 0x1UL
- #define RING_ALLOC_REQ_ENABLES_RESERVED2 0x2UL
+ #define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG 0x2UL
  #define RING_ALLOC_REQ_ENABLES_RESERVED3 0x4UL
  #define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID 0x8UL
  #define RING_ALLOC_REQ_ENABLES_RESERVED4 0x10UL
  #define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID 0x20UL
  u8 ring_type;
- #define RING_ALLOC_REQ_RING_TYPE_CMPL (0x0UL << 0)
- #define RING_ALLOC_REQ_RING_TYPE_TX (0x1UL << 0)
- #define RING_ALLOC_REQ_RING_TYPE_RX (0x2UL << 0)
+ #define RING_ALLOC_REQ_RING_TYPE_CMPL 0x0UL
+ #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL
+ #define RING_ALLOC_REQ_RING_TYPE_RX 0x2UL
  u8 unused_0;
  __le16 unused_1;
  __le64 page_tbl_addr;
@@ -2804,18 +2988,36 @@ struct hwrm_ring_alloc_input {
  u8 unused_4;
  u8 unused_5;
  __le32 reserved1;
- __le16 reserved2;
+ __le16 ring_arb_cfg;
+ #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_MASK 0xfUL
+ #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SFT 0
+ #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SP (0x1UL << 0)
+ #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ (0x2UL << 0)
+ #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_LAST RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ
+ #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_MASK 0xf0UL
+ #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_SFT 4
+ #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_MASK 0xff00UL
+ #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_SFT 8
  u8 unused_6;
  u8 unused_7;
  __le32 reserved3;
  __le32 stat_ctx_id;
  __le32 reserved4;
  __le32 max_bw;
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_SFT 0
+ #define RING_ALLOC_REQ_MAX_BW_RSVD 0x10000000UL
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+ #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_LAST RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID
  u8 int_mode;
- #define RING_ALLOC_REQ_INT_MODE_LEGACY (0x0UL << 0)
- #define RING_ALLOC_REQ_INT_MODE_RSVD (0x1UL << 0)
- #define RING_ALLOC_REQ_INT_MODE_MSIX (0x2UL << 0)
- #define RING_ALLOC_REQ_INT_MODE_POLL (0x3UL << 0)
+ #define RING_ALLOC_REQ_INT_MODE_LEGACY 0x0UL
+ #define RING_ALLOC_REQ_INT_MODE_RSVD 0x1UL
+ #define RING_ALLOC_REQ_INT_MODE_MSIX 0x2UL
+ #define RING_ALLOC_REQ_INT_MODE_POLL 0x3UL
  u8 unused_8[3];
 };
@@ -2842,9 +3044,9 @@ struct hwrm_ring_free_input {
  __le16 target_id;
  __le64 resp_addr;
  u8 ring_type;
- #define RING_FREE_REQ_RING_TYPE_CMPL (0x0UL << 0)
- #define RING_FREE_REQ_RING_TYPE_TX (0x1UL << 0)
- #define RING_FREE_REQ_RING_TYPE_RX (0x2UL << 0)
+ #define RING_FREE_REQ_RING_TYPE_CMPL 0x0UL
+ #define RING_FREE_REQ_RING_TYPE_TX 0x1UL
+ #define RING_FREE_REQ_RING_TYPE_RX 0x2UL
  u8 unused_0;
  __le16 ring_id;
  __le32 unused_1;
@@ -2942,9 +3144,9 @@ struct hwrm_ring_reset_input {
  __le16 target_id;
  __le64 resp_addr;
  u8 ring_type;
- #define RING_RESET_REQ_RING_TYPE_CMPL (0x0UL << 0)
- #define RING_RESET_REQ_RING_TYPE_TX (0x1UL << 0)
- #define RING_RESET_REQ_RING_TYPE_RX (0x2UL << 0)
+ #define RING_RESET_REQ_RING_TYPE_CMPL 0x0UL
+ #define RING_RESET_REQ_RING_TYPE_TX 0x1UL
+ #define RING_RESET_REQ_RING_TYPE_RX 0x2UL
  u8 unused_0;
  __le16 ring_id;
  __le32 unused_1;
@@ -3068,36 +3270,36 @@ struct hwrm_cfa_l2_filter_alloc_input {
  __le16 t_l2_ivlan;
  __le16 t_l2_ivlan_mask;
  u8 src_type;
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT (0x0UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF (0x1UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF (0x2UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC (0x3UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG (0x4UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE (0x5UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO (0x6UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG (0x7UL << 0)
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT 0x0UL
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF 0x1UL
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF 0x2UL
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC 0x3UL
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG 0x4UL
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE 0x5UL
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO 0x6UL
+ #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG 0x7UL
  u8 unused_6;
  __le32 src_id;
  u8 tunnel_type;
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0)
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL
+ #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
  u8 unused_7;
  __le16 dst_id;
  __le16 mirror_vnic_id;
  u8 pri_hint;
- #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER (0x1UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER (0x2UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX (0x3UL << 0)
- #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN (0x4UL << 0)
+ #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL
+ #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER 0x1UL
+ #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER 0x2UL
+ #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX 0x3UL
+ #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN 0x4UL
  u8 unused_8;
  __le32 unused_9;
  __le64 l2_filter_id_hint;
@@ -3246,16 +3448,16 @@ struct hwrm_cfa_tunnel_filter_alloc_input {
  u8 l3_addr_type;
  u8 t_l3_addr_type;
  u8 tunnel_type;
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0)
- #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0)
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL
+ #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
  u8 unused_0;
  __le32 vni;
  __le32 dst_vnic_id;
@@ -3311,14 +3513,14 @@ struct hwrm_cfa_encap_record_alloc_input {
  __le32 flags;
  #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL
  u8 encap_type;
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN (0x1UL << 0)
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE (0x2UL << 0)
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE (0x3UL << 0)
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP (0x4UL << 0)
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE (0x5UL << 0)
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS (0x6UL << 0)
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN (0x7UL << 0)
- #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE (0x8UL << 0)
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN 0x1UL
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE 0x2UL
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE 0x3UL
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP 0x4UL
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS 0x6UL
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN 0x7UL
+ #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL
  u8 unused_0;
  __le16 unused_1;
  __le32 encap_data[16];
@@ -3397,32 +3599,32 @@ struct hwrm_cfa_ntuple_filter_alloc_input {
  u8 src_macaddr[6];
  __be16 ethertype;
  u8 ip_addr_type;
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN (0x0UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 (0x4UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 (0x6UL << 0)
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL
  u8 ip_protocol;
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN (0x0UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP (0x6UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP (0x11UL << 0)
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x6UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x11UL
  __le16 dst_id;
  __le16 mirror_vnic_id;
  u8 tunnel_type;
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0)
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
  u8 pri_hint;
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE (0x1UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW (0x2UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST (0x3UL << 0)
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST (0x4UL << 0)
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE 0x1UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW 0x2UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST 0x3UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST 0x4UL
  __be32 src_ipaddr[4];
  __be32 src_ipaddr_mask[4];
  __be32 dst_ipaddr[4];
@@ -3511,8 +3713,8 @@ struct hwrm_tunnel_dst_port_query_input {
  __le16 target_id;
  __le64 resp_addr;
  u8 tunnel_type;
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0)
- #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0)
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL
  u8 unused_0[7];
 };
@@ -3539,8 +3741,8 @@ struct hwrm_tunnel_dst_port_alloc_input {
  __le16 target_id;
  __le64 resp_addr;
  u8 tunnel_type;
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0)
- #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0)
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL
+ #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL
  u8 unused_0;
  __be16 tunnel_dst_port_val;
  __le32 unused_1;
@@ -3570,8 +3772,8 @@ struct hwrm_tunnel_dst_port_free_input {
  __le16 target_id;
  __le64 resp_addr;
  u8 tunnel_type;
- #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN (0x1UL
<< 0) - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0; __le16 tunnel_dst_port_id; __le32 unused_1; @@ -3720,15 +3922,15 @@ struct hwrm_fw_reset_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 selfrst_status; - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL __le16 unused_0[3]; }; @@ -3739,9 +3941,9 @@ struct hwrm_fw_reset_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3759,11 +3961,11 @@ struct hwrm_fw_qstatus_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 unused_0[7]; }; @@ -3774,9 +3976,9 @@ struct hwrm_fw_qstatus_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3785,6 +3987,42 @@ struct hwrm_fw_qstatus_output { u8 valid; }; +/* hwrm_fw_set_time */ +/* Input (32 bytes) */ +struct hwrm_fw_set_time_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 year; + #define FW_SET_TIME_REQ_YEAR_UNKNOWN 0x0UL + u8 month; + u8 day; + u8 hour; + u8 minute; + u8 second; + u8 unused_0; + __le16 millisecond; + __le16 zone; + #define 
FW_SET_TIME_REQ_ZONE_UTC 0x0UL + #define FW_SET_TIME_REQ_ZONE_UNKNOWN 0xffffUL + __le32 unused_1; +}; + +/* Output (16 bytes) */ +struct hwrm_fw_set_time_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_exec_fwd_resp */ /* Input (128 bytes) */ struct hwrm_exec_fwd_resp_input { @@ -3921,32 +4159,6 @@ struct hwrm_temp_monitor_query_output { u8 valid; }; -/* hwrm_nvm_raw_write_blk */ -/* Input (32 bytes) */ -struct hwrm_nvm_raw_write_blk_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le64 host_src_addr; - __le32 dest_addr; - __le32 len; -}; - -/* Output (16 bytes) */ -struct hwrm_nvm_raw_write_blk_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - __le32 unused_0; - u8 unused_1; - u8 unused_2; - u8 unused_3; - u8 valid; -}; - /* hwrm_nvm_read */ /* Input (40 bytes) */ struct hwrm_nvm_read_input { @@ -4132,9 +4344,9 @@ struct hwrm_nvm_find_dir_entry_input { u8 opt_ordinal; #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_MASK 0x3UL #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_SFT 0 - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ (0x0UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE (0x1UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT (0x2UL << 0) + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ 0x0UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE 0x1UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT 0x2UL u8 unused_1[3]; }; @@ -4266,4 +4478,41 @@ struct hwrm_nvm_verify_update_output { u8 valid; }; +/* hwrm_nvm_install_update */ +/* Input (24 bytes) */ +struct hwrm_nvm_install_update_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 install_type; + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_NORMAL 0x0UL + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL 0xffffffffUL + __le32 unused_0; +}; + +/* Output (24 bytes) */ +struct hwrm_nvm_install_update_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 installed_items; + u8 result; + #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL + u8 problem_item; + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL + u8 reset_required; + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_PCI 0x1UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_POWER 0x2UL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + #endif From 5ac67d8bc753b122175e682274599338b3ee7d42 Mon Sep 17 00:00:00 2001 From: Rob Swindell Date: Mon, 19 Sep 2016 03:58:03 -0400 Subject: [PATCH 0478/1050] bnxt_en: Added support for Secure Firmware Update Using Ethtool flashdev command, entire NVM package (*.pkg) files may now be staged into the "update" area of the NVM and subsequently verified and installed by the firmware using the newly introduced command: NVM_INSTALL_UPDATE. We also introduce use of the new firmware command FW_SET_TIME so that the NVM-resident package installation log contains valid time-stamps. Signed-off-by: Rob Swindell Signed-off-by: Michael Chan Signed-off-by: David S. 
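Worth noting about all the hwrm_*_input structures above: each one opens with the same five fields. Written out as a hypothetical standalone struct for illustration (the real header simply repeats the fields in every request):

/* Sketch of the request prologue every HWRM command shares;
 * bnxt_hwrm_cmd_hdr_init() fills these before any command-specific
 * members are set, and the "-1, -1" arguments seen at its call sites
 * land in cmpl_ring and target_id.
 */
struct example_hwrm_req_hdr {
	__le16 req_type;	/* command id, e.g. HWRM_FW_SET_TIME */
	__le16 cmpl_ring;	/* completion ring, (u16)-1 for default */
	__le16 seq_id;		/* sequence number echoed in the response */
	__le16 target_id;	/* target function, (u16)-1 for self */
	__le64 resp_addr;	/* DMA address firmware writes the response to */
};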
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 +++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 155 ++++++++++++++++-- .../net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h | 16 +- 4 files changed, 182 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f6b4f342d7b4..f0a9d23fda15 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -4314,6 +4315,27 @@ hwrm_ver_get_exit: return rc; } +int bnxt_hwrm_fw_set_time(struct bnxt *bp) +{ + struct hwrm_fw_set_time_input req = {0}; + struct rtc_time tm; + struct timeval tv; + + if (bp->hwrm_spec_code < 0x10400) + return -EOPNOTSUPP; + + do_gettimeofday(&tv); + rtc_time_to_tm(tv.tv_sec, &tm); + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1); + req.year = cpu_to_le16(1900 + tm.tm_year); + req.month = 1 + tm.tm_mon; + req.day = tm.tm_mday; + req.hour = tm.tm_hour; + req.minute = tm.tm_min; + req.second = tm.tm_sec; + return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +} + static int bnxt_hwrm_port_qstats(struct bnxt *bp) { int rc; @@ -6811,6 +6833,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err; + bnxt_hwrm_fw_set_time(bp); + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 012cc51440b7..41033d01fe8d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1220,6 +1220,7 @@ int bnxt_hwrm_set_coal(struct bnxt *); int bnxt_hwrm_func_qcaps(struct bnxt *); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_close_nic(struct bnxt *, bool, bool); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b83e17403d6c..4a430b623489 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -21,6 +21,8 @@ #include "bnxt_nvm_defs.h" /* NVRAM content constant and structure defs */ #include "bnxt_fw_hdr.h" /* Firmware hdr constant and structure defs */ #define FLASH_NVRAM_TIMEOUT ((HWRM_CMD_TIMEOUT) * 100) +#define FLASH_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) +#define INSTALL_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen); @@ -1028,6 +1030,10 @@ static u32 bnxt_get_link(struct net_device *dev) return bp->link_info.link_up; } +static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, + u16 ext, u16 *index, u32 *item_length, + u32 *data_length); + static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type, u16 dir_ordinal, @@ -1179,7 +1185,6 @@ static int bnxt_flash_firmware(struct net_device *dev, (unsigned long)calculated_crc); return -EINVAL; } - /* TODO: Validate digital signature (RSA-encrypted SHA-256 hash) here */ rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw_data, fw_size); if (rc == 0) /* Firmware update 
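A side note on bnxt_hwrm_fw_set_time() above: do_gettimeofday() and rtc_time_to_tm() are the old 32-bit-time interfaces. On kernels that provide the year-2038-safe helpers, the same request could be filled as below; this is a hypothetical variant for comparison, not what this patch does:

/* Hypothetical time64-based fill for hwrm_fw_set_time_input;
 * assumes ktime_get_real_seconds() and time64_to_tm() are available.
 */
struct tm tm;

time64_to_tm(ktime_get_real_seconds(), 0, &tm);	/* offset 0: UTC */
req.year   = cpu_to_le16(1900 + tm.tm_year);	/* tm_year counts from 1900 */
req.month  = 1 + tm.tm_mon;			/* tm_mon is 0-based */
req.day    = tm.tm_mday;
req.hour   = tm.tm_hour;
req.minute = tm.tm_min;
req.second = tm.tm_sec;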
successful */ @@ -1188,6 +1193,57 @@ static int bnxt_flash_firmware(struct net_device *dev, return rc; } +static int bnxt_flash_microcode(struct net_device *dev, + u16 dir_type, + const u8 *fw_data, + size_t fw_size) +{ + struct bnxt_ucode_trailer *trailer; + u32 calculated_crc; + u32 stored_crc; + int rc = 0; + + if (fw_size < sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode file size: %u\n", + (unsigned int)fw_size); + return -EINVAL; + } + trailer = (struct bnxt_ucode_trailer *)(fw_data + (fw_size - + sizeof(*trailer))); + if (trailer->sig != cpu_to_le32(BNXT_UCODE_TRAILER_SIGNATURE)) { + netdev_err(dev, "Invalid microcode trailer signature: %08X\n", + le32_to_cpu(trailer->sig)); + return -EINVAL; + } + if (le16_to_cpu(trailer->dir_type) != dir_type) { + netdev_err(dev, "Expected microcode type: %d, read: %d\n", + dir_type, le16_to_cpu(trailer->dir_type)); + return -EINVAL; + } + if (le16_to_cpu(trailer->trailer_length) < + sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode trailer length: %d\n", + le16_to_cpu(trailer->trailer_length)); + return -EINVAL; + } + + /* Confirm the CRC32 checksum of the file: */ + stored_crc = le32_to_cpu(*(__le32 *)(fw_data + fw_size - + sizeof(stored_crc))); + calculated_crc = ~crc32(~0, fw_data, fw_size - sizeof(stored_crc)); + if (calculated_crc != stored_crc) { + netdev_err(dev, + "CRC32 (%08lX) does not match calculated: %08lX\n", + (unsigned long)stored_crc, + (unsigned long)calculated_crc); + return -EINVAL; + } + rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, + 0, 0, fw_data, fw_size); + + return rc; +} + static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) { switch (dir_type) { @@ -1206,7 +1262,7 @@ static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) return false; } -static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) +static bool bnxt_dir_type_is_other_exec_format(u16 dir_type) { switch (dir_type) { case BNX_DIR_TYPE_AVS: @@ -1227,7 +1283,7 @@ static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) static bool bnxt_dir_type_is_executable(u16 dir_type) { return bnxt_dir_type_is_ape_bin_format(dir_type) || - bnxt_dir_type_is_unprotected_exec_format(dir_type); + bnxt_dir_type_is_other_exec_format(dir_type); } static int bnxt_flash_firmware_from_file(struct net_device *dev, @@ -1237,10 +1293,6 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, const struct firmware *fw; int rc; - if (dir_type != BNX_DIR_TYPE_UPDATE && - bnxt_dir_type_is_executable(dir_type) == false) - return -EINVAL; - rc = request_firmware(&fw, filename, &dev->dev); if (rc != 0) { netdev_err(dev, "Error %d requesting firmware file: %s\n", @@ -1249,6 +1301,8 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } if (bnxt_dir_type_is_ape_bin_format(dir_type) == true) rc = bnxt_flash_firmware(dev, dir_type, fw->data, fw->size); + else if (bnxt_dir_type_is_other_exec_format(dir_type) == true) + rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size); else rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw->data, fw->size); @@ -1257,10 +1311,83 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } static int bnxt_flash_package_from_file(struct net_device *dev, - char *filename) + char *filename, u32 install_type) { - netdev_err(dev, "packages are not yet supported\n"); - return -EINVAL; + struct bnxt *bp = netdev_priv(dev); + struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; + struct 
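The checksum comparison in bnxt_flash_microcode() above relies on a small idiom: the kernel's crc32() (an alias for crc32_le()) takes a raw seed and does not post-invert, so the conventional IEEE 802.3 / zlib-style CRC-32 is obtained by seeding with ~0 and inverting the result. A tiny sketch of the idiom on its own, with a hypothetical wrapper name:

#include <linux/crc32.h>

/* Conventional CRC-32 from the kernel primitive: pre-inverted seed,
 * post-inverted result.  For the standard check string "123456789"
 * this yields 0xcbf43926.
 */
static u32 example_std_crc32(const void *buf, size_t len)
{
	return ~crc32(~0, buf, len);
}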
hwrm_nvm_install_update_input install = {0}; + const struct firmware *fw; + u32 item_len; + u16 index; + int rc; + + bnxt_hwrm_fw_set_time(bp); + + if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_UPDATE, + BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, + &index, &item_len, NULL) != 0) { + netdev_err(dev, "PKG update area not created in nvram\n"); + return -ENOBUFS; + } + + rc = request_firmware(&fw, filename, &dev->dev); + if (rc != 0) { + netdev_err(dev, "PKG error %d requesting file: %s\n", + rc, filename); + return rc; + } + + if (fw->size > item_len) { + netdev_err(dev, "PKG insufficient update area in nvram: %lu", + (unsigned long)fw->size); + rc = -EFBIG; + } else { + dma_addr_t dma_handle; + u8 *kmem; + struct hwrm_nvm_modify_input modify = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &modify, HWRM_NVM_MODIFY, -1, -1); + + modify.dir_idx = cpu_to_le16(index); + modify.len = cpu_to_le32(fw->size); + + kmem = dma_alloc_coherent(&bp->pdev->dev, fw->size, + &dma_handle, GFP_KERNEL); + if (!kmem) { + netdev_err(dev, + "dma_alloc_coherent failure, length = %u\n", + (unsigned int)fw->size); + rc = -ENOMEM; + } else { + memcpy(kmem, fw->data, fw->size); + modify.host_src_addr = cpu_to_le64(dma_handle); + + rc = hwrm_send_message(bp, &modify, sizeof(modify), + FLASH_PACKAGE_TIMEOUT); + dma_free_coherent(&bp->pdev->dev, fw->size, kmem, + dma_handle); + } + } + release_firmware(fw); + if (rc) + return rc; + + if ((install_type & 0xffff) == 0) + install_type >>= 16; + bnxt_hwrm_cmd_hdr_init(bp, &install, HWRM_NVM_INSTALL_UPDATE, -1, -1); + install.install_type = cpu_to_le32(install_type); + + rc = hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (rc) + return -EOPNOTSUPP; + + if (resp->result) { + netdev_err(dev, "PKG install error = %d, problem_item = %d\n", + (s8)resp->result, (int)resp->problem_item); + return -ENOPKG; + } + return 0; } static int bnxt_flash_device(struct net_device *dev, @@ -1271,8 +1398,10 @@ static int bnxt_flash_device(struct net_device *dev, return -EINVAL; } - if (flash->region == ETHTOOL_FLASH_ALL_REGIONS) - return bnxt_flash_package_from_file(dev, flash->data); + if (flash->region == ETHTOOL_FLASH_ALL_REGIONS || + flash->region > 0xffff) + return bnxt_flash_package_from_file(dev, flash->data, + flash->region); return bnxt_flash_firmware_from_file(dev, flash->region, flash->data); } @@ -1516,7 +1645,7 @@ static int bnxt_set_eeprom(struct net_device *dev, /* Create or re-write an NVM item: */ if (bnxt_dir_type_is_executable(type) == true) - return -EINVAL; + return -EOPNOTSUPP; ext = eeprom->magic & 0xffff; ordinal = eeprom->offset >> 16; attr = eeprom->offset & 0xffff; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h index 82bf44ab811b..cad30ddc6936 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h @@ -11,6 +11,7 @@ #define __BNXT_FW_HDR_H__ #define BNXT_FIRMWARE_BIN_SIGNATURE 0x1a4d4342 /* "BCM"+0x1a */ +#define BNXT_UCODE_TRAILER_SIGNATURE 0x726c7254 /* "Trlr" */ enum SUPPORTED_FAMILY { DEVICE_5702_3_4_FAMILY, /* 0 - Denali, Vinson, K2 */ @@ -85,7 +86,7 @@ enum SUPPORTED_MEDIA { struct bnxt_fw_header { __le32 signature; /* constains the constant value of - * BNXT_Firmware_Bin_Signatures + * BNXT_FIRMWARE_BIN_SIGNATURE */ u8 flags; /* reserved for ChiMP use */ u8 code_type; /* enum SUPPORTED_CODE */ @@ -102,4 +103,17 @@ struct bnxt_fw_header { u8 major_ver; }; +/* Microcode and pre-boot software/firmware trailer: */ +struct 
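The install_type handling above packs two cases into ethtool's 32-bit region word: ETHTOOL_FLASH_ALL_REGIONS (0xffffffff) passes through unchanged and selects NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL, while a region whose low 16 bits are zero carries the install type in its high half. A worked trace of that branch, which is my reading of the code with hypothetical values:

u32 install_type = flash->region;	/* e.g. 0x00020000 (hypothetical) */

if ((install_type & 0xffff) == 0)	/* low half empty: type packed high */
	install_type >>= 16;		/* 0x00020000 -> 0x00000002 */

/* 0xffffffff has a non-zero low half, keeps its value, and selects
 * NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL.
 */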
bnxt_ucode_trailer { + u8 rsa_sig[256]; + __le16 flags; + u8 version_format; + u8 version_length; + u8 version[16]; + __le16 dir_type; + __le16 trailer_length; + __le32 sig; /* BNXT_UCODE_TRAILER_SIGNATURE */ + __le32 chksum; /* CRC-32 */ +}; + #endif From 47f8e8b9bbbbe00740786bd1da0d5097d45ba46b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:04 -0400 Subject: [PATCH 0479/1050] bnxt_en: Fix ethtool -l|-L inconsistent channel counts. The existing code is inconsistent in reporting and accepting the combined channel count. bnxt_get_channels() reports maximum combined as the maximum rx count. bnxt_set_channels() accepts combined count that cannot be bigger than max rx or max tx. For example, if max rx = 2 and max tx = 1, we report max supported combined to be 2. But if the user tries to set combined to 2, it will fail because 2 is bigger than max tx which is 1. Fix the code to be consistent. Max allowed combined = max(max_rx, max_tx). We will accept a combined channel count <= max(max_rx, max_tx). Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 4a430b623489..c74ce698805e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -348,7 +348,7 @@ static void bnxt_get_channels(struct net_device *dev, int max_rx_rings, max_tx_rings, tcs; bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, true); - channel->max_combined = max_rx_rings; + channel->max_combined = max_t(int, max_rx_rings, max_tx_rings); if (bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, false)) { max_rx_rings = 0; @@ -406,8 +406,8 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) max_tx_rings /= tcs; - if (sh && (channel->combined_count > max_rx_rings || - channel->combined_count > max_tx_rings)) + if (sh && + channel->combined_count > max_t(int, max_rx_rings, max_tx_rings)) return -ENOMEM; if (!sh && (channel->rx_count > max_rx_rings || @@ -430,8 +430,10 @@ static int bnxt_set_channels(struct net_device *dev, if (sh) { bp->flags |= BNXT_FLAG_SHARED_RINGS; - bp->rx_nr_rings = channel->combined_count; - bp->tx_nr_rings_per_tc = channel->combined_count; + bp->rx_nr_rings = min_t(int, channel->combined_count, + max_rx_rings); + bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count, + max_tx_rings); } else { bp->flags &= ~BNXT_FLAG_SHARED_RINGS; bp->rx_nr_rings = channel->rx_count; From 7cc5a20e38fcaf395ac59e7ed6c3decb575a0dc7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:05 -0400 Subject: [PATCH 0480/1050] bnxt_en: Re-arrange bnxt_hwrm_func_qcaps(). Re-arrange the code so that the generation of the random MAC address for the VF is at the end of the function. The next patch will add one more step to call bnxt_approve_mac() to get the firmware to approve the random MAC address. Signed-off-by: Michael Chan Signed-off-by: David S. 
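To make the fix concrete, here is the arithmetic from the commit message applied to the new code in the diff below, with hypothetical ring counts:

/* max_rx = 2, max_tx = 1, shared rings:
 *
 *   get_channels:  max_combined = max(2, 1) = 2
 *   set_channels(combined = 2):  2 <= max(2, 1)  ->  accepted
 *       rx_nr_rings        = min(2, max_rx) = 2
 *       tx_nr_rings_per_tc = min(2, max_tx) = 1
 *
 * Before the patch the same request failed, because combined = 2
 * exceeded max_tx = 1.
 */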
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f0a9d23fda15..b1dcece2cd64 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4161,6 +4161,11 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) if (rc) goto hwrm_func_qcaps_exit; + bp->tx_push_thresh = 0; + if (resp->flags & + cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) + bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; + if (BNXT_PF(bp)) { struct bnxt_pf_info *pf = &bp->pf; @@ -4192,12 +4197,6 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) struct bnxt_vf_info *vf = &bp->vf; vf->fw_fid = le16_to_cpu(resp->fid); - memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); - if (is_valid_ether_addr(vf->mac_addr)) - /* overwrite netdev dev_adr with admin VF MAC */ - memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); - else - random_ether_addr(bp->dev->dev_addr); vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); @@ -4209,14 +4208,16 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->max_l2_ctxs = le16_to_cpu(resp->max_l2_ctxs); vf->max_vnics = le16_to_cpu(resp->max_vnics); vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); + + memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); + if (is_valid_ether_addr(vf->mac_addr)) + /* overwrite netdev dev_adr with admin VF MAC */ + memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); + else + random_ether_addr(bp->dev->dev_addr); #endif } - bp->tx_push_thresh = 0; - if (resp->flags & - cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) - bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; - hwrm_func_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; From 001154eb242b5a6667b74e5cf20873fb75f1b9d3 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:06 -0400 Subject: [PATCH 0481/1050] bnxt_en: Call firmware to approve the random VF MAC address. After generating the random MAC address for VF, call the firmware to approve it. This step serves 2 purposes. Some hypervisor (e.g. ESX) wants to approve the MAC address. 2nd, the call will setup the proper forwarding database in the internal switch. We need to unlock the hwrm_cmd_lock mutex before calling bnxt_approve_mac(). We can do that because we are at the end of the function and all the previous firmware response data has been copied. Signed-off-by: Michael Chan Signed-off-by: David S. 
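The mutex choreography in the diff below is the interesting part: bnxt_approve_mac() issues its own HWRM command, so it must run after hwrm_cmd_lock is dropped, and dropping the lock early is safe only because every field needed from the qcaps response has already been copied out. A condensed sketch of the pattern, simplified from the hunk that follows:

mutex_lock(&bp->hwrm_cmd_lock);
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (!rc)
	memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); /* copy while resp is valid */
mutex_unlock(&bp->hwrm_cmd_lock);

/* a second firmware exchange is now legal */
if (!rc && !is_valid_ether_addr(vf->mac_addr)) {
	random_ether_addr(bp->dev->dev_addr);
	rc = bnxt_approve_mac(bp, bp->dev->dev_addr);
}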
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b1dcece2cd64..cbc0b8ad916c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4210,11 +4210,16 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); - if (is_valid_ether_addr(vf->mac_addr)) + mutex_unlock(&bp->hwrm_cmd_lock); + + if (is_valid_ether_addr(vf->mac_addr)) { /* overwrite netdev dev_adr with admin VF MAC */ memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); - else + } else { random_ether_addr(bp->dev->dev_addr); + rc = bnxt_approve_mac(bp, bp->dev->dev_addr); + } + return rc; #endif } From 4ffcd582301bd020b1f9d00c55473af305ec19b5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:07 -0400 Subject: [PATCH 0482/1050] bnxt_en: Pad TX packets below 52 bytes. The hardware has a limitation that it won't pass host to BMC loopback packets below 52-bytes. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 41033d01fe8d..51b164a0e844 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -413,7 +413,7 @@ struct rx_tpa_end_cmp_ext { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) -#define BNXT_MIN_PKT_SIZE 45 +#define BNXT_MIN_PKT_SIZE 52 #define BNXT_NUM_TESTS(bp) 0 From ae8e98a6fa7a73917196c507e43414ea96b6a0fc Mon Sep 17 00:00:00 2001 From: Deepak Khungar Date: Mon, 19 Sep 2016 03:58:08 -0400 Subject: [PATCH 0483/1050] bnxt_en: Support for "ethtool -r" command Restart autoneg if autoneg is enabled. Signed-off-by: Deepak Khungar Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index c74ce698805e..a7e04ff4eaed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1849,6 +1849,25 @@ static int bnxt_get_module_eeprom(struct net_device *dev, return rc; } +static int bnxt_nway_reset(struct net_device *dev) +{ + int rc = 0; + + struct bnxt *bp = netdev_priv(dev); + struct bnxt_link_info *link_info = &bp->link_info; + + if (!BNXT_SINGLE_PF(bp)) + return -EOPNOTSUPP; + + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) + return -EINVAL; + + if (netif_running(dev)) + rc = bnxt_hwrm_set_link_setting(bp, true, false); + + return rc; +} + const struct ethtool_ops bnxt_ethtool_ops = { .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, @@ -1881,4 +1900,5 @@ const struct ethtool_ops bnxt_ethtool_ops = { .set_eee = bnxt_set_eee, .get_module_info = bnxt_get_module_info, .get_module_eeprom = bnxt_get_module_eeprom, + .nway_reset = bnxt_nway_reset }; From 350a714960eb8a980c913c9be5a96bb18b2fe9da Mon Sep 17 00:00:00 2001 From: Eddie Wai Date: Mon, 19 Sep 2016 03:58:09 -0400 Subject: [PATCH 0484/1050] bnxt_en: Fixed the VF link status after a link state change The VF link state can be changed via the 'ip link set' cmd. 
Currently, the new link state does not take effect immediately. The fix is for the PF to send a link change async event to the designated VF after a VF link state change. This async event will trigger the VF to update the link status. Signed-off-by: Eddie Wai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 50d2007a2640..8be718508600 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -19,6 +19,45 @@ #include "bnxt_ethtool.h" #ifdef CONFIG_BNXT_SRIOV +static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, + struct bnxt_vf_info *vf, u16 event_id) +{ + struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_fwd_async_event_cmpl_input req = {0}; + struct hwrm_async_event_cmpl *async_cmpl; + int rc = 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); + if (vf) + req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); + else + /* broadcast this async event to all VFs */ + req.encap_async_event_target_id = cpu_to_le16(0xffff); + async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; + async_cmpl->type = + cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); + async_cmpl->event_id = cpu_to_le16(event_id); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + + if (rc) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n", + rc); + goto fwd_async_event_cmpl_exit; + } + + if (resp->error_code) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", + resp->error_code); + rc = -1; + } + +fwd_async_event_cmpl_exit: + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) { if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { @@ -243,8 +282,9 @@ int bnxt_set_vf_link_state(struct net_device *dev, int vf_id, int link) rc = -EINVAL; break; } - /* CHIMP TODO: send msg to VF to update new link state */ - + if (vf->flags & (BNXT_VF_LINK_UP | BNXT_VF_LINK_FORCED)) + rc = bnxt_hwrm_fwd_async_event_cmpl(bp, vf, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE); return rc; } @@ -525,46 +565,6 @@ err_out1: return rc; } -static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, - struct bnxt_vf_info *vf, - u16 event_id) -{ - int rc = 0; - struct hwrm_fwd_async_event_cmpl_input req = {0}; - struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; - struct hwrm_async_event_cmpl *async_cmpl; - - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); - if (vf) - req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); - else - /* broadcast this async event to all VFs */ - req.encap_async_event_target_id = cpu_to_le16(0xffff); - async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; - async_cmpl->type = - cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); - async_cmpl->event_id = cpu_to_le16(event_id); - - mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - - if (rc) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. 
rc:%d\n", - rc); - goto fwd_async_event_cmpl_exit; - } - - if (resp->error_code) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", - resp->error_code); - rc = -1; - } - -fwd_async_event_cmpl_exit: - mutex_unlock(&bp->hwrm_cmd_lock); - return rc; -} - void bnxt_sriov_disable(struct bnxt *bp) { u16 num_vfs = pci_num_vf(bp->pdev); From 51f141bec15aecb2ee5f0db77761dbf219333b93 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 00:11:34 +0200 Subject: [PATCH 0485/1050] net: ethernet: broadcom: b44: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 22 +++++++++++----------- drivers/net/ethernet/broadcom/b44.h | 1 - 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 74f0a37c4eb6..936f06ffef23 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1486,7 +1486,7 @@ static int b44_open(struct net_device *dev) b44_enable_ints(bp); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_start(bp->phydev); + phy_start(dev->phydev); netif_start_queue(dev); out: @@ -1651,7 +1651,7 @@ static int b44_close(struct net_device *dev) netif_stop_queue(dev); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_stop(bp->phydev); + phy_stop(dev->phydev); napi_disable(&bp->napi); @@ -1837,8 +1837,8 @@ static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct b44 *bp = netdev_priv(dev); if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - return phy_ethtool_gset(bp->phydev, cmd); + BUG_ON(!dev->phydev); + return phy_ethtool_gset(dev->phydev, cmd); } cmd->supported = (SUPPORTED_Autoneg); @@ -1886,12 +1886,12 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) int ret; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); + BUG_ON(!dev->phydev); spin_lock_irq(&bp->lock); if (netif_running(dev)) b44_setup_phy(bp); - ret = phy_ethtool_sset(bp->phydev, cmd); + ret = phy_ethtool_sset(dev->phydev, cmd); spin_unlock_irq(&bp->lock); @@ -2137,8 +2137,8 @@ static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) spin_lock_irq(&bp->lock); if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - err = phy_mii_ioctl(bp->phydev, ifr, cmd); + BUG_ON(!dev->phydev); + err = phy_mii_ioctl(dev->phydev, ifr, cmd); } else { err = generic_mii_ioctl(&bp->mii_if, if_mii(ifr), cmd, NULL); } @@ -2206,7 +2206,7 @@ static const struct net_device_ops b44_netdev_ops = { static void b44_adjust_link(struct net_device *dev) { struct b44 *bp = netdev_priv(dev); - struct phy_device *phydev = bp->phydev; + struct phy_device *phydev = dev->phydev; bool status_changed = 0; BUG_ON(!phydev); @@ -2303,7 +2303,6 @@ static int b44_register_phy_one(struct b44 *bp) SUPPORTED_MII); phydev->advertising = phydev->supported; - bp->phydev = phydev; bp->old_link = 0; bp->phy_addr = phydev->mdio.addr; @@ -2323,9 +2322,10 @@ err_out: static void b44_unregister_phy_one(struct b44 *bp) { + struct net_device *dev = bp->dev; struct mii_bus *mii_bus = bp->mii_bus; - phy_disconnect(bp->phydev); + phy_disconnect(dev->phydev); mdiobus_unregister(mii_bus); mdiobus_free(mii_bus); } diff --git 
a/drivers/net/ethernet/broadcom/b44.h b/drivers/net/ethernet/broadcom/b44.h index 65d88d7c5581..89d2cf341163 100644 --- a/drivers/net/ethernet/broadcom/b44.h +++ b/drivers/net/ethernet/broadcom/b44.h @@ -404,7 +404,6 @@ struct b44 { u32 tx_pending; u8 phy_addr; u8 force_copybreak; - struct phy_device *phydev; struct mii_bus *mii_bus; int old_link; struct mii_if_info mii_if; From 2406e5d4c4c48c9402c0629748c001178ea4c149 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 00:11:35 +0200 Subject: [PATCH 0486/1050] net: ethernet: broadcom: b44: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 98 ++++++++++++++++------------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 936f06ffef23..17aa33c5567d 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1832,58 +1832,65 @@ static int b44_nway_reset(struct net_device *dev) return r; } -static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); + u32 supported, advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { BUG_ON(!dev->phydev); - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } - cmd->supported = (SUPPORTED_Autoneg); - cmd->supported |= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_MII); + supported = (SUPPORTED_Autoneg); + supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_MII); - cmd->advertising = 0; + advertising = 0; if (bp->flags & B44_FLAG_ADV_10HALF) - cmd->advertising |= ADVERTISED_10baseT_Half; + advertising |= ADVERTISED_10baseT_Half; if (bp->flags & B44_FLAG_ADV_10FULL) - cmd->advertising |= ADVERTISED_10baseT_Full; + advertising |= ADVERTISED_10baseT_Full; if (bp->flags & B44_FLAG_ADV_100HALF) - cmd->advertising |= ADVERTISED_100baseT_Half; + advertising |= ADVERTISED_100baseT_Half; if (bp->flags & B44_FLAG_ADV_100FULL) - cmd->advertising |= ADVERTISED_100baseT_Full; - cmd->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; - ethtool_cmd_speed_set(cmd, ((bp->flags & B44_FLAG_100_BASE_T) ? - SPEED_100 : SPEED_10)); - cmd->duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? + advertising |= ADVERTISED_100baseT_Full; + advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + cmd->base.speed = (bp->flags & B44_FLAG_100_BASE_T) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? DUPLEX_FULL : DUPLEX_HALF; - cmd->port = 0; - cmd->phy_address = bp->phy_addr; - cmd->transceiver = (bp->flags & B44_FLAG_EXTERNAL_PHY) ? - XCVR_EXTERNAL : XCVR_INTERNAL; - cmd->autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? + cmd->base.port = 0; + cmd->base.phy_address = bp->phy_addr; + cmd->base.autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? 
AUTONEG_DISABLE : AUTONEG_ENABLE; - if (cmd->autoneg == AUTONEG_ENABLE) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg == AUTONEG_ENABLE) + advertising |= ADVERTISED_Autoneg; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + if (!netif_running(dev)){ - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = 0xff; + cmd->base.speed = 0; + cmd->base.duplex = 0xff; } - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + return 0; } -static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); u32 speed; int ret; + u32 advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { BUG_ON(!dev->phydev); @@ -1891,31 +1898,34 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (netif_running(dev)) b44_setup_phy(bp); - ret = phy_ethtool_sset(dev->phydev, cmd); + ret = phy_ethtool_ksettings_set(dev->phydev, cmd); spin_unlock_irq(&bp->lock); return ret; } - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); /* We do not support gigabit. */ - if (cmd->autoneg == AUTONEG_ENABLE) { - if (cmd->advertising & + if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (advertising & (ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full)) return -EINVAL; } else if ((speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) { + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) { return -EINVAL; } spin_lock_irq(&bp->lock); - if (cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { bp->flags &= ~(B44_FLAG_FORCE_LINK | B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX | @@ -1923,19 +1933,19 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); - if (cmd->advertising == 0) { + if (advertising == 0) { bp->flags |= (B44_FLAG_ADV_10HALF | B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); } else { - if (cmd->advertising & ADVERTISED_10baseT_Half) + if (advertising & ADVERTISED_10baseT_Half) bp->flags |= B44_FLAG_ADV_10HALF; - if (cmd->advertising & ADVERTISED_10baseT_Full) + if (advertising & ADVERTISED_10baseT_Full) bp->flags |= B44_FLAG_ADV_10FULL; - if (cmd->advertising & ADVERTISED_100baseT_Half) + if (advertising & ADVERTISED_100baseT_Half) bp->flags |= B44_FLAG_ADV_100HALF; - if (cmd->advertising & ADVERTISED_100baseT_Full) + if (advertising & ADVERTISED_100baseT_Full) bp->flags |= B44_FLAG_ADV_100FULL; } } else { @@ -1943,7 +1953,7 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) bp->flags &= ~(B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX); if (speed == SPEED_100) bp->flags |= B44_FLAG_100_BASE_T; - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) bp->flags |= B44_FLAG_FULL_DUPLEX; } @@ -2110,8 +2120,6 @@ static int b44_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops b44_ethtool_ops = { .get_drvinfo = b44_get_drvinfo, - .get_settings = b44_get_settings, - .set_settings = b44_set_settings, .nway_reset = b44_nway_reset, .get_link = ethtool_op_get_link, .get_wol = b44_get_wol, @@ -2125,6 +2133,8 @@ static const struct ethtool_ops 
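For context on this family of conversions: the legacy get/set_settings callbacks traffic in fixed u32 SUPPORTED_*/ADVERTISED_* masks, which ran out of bits once link modes past bit 31 appeared; link_ksettings keeps link modes in arbitrarily wide bitmaps, and drivers that still think in u32 (like b44 here) bridge the gap with the two convert helpers. A sketch of the round trip, assuming a caller-supplied cmd:

/* reporting direction: u32 mask -> link-mode bitmap */
u32 supported = SUPPORTED_100baseT_Full | SUPPORTED_Autoneg; /* example */

ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
					supported);

/* parsing direction: bitmap -> u32; returns false if the bitmap
 * held modes a u32 mask cannot express
 */
static int example_parse_advertising(const struct ethtool_link_ksettings *cmd,
				     u32 *advertising)
{
	if (!ethtool_convert_link_mode_to_legacy_u32(advertising,
						     cmd->link_modes.advertising))
		return -EINVAL;
	return 0;
}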
b44_ethtool_ops = { .get_strings = b44_get_strings, .get_sset_count = b44_get_sset_count, .get_ethtool_stats = b44_get_ethtool_stats, + .get_link_ksettings = b44_get_link_ksettings, + .set_link_ksettings = b44_set_link_ksettings, }; static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) From 625eb8667d6fcb22e474502133986b2d2838917d Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 16:59:06 +0200 Subject: [PATCH 0487/1050] net: ethernet: broadcom: bcm63xx: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 31 +++++++++----------- drivers/net/ethernet/broadcom/bcm63xx_enet.h | 1 - 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 6c8bc5fadac7..082f3f0cf5a0 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -791,7 +791,7 @@ static void bcm_enet_adjust_phy_link(struct net_device *dev) int status_changed; priv = netdev_priv(dev); - phydev = priv->phydev; + phydev = dev->phydev; status_changed = 0; if (priv->old_link != phydev->link) { @@ -913,7 +913,6 @@ static int bcm_enet_open(struct net_device *dev) priv->old_link = 0; priv->old_duplex = -1; priv->old_pause = -1; - priv->phydev = phydev; } /* mask all interrupts and request them */ @@ -1085,7 +1084,7 @@ static int bcm_enet_open(struct net_device *dev) ENETDMAC_IRMASK, priv->tx_chan); if (priv->has_phy) - phy_start(priv->phydev); + phy_start(phydev); else bcm_enet_adjust_link(dev); @@ -1127,7 +1126,7 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - phy_disconnect(priv->phydev); + phy_disconnect(phydev); return ret; } @@ -1190,7 +1189,7 @@ static int bcm_enet_stop(struct net_device *dev) netif_stop_queue(dev); napi_disable(&priv->napi); if (priv->has_phy) - phy_stop(priv->phydev); + phy_stop(dev->phydev); del_timer_sync(&priv->rx_timeout); /* mask all interrupts */ @@ -1234,10 +1233,8 @@ static int bcm_enet_stop(struct net_device *dev) free_irq(dev->irq, dev); /* release phy */ - if (priv->has_phy) { - phy_disconnect(priv->phydev); - priv->phydev = NULL; - } + if (priv->has_phy) + phy_disconnect(dev->phydev); return 0; } @@ -1437,9 +1434,9 @@ static int bcm_enet_nway_reset(struct net_device *dev) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return genphy_restart_aneg(priv->phydev); + return genphy_restart_aneg(dev->phydev); } return -EOPNOTSUPP; @@ -1456,9 +1453,9 @@ static int bcm_enet_get_settings(struct net_device *dev, cmd->maxtxpkt = 0; if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(priv->phydev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } else { cmd->autoneg = 0; ethtool_cmd_speed_set(cmd, ((priv->force_speed_100) @@ -1483,9 +1480,9 @@ static int bcm_enet_set_settings(struct net_device *dev, priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(priv->phydev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } else { if (cmd->autoneg || @@ -1604,9 +1601,9 @@ static int 
bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } else { struct mii_if_info mii; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h index f55af4310085..0a1b7b2e55bd 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h @@ -290,7 +290,6 @@ struct bcm_enet_priv { /* used when a phy is connected (phylib used) */ struct mii_bus *mii_bus; - struct phy_device *phydev; int old_link; int old_duplex; int old_pause; From 639cfa9e8cdaca5276c9786e22195653a0d4391b Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 16:59:07 +0200 Subject: [PATCH 0488/1050] net: ethernet: broadcom: bcm63xx: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 52 +++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 082f3f0cf5a0..ae364c74baf3 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1442,39 +1442,40 @@ static int bcm_enet_nway_reset(struct net_device *dev) return -EOPNOTSUPP; } -static int bcm_enet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; + u32 supported, advertising; priv = netdev_priv(dev); - cmd->maxrxpkt = 0; - cmd->maxtxpkt = 0; - if (priv->has_phy) { if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } else { - cmd->autoneg = 0; - ethtool_cmd_speed_set(cmd, ((priv->force_speed_100) - ? SPEED_100 : SPEED_10)); - cmd->duplex = (priv->force_duplex_full) ? + cmd->base.autoneg = 0; + cmd->base.speed = (priv->force_speed_100) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (priv->force_duplex_full) ? 
DUPLEX_FULL : DUPLEX_HALF; - cmd->supported = ADVERTISED_10baseT_Half | + supported = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - cmd->advertising = 0; - cmd->port = PORT_MII; - cmd->transceiver = XCVR_EXTERNAL; + advertising = 0; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, advertising); + cmd->base.port = PORT_MII; } return 0; } -static int bcm_enet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; @@ -1482,16 +1483,19 @@ static int bcm_enet_set_settings(struct net_device *dev, if (priv->has_phy) { if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } else { - if (cmd->autoneg || - (cmd->speed != SPEED_100 && cmd->speed != SPEED_10) || - cmd->port != PORT_MII) + if (cmd->base.autoneg || + (cmd->base.speed != SPEED_100 && + cmd->base.speed != SPEED_10) || + cmd->base.port != PORT_MII) return -EINVAL; - priv->force_speed_100 = (cmd->speed == SPEED_100) ? 1 : 0; - priv->force_duplex_full = (cmd->duplex == DUPLEX_FULL) ? 1 : 0; + priv->force_speed_100 = + (cmd->base.speed == SPEED_100) ? 1 : 0; + priv->force_duplex_full = + (cmd->base.duplex == DUPLEX_FULL) ? 1 : 0; if (netif_running(dev)) bcm_enet_adjust_link(dev); @@ -1585,14 +1589,14 @@ static const struct ethtool_ops bcm_enet_ethtool_ops = { .get_sset_count = bcm_enet_get_sset_count, .get_ethtool_stats = bcm_enet_get_ethtool_stats, .nway_reset = bcm_enet_nway_reset, - .get_settings = bcm_enet_get_settings, - .set_settings = bcm_enet_set_settings, .get_drvinfo = bcm_enet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = bcm_enet_get_ringparam, .set_ringparam = bcm_enet_set_ringparam, .get_pauseparam = bcm_enet_get_pauseparam, .set_pauseparam = bcm_enet_set_pauseparam, + .get_link_ksettings = bcm_enet_get_link_ksettings, + .set_link_ksettings = bcm_enet_set_link_ksettings, }; static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) From 6b352ebccbcf68866fa5e2ec98cce5e6b7cdf92e Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 17:16:45 +0200 Subject: [PATCH 0489/1050] net: ethernet: broadcom: bcmgenet: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 46f904392f88..2013474bfdbf 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -459,11 +459,11 @@ static int bcmgenet_get_settings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } -static int bcmgenet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -471,7 +471,7 @@ static int bcmgenet_set_settings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -977,8 +977,6 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, - .get_settings = bcmgenet_get_settings, - .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -990,6 +988,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, + .get_link_ksettings = bcmgenet_get_link_ksettings, + .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ From 9940803065e3e15df8fd0d6a9e29f7b0617d8935 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:26:34 +0800 Subject: [PATCH 0490/1050] phy: mark lan88xx_suspend() static We get 1 warning when building kernel with W=1: drivers/net/phy/microchip.c:58:5: warning: no previous prototype for 'lan88xx_suspend' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/phy/microchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 15f820648f82..7c00e508a101 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -55,7 +55,7 @@ static int lan88xx_phy_ack_interrupt(struct phy_device *phydev) return rc < 0 ? 
rc : 0; } -int lan88xx_suspend(struct phy_device *phydev) +static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; From d766e7e6b68d681d46d74e228ad0ba133e730e36 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:35:29 +0800 Subject: [PATCH 0491/1050] be2net: mark symbols static where possible We get 4 warnings when building kernel with W=1: drivers/net/ethernet/emulex/benet/be_main.c:4368:6: warning: no previous prototype for 'be_calculate_pf_pool_rss_tables' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4385:5: warning: no previous prototype for 'be_get_nic_pf_num_list' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4537:6: warning: no previous prototype for 'be_reset_nic_desc' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4910:5: warning: no previous prototype for '__be_cmd_set_logical_link_config' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 9 +++++---- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 15d02da08d8f..9cffe48be156 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4382,7 +4382,7 @@ err: } /* This routine returns a list of all the NIC PF_nums in the adapter */ -u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) +static u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) { struct be_res_desc_hdr *hdr = (struct be_res_desc_hdr *)buf; struct be_pcie_res_desc *pcie = NULL; @@ -4534,7 +4534,7 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc, } /* Mark all fields invalid */ -void be_reset_nic_desc(struct be_nic_res_desc *nic) +static void be_reset_nic_desc(struct be_nic_res_desc *nic) { memset(nic, 0, sizeof(*nic)); nic->unicast_mac_count = 0xFFFF; @@ -4907,8 +4907,9 @@ err: return status; } -int __be_cmd_set_logical_link_config(struct be_adapter *adapter, - int link_state, int version, u8 domain) +static int +__be_cmd_set_logical_link_config(struct be_adapter *adapter, + int link_state, int version, u8 domain) { struct be_mcc_wrb *wrb; struct be_cmd_req_set_ll_link *req; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 34f63eff6e8a..9a94840c5757 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4365,7 +4365,7 @@ static void be_setup_init(struct be_adapter *adapter) * for distribution between the VFs. This self-imposed limit will determine the * no: of VFs for which RSS can be enabled. 
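These cleanups all follow one rule: a function with no users outside its translation unit should be static, which both documents the intent and silences -Wmissing-prototypes under W=1 (or sparse under make C=2). The shape of the fix, with a hypothetical name:

/* before: warning: no previous prototype for 'example_helper' */
int example_helper(void) { return 0; }

/* after: file-local linkage, no external prototype expected */
static int example_helper(void) { return 0; }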
*/ -void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) +static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) { struct be_port_resources port_res = {0}; u8 rss_tables_on_port; From 766a0e978fc2ba98d4865b466f8b572402317189 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:44:22 +0800 Subject: [PATCH 0492/1050] net/mlx5: clean function declarations in eswitch.c up We get 2 warnings when building the kernel with W=1: drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c:463:5: warning: no previous prototype for 'esw_offloads_init' [-Wmissing-prototypes] drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c:521:6: warning: no previous prototype for 'esw_offloads_cleanup' [-Wmissing-prototypes] In fact, both functions are declared in drivers/net/ethernet/mellanox/mlx5/core/eswitch.c, but should be declared in a header file so they can be referenced from other files. So this patch moves the declarations into drivers/net/ethernet/mellanox/mlx5/core/eswitch.h Signed-off-by: Baoyou Xie Acked-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 654b76ff962f..492749473e77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,9 +81,6 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); - static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6855783f3bb3..b96e8c93bf9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,9 @@ struct mlx5_eswitch { int mode; }; +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); + /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); From 6a5d58b67e205f2ffc62d0a9ee4ef7d237e9a7fb Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 07:31:42 -0400 Subject: [PATCH 0493/1050] net sched ife action: add 16 bit helpers encoder and checker for 16-bit metadata Signed-off-by: Jamal Hadi Salim Signed-off-by: David S.
Miller --- include/net/tc_act/tc_ife.h | 2 ++ net/sched/act_ife.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 5164bd7a38fb..9fd2bea0a6e0 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -50,9 +50,11 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); int ife_validate_meta_u16(void *val, int len); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi); void ife_release_meta_gen(struct tcf_meta_info *mi); int register_ife_op(struct tcf_meta_ops *mops); int unregister_ife_op(struct tcf_meta_ops *mops); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81315e1..ccf7b4b655fe 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -63,6 +63,23 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) } EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) +{ + u16 edata = 0; + + if (mi->metaval) + edata = *(u16 *)mi->metaval; + else if (metaval) + edata = metaval; + + if (!edata) /* will not encode */ + return 0; + + edata = htons(edata); + return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata); +} +EXPORT_SYMBOL_GPL(ife_encode_meta_u16); + int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) @@ -81,6 +98,15 @@ int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_check_meta_u32); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi) +{ + if (metaval || mi->metaval) + return 8; /* T+L+(V) == 2+2+(2+2bytepad) */ + + return 0; +} +EXPORT_SYMBOL_GPL(ife_check_meta_u16); + int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi) { u32 edata = metaval; From 408fbc22ef1efb00dd896acd00e9f7d9b641e047 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 07:31:43 -0400 Subject: [PATCH 0494/1050] net sched ife action: Introduce skb tcindex metadata encap decap Sample use case of how this is encoded: user space via tuntap (or a connected VM/Machine/container) encodes the tcindex TLV. Sample use case of decoding: IFE action decodes it and the skb->tc_index is then used to classify. So something like this for encoded ICMP packets: .. first decode then reclassify... skb->tc_index will be set sudo $TC filter add dev $ETH parent ffff: prio 2 protocol 0xbeef \ u32 match u32 0 0 flowid 1:1 \ action ife decode reclassify ...next match the decoded icmp packet... sudo $TC filter add dev $ETH parent ffff: prio 4 protocol ip \ u32 match ip protocol 1 0xff flowid 1:1 \ action continue ... last classify it using the tcindex classifier and do some action.. sudo $TC filter add dev $ETH parent ffff: prio 5 protocol ip \ handle 0x11 tcindex classid 1:1 \ action blah.. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S.
Miller --- include/uapi/linux/tc_act/tc_ife.h | 3 +- net/sched/Kconfig | 5 ++ net/sched/Makefile | 1 + net/sched/act_meta_skbtcindex.c | 79 ++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 net/sched/act_meta_skbtcindex.c diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index 4ece02a77b9a..cd18360eca24 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -32,8 +32,9 @@ enum { #define IFE_META_HASHID 2 #define IFE_META_PRIO 3 #define IFE_META_QMAP 4 +#define IFE_META_TCINDEX 5 /*Can be overridden at runtime by module option*/ -#define __IFE_META_MAX 5 +#define __IFE_META_MAX 6 #define IFE_META_MAX (__IFE_META_MAX - 1) #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 7795d5a3f79a..87956a768d1b 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -793,6 +793,11 @@ config NET_IFE_SKBPRIO depends on NET_ACT_IFE ---help--- +config NET_IFE_SKBTCINDEX + tristate "Support to encoding decoding skb tcindex on IFE action" + depends on NET_ACT_IFE + ---help--- + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 148ae0d5ac2c..4bdda3634e0b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c new file mode 100644 index 000000000000..3b35774ce890 --- /dev/null +++ b/net/sched/act_meta_skbtcindex.c @@ -0,0 +1,79 @@ +/* + * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * copyright Jamal Hadi Salim (2016) + * +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/module.h> +#include <linux/init.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <uapi/linux/tc_act/tc_ife.h> +#include <net/tc_act/tc_ife.h> + +static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, + struct tcf_meta_info *e) +{ + u32 ifetc_index = skb->tc_index; + + return ife_encode_meta_u16(ifetc_index, skbdata, e); +} + +static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len) +{ + u16 ifetc_index = *(u16 *)data; + + skb->tc_index = ntohs(ifetc_index); + return 0; +} + +static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e) +{ + return ife_check_meta_u16(skb->tc_index, e); +} + +static struct tcf_meta_ops ife_skbtcindex_ops = { + .metaid = IFE_META_TCINDEX, + .metatype = NLA_U16, + .name = "tc_index", + .synopsis = "skb tc_index 16 bit metadata", + .check_presence = skbtcindex_check, + .encode = skbtcindex_encode, + .decode = skbtcindex_decode, + .get = ife_get_meta_u16, + .alloc = ife_alloc_meta_u16, + .release = ife_release_meta_gen, + .validate = ife_validate_meta_u16, + .owner = THIS_MODULE, +}; + +static int __init ifetc_index_init_module(void) +{ + return register_ife_op(&ife_skbtcindex_ops); +} + +static void __exit ifetc_index_cleanup_module(void) +{ + unregister_ife_op(&ife_skbtcindex_ops); +} + +module_init(ifetc_index_init_module); +module_exit(ifetc_index_cleanup_module); + +MODULE_AUTHOR("Jamal Hadi Salim(2016)"); +MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); From f71b109f1730902b73f70d78764d8a41265080dd Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Sun, 18 Sep 2016 07:53:08 -0400 Subject: [PATCH 0495/1050] net sched actions police: peg drop stats for conforming traffic Setting the conforming action to drop is a valid policy. When it is set we need to at least see the stats indicating it for debugging. Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_police.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8a3be1d99775..ba7074b391ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -249,6 +249,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, police->tcfp_t_c = now; police->tcfp_toks = toks; police->tcfp_ptoks = ptoks; + if (police->tcfp_result == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcfp_result; } From 5a7a5555a362f60350668cd124df9a396f546c61 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 08:45:33 -0400 Subject: [PATCH 0496/1050] net sched: stylistic cleanups Signed-off-by: Jamal Hadi Salim Signed-off-by: David S.
Miller --- net/sched/act_api.c | 16 ++++++---------- net/sched/act_csum.c | 36 ++++++++++++++++++------------------ net/sched/act_gact.c | 3 ++- net/sched/act_mirred.c | 3 ++- net/sched/act_police.c | 10 ++++------ net/sched/cls_api.c | 18 ++++++++++-------- net/sched/cls_bpf.c | 6 ++++-- net/sched/cls_flow.c | 21 ++++++++++++++------- net/sched/cls_flower.c | 3 ++- net/sched/cls_fw.c | 10 +++++----- net/sched/cls_route.c | 9 +++------ net/sched/cls_tcindex.c | 12 ++++++------ net/sched/cls_u32.c | 30 ++++++++++++------------------ net/sched/sch_api.c | 41 ++++++++++++++++++++++++++--------------- 14 files changed, 114 insertions(+), 104 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d09d0687594b..d0aceb1740b1 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,9 +592,8 @@ err_out: return ERR_PTR(err); } -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, - int bind, struct list_head *actions) +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind, struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; @@ -923,9 +922,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, return err; } -static int -tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 portid, int ovr) +static int tcf_action_add(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; LIST_HEAD(actions); @@ -988,8 +986,7 @@ replay: return ret; } -static struct nlattr * -find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1016,8 +1013,7 @@ find_dump_kind(const struct nlmsghdr *n) return kind; } -static int -tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) +static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b5dbf633a863..e0defcef376d 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -116,8 +116,8 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, return (void *)(skb_network_header(skb) + ihl); } -static int tcf_csum_ipv4_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmphdr *icmph; @@ -152,8 +152,8 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmp6hdr *icmp6h; const struct ipv6hdr *ip6h; @@ -174,8 +174,8 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct iphdr *iph; @@ -195,8 +195,8 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct ipv6hdr *ip6h; @@ -217,8 +217,8 
@@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct iphdr *iph; @@ -270,8 +270,8 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct ipv6hdr *ip6h; @@ -380,8 +380,8 @@ fail: return 0; } -static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, - unsigned int ixhl, unsigned int *pl) +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl, + unsigned int *pl) { int off, len, optlen; unsigned char *xh = (void *)ip6xh; @@ -494,8 +494,8 @@ fail: return 0; } -static int tcf_csum(struct sk_buff *skb, - const struct tc_action *a, struct tcf_result *res) +static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); int action; @@ -531,8 +531,8 @@ drop: return TC_ACT_SHOT; } -static int tcf_csum_dump(struct sk_buff *skb, - struct tc_action *a, int bind, int ref) +static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e24a4093d6f6..e0aa30f83c6c 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -156,7 +156,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, int action = READ_ONCE(gact->tcf_action); struct tcf_t *tm = &gact->tcf_tm; - _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets); + _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, + packets); if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6038c85d92f5..1c76387c5d9c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,7 +204,8 @@ out: return retval; } -static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ba7074b391ae..d1bd248fe146 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -263,8 +263,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcf_action; } -static int -tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = to_police(a); @@ -349,14 +349,12 @@ static struct pernet_operations police_net_ops = { .size = sizeof(struct tc_action_net), }; -static int __init -police_init_module(void) +static int __init police_init_module(void) { return tcf_register_action(&act_police_ops, &police_net_ops); } -static void __exit -police_cleanup_module(void) +static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops, &police_net_ops); } diff --git a/net/sched/cls_api.c 
b/net/sched/cls_api.c index a7c5645373af..11da7da0b7c4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -344,13 +344,15 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, + RTM_DELTFILTER); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, + RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -448,7 +450,8 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct net *net = sock_net(a->skb->sk); return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTFILTER); } /* called with RTNL */ @@ -552,7 +555,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -560,8 +563,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", ovr, - TCA_ACT_BIND); + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -573,8 +575,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, int err, i = 0; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, ovr, - TCA_ACT_BIND, &actions); + NULL, ovr, TCA_ACT_BIND, + &actions); if (err) return err; list_for_each_entry(act, &actions, list) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1d92d4d3f222..c6f7a47541eb 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -55,7 +55,8 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, - [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, + .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -409,7 +410,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], + ovr); if (ret < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a379bae1d74e..e39672394c7b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -87,12 +87,14 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } -static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto(const struct sk_buff *skb, + const struct flow_keys *flow) { return flow->basic.ip_proto; } -static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); @@ -100,7 +102,8 @@ static 
u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys return addr_fold(skb->sk); } -static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); @@ -149,7 +152,8 @@ static u32 flow_get_nfct(const struct sk_buff *skb) }) #endif -static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_src(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -161,7 +165,8 @@ fallback: return flow_get_src(skb, flow); } -static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -173,14 +178,16 @@ fallback: return flow_get_dst(skb, flow); } -static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } -static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a3f4c706dfaa..2af09c872a1a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -241,7 +241,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc.type = TC_SETUP_CLSFLOWER; tc.cls_flower = &offload; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &tc); if (tc_skip_sw(flags)) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index cc0bda945800..9dc63d54e167 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -57,7 +57,7 @@ static u32 fw_hash(u32 handle) } static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; @@ -188,7 +188,8 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) + struct nlattr **tb, struct nlattr **tca, unsigned long base, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct tcf_exts e; @@ -237,9 +238,8 @@ errout: static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + u32 handle, struct nlattr **tca, unsigned long *arg, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *) *arg; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index c91e65d81a48..a4ce39b19be0 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -268,8 +268,7 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void -route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = 
container_of(head, struct route4_filter, rcu); @@ -474,10 +473,8 @@ errout: } static int route4_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct tcf_proto *tp, unsigned long base, u32 handle, + struct nlattr **tca, unsigned long *arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index d9500709831f..96144bdf30db 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -50,14 +50,13 @@ struct tcindex_data { struct rcu_head rcu; }; -static inline int -tcindex_filter_is_set(struct tcindex_filter_result *r) +static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { return tcf_exts_is_predicative(&r->exts) || r->res.classid; } -static struct tcindex_filter_result * -tcindex_lookup(struct tcindex_data *p, u16 key) +static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, + u16 key) { if (p->perfect) { struct tcindex_filter_result *f = p->perfect + key; @@ -144,7 +143,8 @@ static void tcindex_destroy_rexts(struct rcu_head *head) static void tcindex_destroy_fexts(struct rcu_head *head) { - struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); + struct tcindex_filter *f = container_of(head, struct tcindex_filter, + rcu); tcf_exts_destroy(&f->result.exts); kfree(f); @@ -550,7 +550,7 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index a29263a9d8c1..ae83c3aec308 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -104,7 +104,8 @@ static inline unsigned int u32_hash_fold(__be32 key, return h; } -static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) +static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) { struct { struct tc_u_knode *knode; @@ -256,8 +257,7 @@ deadloop: return -1; } -static struct tc_u_hnode * -u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) +static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; @@ -270,8 +270,7 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) return ht; } -static struct tc_u_knode * -u32_lookup_key(struct tc_u_hnode *ht, u32 handle) +static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; @@ -360,8 +359,7 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tcf_proto *tp, - struct tc_u_knode *n, +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); @@ -448,9 +446,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) } } -static int u32_replace_hw_hnode(struct tcf_proto *tp, - struct tc_u_hnode *h, - u32 flags) +static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -496,9 +493,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct 
tc_u_hnode *h) } } -static int u32_replace_hw_knode(struct tcf_proto *tp, - struct tc_u_knode *n, - u32 flags) +static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -763,8 +759,7 @@ errout: return err; } -static void u32_replace_knode(struct tcf_proto *tp, - struct tc_u_common *tp_c, +static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, struct tc_u_knode *n) { struct tc_u_knode __rcu **ins; @@ -845,8 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1088,7 +1082,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d677b3484d81..206dc24add3a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -389,7 +389,8 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) static struct qdisc_rate_table *qdisc_rtab_list; -struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, + struct nlattr *tab) { struct qdisc_rate_table *rtab; @@ -541,7 +542,8 @@ nla_put_failure: return -1; } -void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, + const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -888,10 +890,10 @@ static struct lock_class_key qdisc_rx_lock; Parameters are passed via opt. 
*/ -static struct Qdisc * -qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *p, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +static struct Qdisc *qdisc_create(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -1073,7 +1075,8 @@ struct check_loop_arg { int depth; }; -static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); +static int check_loop_fn(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *w); static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) { @@ -1450,7 +1453,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } else { if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1471,7 +1475,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1505,7 +1510,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, + true) < 0) goto done; dev_queue = dev_ingress_queue(dev); @@ -1640,7 +1646,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, + RTM_DELTCLASS); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1738,12 +1745,14 @@ struct qdisc_dump_args { struct netlink_callback *cb; }; -static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) +static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *arg) { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTCLASS); } static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, @@ -1976,10 +1985,12 @@ static int __init pktsched_init(void) rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, + NULL); rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, + NULL); return 0; } From babd6134a54d70efe875fa5661a20eaecb63f278 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 18 Sep 2016 18:20:27 +0300 Subject: [PATCH 0497/1050] net/mlx5: Fix flow counter bulk command out mailbox allocation The FW command output length should be only the length of struct mlx5_cmd_fc_bulk out field. 
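As a hedged illustration of the pattern (the struct and size names below are placeholders, not the driver's own code): when a driver-private wrapper is allocated in front of a FW output buffer, the allocation must cover both parts, while the length reported to the FW covers only the FW-visible buffer:

	struct wrapper {			/* driver bookkeeping only */
		size_t out_size;
		u8 out[];			/* FW writes its output here */
	};

	struct wrapper *b;
	int outlen = fw_out_hdr_sz + counter_sz * num;	/* FW-visible part only */

	b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);	/* wrapper + out[] */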
Failing to do so will cause the memcpy call that is invoked later in the driver to write to the wrong memory address, corrupting kernel memory and resulting in random crashes. This bug was found using the kernel address sanitizer (kasan). Fixes: a351a1b03bf1 ('net/mlx5: Introduce bulk reading of flow counters') Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 9134010e2921..287ade151ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -425,11 +425,11 @@ struct mlx5_cmd_fc_bulk * mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) { struct mlx5_cmd_fc_bulk *b; - int outlen = sizeof(*b) + + int outlen = MLX5_ST_SZ_BYTES(query_flow_counter_out) + MLX5_ST_SZ_BYTES(traffic_counter) * num; - b = kzalloc(outlen, GFP_KERNEL); + b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); if (!b) return NULL; From 4eea37d7b92076fdeac2a21e5f4dbd92d286719d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:28 +0300 Subject: [PATCH 0498/1050] net/mlx5: E-Switch, Fix error flow in the SRIOV e-switch init code When enablement of the SRIOV e-switch in a certain mode (switchdev or legacy) fails, we must set the mode to none. Otherwise, we'll run into double-free crashes when further attempting to deal with the e-switch (such as when disabling SRIOV or unloading the driver). Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8b78f156214e..b247949df135 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1554,6 +1554,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + esw->mode = SRIOV_NONE; return err; } From 6c419ba8e2580ab17c164db6e918e163d3537ec1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:29 +0300 Subject: [PATCH 0499/1050] net/mlx5: E-Switch, Handle mode change failures E-switch mode changes involve creating HW tables, potentially allocating netdevices, etc, and things can fail. Add an attempt to roll back to the existing mode when changing to the new mode fails. Only if the rollback also fails does restoring proper SRIOV functionality require a module unload or SRIOV disablement/enablement. Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- .../mellanox/mlx5/core/eswitch_offloads.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3dc83a9459a4..7de40e6b0c25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -446,7 +446,7 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; if (esw->mode != SRIOV_LEGACY) { esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); @@ -455,8 +455,12 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); - if (err) - esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err); + } return err; } @@ -508,12 +512,16 @@ create_ft_err: static int esw_offloads_stop(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); - if (err) - esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); + } return err; } From a435a07f9164dda7c0c26e8ad758881f4bafc127 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sun, 18 Sep 2016 17:46:07 +0200 Subject: [PATCH 0500/1050] net: ipv6: fallback to full lookup if table lookup is unsuitable Commit 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") introduced a regression: insertion of an IPv6 route in a table not containing the appropriate connected route for the gateway but which contained a non-connected route (like a default gateway) fails while it was previously working: $ ip link add eth0 type dummy $ ip link set up dev eth0 $ ip addr add 2001:db8::1/64 dev eth0 $ ip route add ::/0 via 2001:db8::5 dev eth0 table 20 $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 RTNETLINK answers: No route to host $ ip -6 route show table 20 default via 2001:db8::5 dev eth0 metric 1024 pref medium After this patch, we get: $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 $ ip -6 route show table 20 2001:db8:cafe::1 via 2001:db8::6 dev eth0 metric 1024 pref medium default via 2001:db8::5 dev eth0 metric 1024 pref medium Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Signed-off-by: Vincent Bernat Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 49817555449e..e3a224b97905 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1986,9 +1986,18 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; - if (cfg->fc_table) + if (cfg->fc_table) { grt = ip6_nh_lookup_table(net, cfg, gw_addr); + if (grt) { + if (grt->rt6i_flags & RTF_GATEWAY || + (dev && dev != grt->dst.dev)) { + ip6_rt_put(grt); + grt = NULL; + } + } + } + if (!grt) grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); From 878786d95e07ce2f5fb6e3cd8a6c2ed320339196 Mon Sep 17 00:00:00 2001 From: Rob Swindell Date: Tue, 20 Sep 2016 03:36:33 -0400 Subject: [PATCH 0501/1050] bnxt_en: Fix build error for kernels without RTC-LIB bnxt_hwrm_fw_set_time() now returns -EOPNOTSUPP when built for kernels without RTC_LIB. Setting the firmware time is not critical to the successful completion of the firmware update process. Signed-off-by: Rob Swindell Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cbc0b8ad916c..a9f9f3738022 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4323,6 +4323,7 @@ hwrm_ver_get_exit: int bnxt_hwrm_fw_set_time(struct bnxt *bp) { +#if IS_ENABLED(CONFIG_RTC_LIB) struct hwrm_fw_set_time_input req = {0}; struct rtc_time tm; struct timeval tv; @@ -4340,6 +4341,9 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp) req.minute = tm.tm_min; req.second = tm.tm_sec; return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +#else + return -EOPNOTSUPP; +#endif } static int bnxt_hwrm_port_qstats(struct bnxt *bp) From 3ed6e498b91a4dc5d0e8b6270a6c144061db2455 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 18 Sep 2016 21:17:19 +0200 Subject: [PATCH 0502/1050] MAINTAINERS: Add an entry for the core network DSA code The core distributed switch architecture code currently does not have a MAINTAINERS entry, which results in some contributions not landing in the right people's inboxes. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Acked-by: Vivien Didelot Signed-off-by: David S. Miller --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a5e1270dfbf1..247b418959fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8160,6 +8160,15 @@ S: Maintained W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c +NETWORKING [DSA] +M: Andrew Lunn +M: Vivien Didelot +M: Florian Fainelli +S: Maintained +F: net/dsa/ +F: include/net/dsa.h +F: drivers/net/dsa/ + NETWORKING [GENERAL] M: "David S. Miller" L: netdev@vger.kernel.org From 18c2d2c113eb330d260277350d09aae454e80177 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Mon, 19 Sep 2016 08:28:24 +0200 Subject: [PATCH 0503/1050] mlxsw: Change the RX LAG hash function from XOR to CRC Change the RX hash function from XOR to CRC in order to have better distribution of the traffic. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 4e2354ca0e4a..6460c7256f2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1392,7 +1392,7 @@ static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) { MLXSW_REG_ZERO(slcr, payload); mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); - mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC); mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); } From 1a9234e66eddb0f18447532ab3f12bd136473ed6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 19 Sep 2016 08:29:26 +0200 Subject: [PATCH 0504/1050] mlxsw: spectrum: Fix sparse warnings drivers/net/ethernet/mellanox/mlxsw//spectrum.c:251:28: warning: symbol 'mlxsw_sp_span_entry_find' was not declared. Should it be static? drivers/net/ethernet/mellanox/mlxsw//spectrum.c:265:28: warning: symbol 'mlxsw_sp_span_entry_get' was not declared. Should it be static? drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: int enum mlxsw_sp_span_type versus drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: int enum mlxsw_reg_mpar_i_e ... drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: int enum mlxsw_reg_sbxx_dir versus drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: int enum devlink_sb_pool_type drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: int enum mlxsw_reg_sbpr_mode versus drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: int enum devlink_sb_threshold_type ... drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: int enum mlxsw_sp_l3proto versus drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: int enum mlxsw_reg_ralxx_protocol ... drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:1749:6: warning: symbol 'mlxsw_sp_fib_entry_put' was not declared. Should it be static? Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 12 ++++-- .../mellanox/mlxsw/spectrum_buffers.c | 15 +++---- .../ethernet/mellanox/mlxsw/spectrum_router.c | 41 ++++++++++++------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 171f8dd19efa..fa312610c2d2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -248,7 +248,8 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, span_entry->used = false; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) { struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; int i; @@ -262,7 +263,8 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) return NULL; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry +*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) { struct mlxsw_sp_span_entry *span_entry; @@ -364,7 +366,8 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, } /* bind the port to the SPAN entry */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, true, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, true, pa_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); if (err) goto err_mpar_reg_write; @@ -405,7 +408,8 @@ mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, return; /* remove the inspected port */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, false, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, false, pa_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); /* remove the SBIB buffer if it was egress SPAN */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 953b214f38d0..bcaed8a38037 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -595,9 +595,9 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); - pool_info->pool_type = dir; + pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); - pool_info->threshold_type = pr->mode; + pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; return 0; } @@ -608,9 +608,10 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); u8 pool = pool_get(pool_index); enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - enum mlxsw_reg_sbpr_mode mode = threshold_type; u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); + enum mlxsw_reg_sbpr_mode mode; + mode = (enum mlxsw_reg_sbpr_mode) threshold_type; return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); } @@ -696,13 +697,13 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = 
mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, cm->max_buff); - *p_pool_index = pool_index_get(cm->pool, pool_type); + *p_pool_index = pool_index_get(cm->pool, dir); return 0; } @@ -716,7 +717,7 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; u8 pool = pool_get(pool_index); u32 max_buff; int err; @@ -943,7 +944,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3f5c51da6d3e..4afb49854eec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -252,7 +252,9 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, true, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -261,7 +263,9 @@ static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, false, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -384,7 +388,9 @@ static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, { char raltb_pl[MLXSW_REG_RALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, + vr->lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -394,7 +400,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, char raltb_pl[MLXSW_REG_RALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -1081,9 +1088,10 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, { char raleu_pl[MLXSW_REG_RALEU_LEN]; - mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, - adj_index, ecmp_size, - new_adj_index, new_ecmp_size); + mlxsw_reg_raleu_pack(raleu_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id, + adj_index, ecmp_size, new_adj_index, + new_ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); } @@ -1558,8 +1566,9 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum 
mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -1573,8 +1582,9 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr *vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_local_pack(ralue_pl, MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, fib_entry->rif); @@ -1589,8 +1599,9 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr *vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1753,8 +1764,8 @@ mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, fib4->fi->fib_dev); } -void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_vr *vr = fib_entry->vr; From 1a21101d21d7ef056dfda1d7b843289e05ecd034 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Mon, 19 Sep 2016 15:33:54 +0530 Subject: [PATCH 0505/1050] net: phy: Add MAC-IF driver for Microsemi PHYs. This adds the MAC interface (MAC-IF) feature. The Microsemi PHY can support an RGMII, RMII or GMII/MII interface between the MAC and the PHY. The MAC-IF function programs the right value based on the Device Tree configuration. Tested on a Beaglebone Black with the VSC 8531 PHY. Signed-off-by: Raju Lakkaraju Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mscc.c | 51 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index c09cc4a3d166..d350debd174a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -23,6 +23,16 @@ enum rgmii_rx_clock_delay { RGMII_RX_CLK_DELAY_3_4_NS = 7 }; +/* Microsemi VSC85xx PHY registers */ +/* IEEE 802.
Std Registers */ +#define MSCC_PHY_EXT_PHY_CNTL_1 23 +#define MAC_IF_SELECTION_MASK 0x1800 +#define MAC_IF_SELECTION_GMII 0 +#define MAC_IF_SELECTION_RMII 1 +#define MAC_IF_SELECTION_RGMII 2 +#define MAC_IF_SELECTION_POS 11 +#define FAR_END_LOOPBACK_MODE_MASK 0x0008 + #define MII_VSC85XX_INT_MASK 25 #define MII_VSC85XX_INT_MASK_MASK 0xa000 #define MII_VSC85XX_INT_STATUS 26 @@ -48,6 +58,42 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) return rc; } +static int vsc85xx_mac_if_set(struct phy_device *phydev, + phy_interface_t interface) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + reg_val = phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_1); + reg_val &= ~(MAC_IF_SELECTION_MASK); + switch (interface) { + case PHY_INTERFACE_MODE_RGMII: + reg_val |= (MAC_IF_SELECTION_RGMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_RMII: + reg_val |= (MAC_IF_SELECTION_RMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: + reg_val |= (MAC_IF_SELECTION_GMII << MAC_IF_SELECTION_POS); + break; + default: + rc = -EINVAL; + goto out_unlock; + } + rc = phy_write(phydev, MSCC_PHY_EXT_PHY_CNTL_1, reg_val); + if (rc != 0) + goto out_unlock; + + rc = genphy_soft_reset(phydev); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + static int vsc85xx_default_config(struct phy_device *phydev) { int rc; @@ -77,6 +123,11 @@ static int vsc85xx_config_init(struct phy_device *phydev) rc = vsc85xx_default_config(phydev); if (rc) return rc; + + rc = vsc85xx_mac_if_set(phydev, phydev->interface); + if (rc) + return rc; + rc = genphy_config_init(phydev); return rc; From fd07160bb7180cdd0afeb089d8cdfd66002f17e6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 19 Sep 2016 12:53:40 +0200 Subject: [PATCH 0506/1050] xen-netfront: avoid packet loss when ethernet header crosses page boundary Small packet loss is reported on complex multi-host network configurations including tunnels, NAT, ... My investigation led me to the following check in netback which drops packets: if (unlikely(txreq.size < ETH_HLEN)) { netdev_err(queue->vif->dev, "Bad packet size: %d\n", txreq.size); xenvif_tx_err(queue, &txreq, extra_count, idx); break; } But this check itself is legitimate. SKBs consist of a linear part (which has to have the Ethernet header) and (optionally) a number of frags. Netfront transmits the head of the linear part up to the page boundary as the first request and all the rest becomes frags so when we're reconstructing the SKB in netback we can't distinguish between original frags and the 'tail' of the linear part. The first request needs to be at least ETH_HLEN in size. So in case we have an SKB with its linear part starting too close to the page boundary the packet is lost. I see two ways to fix the issue: - Change the 'wire' protocol between netfront and netback to start keeping the original SKB structure. We'll have to add a flag indicating the fact that the particular request is a part of the original linear part and not a frag. We'll need to know the length of the linear part to pre-allocate memory. - Avoid transmitting SKBs with linear parts starting too close to the page boundary. That seems preferable short-term and shouldn't bring significant performance degradation as such packets are rare. That's what this patch is trying to achieve with skb_copy(). Signed-off-by: Vitaly Kuznetsov Acked-by: David Vrabel Signed-off-by: David S.
Miller --- drivers/net/xen-netfront.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 96ccd4e943db..e17879dd5d5a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -565,6 +565,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netfront_queue *queue = NULL; unsigned int num_queues = dev->real_num_tx_queues; u16 queue_index; + struct sk_buff *nskb; /* Drop the packet if no queues are set up */ if (num_queues < 1) @@ -593,6 +594,20 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) page = virt_to_page(skb->data); offset = offset_in_page(skb->data); + + /* The first req should be at least ETH_HLEN size or the packet will be + * dropped by netback. + */ + if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = skb_copy(skb, GFP_ATOMIC); + if (!nskb) + goto drop; + dev_kfree_skb_any(skb); + skb = nskb; + page = virt_to_page(skb->data); + offset = offset_in_page(skb->data); + } + len = skb_headlen(skb); spin_lock_irqsave(&queue->tx_lock, flags); From ca26893f05e86497a86732768ec53cd38c0819ca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2016 19:00:09 +0800 Subject: [PATCH 0507/1050] rhashtable: Add rhlist interface The insecure_elasticity setting is an ugly wart brought out by users who need to insert duplicate objects (that is, distinct objects with identical keys) into the same table. In fact, those users have a much bigger problem. Once those duplicate objects are inserted, they don't have an interface to find them (unless you count the walker interface which walks over the entire table). Some users have resorted to doing a manual walk over the hash table which is of course broken because they don't handle the potential existence of multiple hash tables. The result is that they will break sporadically when they encounter a hash table resize/rehash. This patch provides a way out for those users, at the expense of an extra pointer per object. Essentially each object is now a list of objects carrying the same key. The hash table will only see the lists so nothing changes as far as rhashtable is concerned. To use this new interface, you need to insert a struct rhlist_head into your objects instead of struct rhash_head. While the hash table is unchanged, for type-safety you'll need to use struct rhltable instead of struct rhashtable. All the existing interfaces have been duplicated for rhlist, including the hash table walker. One missing feature is nulls marking because AFAIK the only potential user of it does not need duplicate objects. Should anyone need this it shouldn't be too hard to add. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 491 ++++++++++++++++++++++++++++--------- lib/rhashtable.c | 256 +++++++++++++++---- 2 files changed, 582 insertions(+), 165 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index fd82584acd48..5c132d3188be 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,7 +1,7 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2015 Herbert Xu + * Copyright (c) 2015-2016 Herbert Xu * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * @@ -53,6 +53,11 @@ struct rhash_head { struct rhash_head __rcu *next; }; +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -137,6 +142,7 @@ struct rhashtable_params { * @key_len: Key length for hashfn * @elasticity: Maximum chain length before rehash * @p: Configuration parameters + * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list @@ -147,11 +153,20 @@ struct rhashtable { unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; + bool rhlist; struct work_struct run_work; struct mutex mutex; spinlock_t lock; }; +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + /** * struct rhashtable_walker - Hash table walker * @list: List entry on list of walkers @@ -163,9 +178,10 @@ struct rhashtable_walker { }; /** - * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * struct rhashtable_iter - Hash table iterator * @ht: Table to iterate through * @p: Current pointer + * @list: Current hash list pointer * @walker: Associated rhashtable walker * @slot: Current slot * @skip: Number of entries to skip in slot @@ -173,6 +189,7 @@ struct rhashtable_walker { struct rhashtable_iter { struct rhashtable *ht; struct rhash_head *p; + struct rhlist_head *list; struct rhashtable_walker walker; unsigned int slot; unsigned int skip; @@ -339,13 +356,11 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *old_tbl, - void **data); -int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); @@ -507,6 +522,31 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +/** + * rhl_for_each_rcu - iterate over rcu hash table list + * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_rcu(pos, list) \ + for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + +/** + * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type + * @tpos: the type * to use as a loop cursor. 
+ * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * @member: name of the &struct rlist_head within the hashable struct. + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_entry_rcu(tpos, pos, list, member) \ + for (pos = list; pos && rht_entry(tpos, pos, member); \ + pos = rcu_dereference_raw(pos->next)) + static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { @@ -516,18 +556,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } -/** - * rhashtable_lookup_fast - search hash table, inlined version - * @ht: hash table - * @key: the pointer to the key - * @params: hash table parameters - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * Returns the first entry on which the compare function returned true. - */ -static inline void *rhashtable_lookup_fast( +/* Internal function, do not use. */ +static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -539,8 +569,6 @@ static inline void *rhashtable_lookup_fast( struct rhash_head *he; unsigned int hash; - rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); @@ -549,8 +577,7 @@ restart: params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; - rcu_read_unlock(); - return rht_obj(ht, he); + return he; } /* Ensure we see any new tables. */ @@ -559,96 +586,165 @@ restart: tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; - rcu_read_unlock(); return NULL; } +/** + * rhashtable_lookup - search hash table + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This must only be called under the RCU read lock. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params); + + return he ? rht_obj(ht, he) : NULL; +} + +/** + * rhashtable_lookup_fast - search hash table, without RCU read lock + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Only use this function when you have other mechanisms guaranteeing + * that the object won't go away after the RCU read lock is released. + * + * Returns the first entry on which the compare function returned true. 
+ */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + void *obj; + + rcu_read_lock(); + obj = rhashtable_lookup(ht, key, params); + rcu_read_unlock(); + + return obj; +} + +/** + * rhltable_lookup - search hash list table + * @hlt: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. All matching entries are returned + * in a list. + * + * This must only be called under the RCU read lock. + * + * Returns the list of entries that match the given key. + */ +static inline struct rhlist_head *rhltable_lookup( + struct rhltable *hlt, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); + + return he ? container_of(he, struct rhlist_head, rhead) : NULL; +} + /* Internal function, please use rhashtable_insert_fast() instead. This * function returns the existing element already in hashes in there is a clash, * otherwise it returns an error via ERR_PTR(). */ static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, - const struct rhashtable_params params) + const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; - struct bucket_table *tbl, *new_tbl; + struct rhash_head __rcu **pprev; + struct bucket_table *tbl; struct rhash_head *head; spinlock_t *lock; - unsigned int elasticity; unsigned int hash; - void *data = NULL; - int err; + int elasticity; + void *data; -restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. - */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - + if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { +slow_path: spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); + rcu_read_unlock(); + return rhashtable_insert_slow(ht, key, obj); } - new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); - if (!IS_ERR_OR_NULL(tbl)) - goto slow_path; + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *plist; + struct rhlist_head *list; - err = PTR_ERR(tbl); - if (err == -EEXIST) - err = 0; + elasticity--; + if (!key || + (params.obj_cmpfn ? 
+ params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; - goto out; + data = rht_obj(ht, head); + + if (!rhlist) + goto out; + + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); + + goto good; } - err = -E2BIG; + if (elasticity <= 0) + goto slow_path; + + data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out; - if (unlikely(rht_grow_above_100(ht, tbl))) { -slow_path: - spin_unlock_bh(lock); - err = rhashtable_insert_rehash(ht, tbl); - rcu_read_unlock(); - if (err) - return ERR_PTR(err); - - goto restart; - } - - err = 0; - elasticity = ht->elasticity; - rht_for_each(head, tbl, hash) { - if (key && - unlikely(!(params.obj_cmpfn ? - params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) { - data = rht_obj(ht, head); - goto out; - } - if (!--elasticity) - goto slow_path; - } + if (unlikely(rht_grow_above_100(ht, tbl))) + goto slow_path; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); @@ -656,11 +752,14 @@ slow_path: if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); +good: + data = NULL; + out: spin_unlock_bh(lock); rcu_read_unlock(); - return err ? ERR_PTR(err) : data; + return data; } /** @@ -685,13 +784,65 @@ static inline int rhashtable_insert_fast( { void *ret; - ret = __rhashtable_insert_fast(ht, NULL, obj, params); + ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } +/** + * rhltable_insert_key - insert object into hash list table + * @hlt: hash list table + * @key: the pointer to the key + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhltable_insert_key( + struct rhltable *hlt, const void *key, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, + params, true)); +} + +/** + * rhltable_insert - insert object into hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). 
+ */ +static inline int rhltable_insert( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + const char *key = rht_obj(&hlt->ht, &list->rhead); + + key += params.key_offset; + + return rhltable_insert_key(hlt, key, list, params); +} + /** * rhashtable_lookup_insert_fast - lookup and insert object into hash table * @ht: hash table @@ -722,7 +873,8 @@ static inline int rhashtable_lookup_insert_fast( BUG_ON(ht->p.obj_hashfn); - ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -759,7 +911,7 @@ static inline int rhashtable_lookup_insert_key( BUG_ON(!ht->p.obj_hashfn || !key); - ret = __rhashtable_insert_fast(ht, key, obj, params); + ret = __rhashtable_insert_fast(ht, key, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -783,13 +935,14 @@ static inline void *rhashtable_lookup_get_insert_key( { BUG_ON(!ht->p.obj_hashfn || !key); - return __rhashtable_insert_fast(ht, key, obj, params); + return __rhashtable_insert_fast(ht, key, obj, params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, - struct rhash_head *obj, const struct rhashtable_params params) + struct rhash_head *obj, const struct rhashtable_params params, + bool rhlist) { struct rhash_head __rcu **pprev; struct rhash_head *he; @@ -804,18 +957,86 @@ static inline int __rhashtable_remove_fast( pprev = &tbl->buckets[hash]; rht_for_each(he, tbl, hash) { + struct rhlist_head *list; + + list = container_of(he, struct rhlist_head, rhead); + if (he != obj) { + struct rhlist_head __rcu **lpprev; + pprev = &he->next; - continue; + + if (!rhlist) + continue; + + do { + lpprev = &list->next; + list = rht_dereference_bucket(list->next, + tbl, hash); + } while (list && obj != &list->rhead); + + if (!list) + continue; + + list = rht_dereference_bucket(list->next, tbl, hash); + RCU_INIT_POINTER(*lpprev, list); + err = 0; + break; } - rcu_assign_pointer(*pprev, obj->next); - err = 0; + obj = rht_dereference_bucket(obj->next, tbl, hash); + err = 1; + + if (rhlist) { + list = rht_dereference_bucket(list->next, tbl, hash); + if (list) { + RCU_INIT_POINTER(list->rhead.next, obj); + obj = &list->rhead; + err = 0; + } + } + + rcu_assign_pointer(*pprev, obj); break; } spin_unlock_bh(lock); + if (err > 0) { + atomic_dec(&ht->nelems); + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) + schedule_work(&ht->run_work); + err = 0; + } + + return err; +} + +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline int __rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params, bool rhlist) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. 
+ */ + while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, + rhlist)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + return err; } @@ -838,34 +1059,29 @@ static inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - struct bucket_table *tbl; - int err; + return __rhashtable_remove_fast(ht, obj, params, false); +} - rcu_read_lock(); - - tbl = rht_dereference_rcu(ht->tbl, ht); - - /* Because we have already taken (and released) the bucket - * lock in old_tbl, if we find that future_tbl is not yet - * visible then that guarantees the entry to still be in - * the old tbl if it exists. - */ - while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && - (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) - ; - - if (err) - goto out; - - atomic_dec(&ht->nelems); - if (unlikely(ht->p.automatic_shrinking && - rht_shrink_below_30(ht, tbl))) - schedule_work(&ht->run_work); - -out: - rcu_read_unlock(); - - return err; +/** + * rhltable_remove - remove object from hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhltable_remove( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); } /* Internal function, please use rhashtable_replace_fast() instead */ @@ -958,4 +1174,51 @@ static inline int rhashtable_walk_init(struct rhashtable *ht, return 0; } +/** + * rhltable_walk_enter - Initialise an iterator + * @hlt: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit after this function returns. + */ +static inline void rhltable_walk_enter(struct rhltable *hlt, + struct rhashtable_iter *iter) +{ + return rhashtable_walk_enter(&hlt->ht, iter); +} + +/** + * rhltable_free_and_destroy - free elements and destroy hash list table + * @hlt: the hash list table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * See documentation for rhashtable_free_and_destroy. 
+ */ +static inline void rhltable_free_and_destroy(struct rhltable *hlt, + void (*free_fn)(void *ptr, + void *arg), + void *arg) +{ + return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); +} + +static inline void rhltable_destroy(struct rhltable *hlt) +{ + return rhltable_free_and_destroy(hlt, NULL, NULL); +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 06c28728bb53..32d0ad058380 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work) schedule_work(&ht->run_work); } -static bool rhashtable_check_elasticity(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) -{ - unsigned int elasticity = ht->elasticity; - struct rhash_head *head; - - rht_for_each(head, tbl, hash) - if (!--elasticity) - return true; - - return false; -} - -int rhashtable_insert_rehash(struct rhashtable *ht, - struct bucket_table *tbl) +static int rhashtable_insert_rehash(struct rhashtable *ht, + struct bucket_table *tbl) { struct bucket_table *old_tbl; struct bucket_table *new_tbl; @@ -439,57 +425,165 @@ fail: return err; } -EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *tbl, - void **data) +static void *rhashtable_lookup_one(struct rhashtable *ht, + struct bucket_table *tbl, unsigned int hash, + const void *key, struct rhash_head *obj) { + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + struct rhash_head __rcu **pprev; struct rhash_head *head; - unsigned int hash; - int err; + int elasticity; - tbl = rhashtable_last_table(ht, tbl); - hash = head_hashfn(ht, tbl, obj); - spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *list; + struct rhlist_head *plist; - err = -EEXIST; - if (key) { - *data = rhashtable_lookup_fast(ht, key, ht->p); - if (*data) - goto exit; + elasticity--; + if (!key || + (ht->p.obj_cmpfn ? 
+ ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; + + if (!ht->rhlist) + return rht_obj(ht, head); + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); + + return NULL; } - err = -E2BIG; + if (elasticity <= 0) + return ERR_PTR(-EAGAIN); + + return ERR_PTR(-ENOENT); +} + +static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash, + struct rhash_head *obj, + void *data) +{ + struct bucket_table *new_tbl; + struct rhash_head *head; + + if (!IS_ERR_OR_NULL(data)) + return ERR_PTR(-EEXIST); + + if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); + + new_tbl = rcu_dereference(tbl->future_tbl); + if (new_tbl) + return new_tbl; + + if (PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); + if (unlikely(rht_grow_above_max(ht, tbl))) - goto exit; + return ERR_PTR(-E2BIG); - err = -EAGAIN; - if (rhashtable_check_elasticity(ht, tbl, hash) || - rht_grow_above_100(ht, tbl)) - goto exit; - - err = 0; + if (unlikely(rht_grow_above_100(ht, tbl))) + return ERR_PTR(-EAGAIN); head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (ht->rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); -exit: - spin_unlock(rht_bucket_lock(tbl, hash)); + return NULL; +} - if (err == 0) - return NULL; - else if (err == -EAGAIN) - return tbl; - else - return ERR_PTR(err); +static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int hash; + spinlock_t *lock; + void *data; + + tbl = rcu_dereference(ht->tbl); + + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. 
+ */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rcu_dereference(tbl->future_tbl); + } + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + while (!IS_ERR_OR_NULL(new_tbl)) { + tbl = new_tbl; + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + spin_lock_nested(rht_bucket_lock(tbl, hash), + SINGLE_DEPTH_NESTING); + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + spin_unlock(rht_bucket_lock(tbl, hash)); + } + + spin_unlock_bh(lock); + + if (PTR_ERR(data) == -EAGAIN) + data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: + -EAGAIN); + + return data; +} + +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + void *data; + + do { + rcu_read_lock(); + data = rhashtable_try_insert(ht, key, obj); + rcu_read_unlock(); + } while (PTR_ERR(data) == -EAGAIN); + + return data; } EXPORT_SYMBOL_GPL(rhashtable_insert_slow); @@ -593,11 +687,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); void *rhashtable_walk_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; + struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; if (p) { - p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } goto next; } @@ -605,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) int skip = iter->skip; rht_for_each_rcu(p, tbl, iter->slot) { + if (rhlist) { + list = container_of(p, struct rhlist_head, + rhead); + do { + if (!skip) + goto next; + skip--; + list = rcu_dereference(list->next); + } while (list); + + continue; + } if (!skip) break; skip--; @@ -614,7 +725,8 @@ next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; - return rht_obj(ht, p); + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); } iter->skip = 0; @@ -802,6 +914,48 @@ int rhashtable_init(struct rhashtable *ht, } EXPORT_SYMBOL_GPL(rhashtable_init); +/** + * rhltable_init - initialize a new hash list table + * @hlt: hash list table to be initialized + * @params: configuration parameters + * + * Initializes a new hash list table. + * + * See documentation for rhashtable_init. + */ +int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) +{ + int err; + + /* No rhlist NULLs marking for now. 
*/ + if (params->nulls_base) + return -EINVAL; + + err = rhashtable_init(&hlt->ht, params); + hlt->ht.rhlist = true; + return err; +} +EXPORT_SYMBOL_GPL(rhltable_init); + +static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, + void (*free_fn)(void *ptr, void *arg), + void *arg) +{ + struct rhlist_head *list; + + if (!ht->rhlist) { + free_fn(rht_obj(ht, obj), arg); + return; + } + + list = container_of(obj, struct rhlist_head, rhead); + do { + obj = &list->rhead; + list = rht_dereference(list->next, ht); + free_fn(rht_obj(ht, obj), arg); + } while (list); +} + /** * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy @@ -839,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, pos = next, next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL) - free_fn(rht_obj(ht, pos), arg); + rhashtable_free_one(ht, pos, free_fn, arg); } } From 83e7e4ce9e93c3b020497144f4354b62aed5d894 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2016 19:00:10 +0800 Subject: [PATCH 0508/1050] mac80211: Use rhltable instead of rhashtable mac80211 currently uses rhashtable with insecure_elasticity set to true. The latter is because of duplicate objects. What's more, mac80211 walks the rhashtable chains by hand which is broken as rhashtable may contain multiple tables due to resizing or rehashing. This patch fixes it by converting it to the newly added rhltable interface which is designed for use with duplicate objects. With rhltable a lookup returns a list of objects instead of a single one. This is then fed into the existing for_each_sta_info macro. This patch also deletes the sta_addr_hash function since rhashtable defaults to jhash. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rx.c | 7 ++--- net/mac80211/sta_info.c | 52 +++++++++++++++----------------------- net/mac80211/sta_info.h | 19 +++++--------- net/mac80211/status.c | 7 ++--- 5 files changed, 33 insertions(+), 54 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c71c73594790..e496dee5af08 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1213,7 +1213,7 @@ struct ieee80211_local { spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; - struct rhashtable sta_hash; + struct rhltable sta_hash; struct timer_list sta_cleanup; int sta_generation; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e796060b7c5e..f7cf342bab52 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4003,7 +4003,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct rhash_head *tmp; + struct rhlist_head *tmp; int err = 0; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; @@ -4046,13 +4046,10 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, goto out; } else if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; - const struct bucket_table *tbl; prev_sta = NULL; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) { + for_each_sta_info(local, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1b1b28ff4fdb..c803e2cb58bc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -67,12 +67,10 @@ static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ - .insecure_elasticity = true, /* Disable chain-length checks. 
*/ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), .key_len = ETH_ALEN, - .hashfn = sta_addr_hash, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; @@ -80,8 +78,8 @@ static const struct rhashtable_params sta_rht_params = { static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_remove(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void __cleanup_single_sta(struct sta_info *sta) @@ -157,19 +155,22 @@ static void cleanup_single_sta(struct sta_info *sta) sta_info_free(local, sta); } +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr) +{ + return rhltable_lookup(&local->sta_hash, addr, sta_rht_params); +} + /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata) { rcu_read_unlock(); /* this is safe as the caller must already hold @@ -190,14 +191,11 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); @@ -263,8 +261,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_insert(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -453,9 +451,9 @@ static int sta_info_insert_check(struct sta_info *sta) is_multicast_ether_addr(sta->sta.addr))) return -EINVAL; - /* Strictly speaking this isn't necessary as we hold the mutex, but - * the rhashtable code can't really deal with that distinction. We - * do require the mutex for correctness though. + /* The RCU read lock is required by rhashtable due to + * asynchronous resize/rehash. We also require the mutex + * for correctness. 
*/ rcu_read_lock(); lockdep_assert_held(&sdata->local->sta_mtx); @@ -1043,16 +1041,11 @@ static void sta_info_cleanup(unsigned long data) round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } -u32 sta_addr_hash(const void *key, u32 length, u32 seed) -{ - return jhash(key, ETH_ALEN, seed); -} - int sta_info_init(struct ieee80211_local *local) { int err; - err = rhashtable_init(&local->sta_hash, &sta_rht_params); + err = rhltable_init(&local->sta_hash, &sta_rht_params); if (err) return err; @@ -1068,7 +1061,7 @@ int sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); - rhashtable_destroy(&local->sta_hash); + rhltable_destroy(&local->sta_hash); } @@ -1138,17 +1131,14 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *localaddr) { struct ieee80211_local *local = hw_to_local(hw); + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; - - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); /* * Just return a random station if localaddr is NULL * ... first in list. */ - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 530231b73278..ed5fcb984a01 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -455,7 +455,7 @@ struct sta_info { /* General information, mostly static */ struct list_head list, free_list; struct rcu_head rcu_head; - struct rhash_head hash_node; + struct rhlist_head hash_node; u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -638,6 +638,9 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr); + /* * Get a STA info, must be under RCU read lock. */ @@ -647,17 +650,9 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -u32 sta_addr_hash(const void *key, u32 length, u32 seed); - -#define _sta_bucket_idx(_tbl, _a) \ - rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd)) - -#define for_each_sta_info(local, tbl, _addr, _sta, _tmp) \ - rht_for_each_entry_rcu(_sta, _tmp, tbl, \ - _sta_bucket_idx(tbl, _addr), \ - hash_node) \ - /* compare address and run code only if it matches */ \ - if (ether_addr_equal(_sta->addr, (_addr))) +#define for_each_sta_info(local, _addr, _sta, _tmp) \ + rhl_for_each_entry_rcu(_sta, _tmp, \ + sta_info_hash_lookup(local, _addr), hash_node) /* * Get STA info by index, BROKEN! 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ea39f8a7baf3..ddf71c648cab 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -746,8 +746,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); __le16 fc; struct ieee80211_supported_band *sband; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; int retry_count; int rates_idx; bool send_to_cooked; @@ -755,7 +755,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int shift = 0; int tid = IEEE80211_NUM_TIDS; - const struct bucket_table *tbl; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -764,9 +763,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sband = local->hw.wiphy->bands[info->band]; fc = hdr->frame_control; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) { + for_each_sta_info(local, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) continue; From 06f8ec9041f02d44bb0b75d47668e2fe00d5e0c3 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:00 +0200 Subject: [PATCH 0509/1050] net-next: dsa: fix duplicate invocation of set_addr() commit 83c0afaec7b730b ("net: dsa: Add new binding implementation") has a duplicate invocation of the set_addr() operation callback. Remove one of them. Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 8278385dcd21..cffc19e972a1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -308,10 +308,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; - if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) From 092183df0fa1f4b49baad3a980c55d55de07dfb7 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:01 +0200 Subject: [PATCH 0510/1050] net-next: dsa: make the set_addr() operation optional Only 1 of the 3 drivers currently has a set_addr() operation. Make the set_addr() callback optional to reduce the amount of empty stubs inside the drivers. Signed-off-by: John Crispin Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 8 +++++--- net/dsa/dsa2.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 66e31acfcad8..a6902c1e2f28 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -378,9 +378,11 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret < 0) goto out; - ret = ops->set_addr(ds, dst->master_netdev->dev_addr); - if (ret < 0) - goto out; + if (ops->set_addr) { + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + if (ret < 0) + goto out; + } if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index cffc19e972a1..f8a7d9aab437 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -304,9 +304,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; + if (ds->ops->set_addr) { + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); + if (err < 0) + return err; + } if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); From 1f449736525addd6fcce674d654bd1471748484e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:02 +0200 Subject: [PATCH 0511/1050] net-next: dsa: b53: remove empty set_addr() stub The set_addr() callback is now optional. Remove the empty stub that b53 has. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0afc2e5a04dc..1a492c053e27 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -764,11 +764,6 @@ static int b53_get_sset_count(struct dsa_switch *ds) return b53_get_mib_size(dev); } -static int b53_set_addr(struct dsa_switch *ds, u8 *addr) -{ - return 0; -} - static int b53_setup(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; @@ -1466,7 +1461,6 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) static struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, - .set_addr = b53_set_addr, .get_strings = b53_get_strings, .get_ethtool_stats = b53_get_ethtool_stats, .get_sset_count = b53_get_sset_count, From 8941ee36e3266a5efca52e32c4dc214f202c751a Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:03 +0200 Subject: [PATCH 0512/1050] net-next: dsa: qca8k: remove empty set_addr() stub The set_addr() callback is now optional. Remove the empty stub that qca8k has. Signed-off-by: John Crispin Signed-off-by: David S. 
Miller --- drivers/net/dsa/qca8k.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 7f3f1781c202..4788a89520dd 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -585,13 +585,6 @@ qca8k_setup(struct dsa_switch *ds) return 0; } -static int -qca8k_set_addr(struct dsa_switch *ds, u8 *addr) -{ - /* The subsystem always calls this function so add an empty stub */ - return 0; -} - static int qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) { @@ -921,7 +914,6 @@ qca8k_get_tag_protocol(struct dsa_switch *ds) static struct dsa_switch_ops qca8k_switch_ops = { .get_tag_protocol = qca8k_get_tag_protocol, .setup = qca8k_setup, - .set_addr = qca8k_set_addr, .get_strings = qca8k_get_strings, .phy_read = qca8k_phy_read, .phy_write = qca8k_phy_write, From 67a99b7061c07b190ac6c39f136afedbb7aa86e9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 19 Sep 2016 17:47:41 +0300 Subject: [PATCH 0513/1050] qed: Fix stack corruption on probe Commit fe56b9e6a8d95 ("qed: Add module with basic common support") introduced a stack corruption during probe: a local struct holding data to be sent to the management firmware is filled incorrectly, so the data is written outside of the struct and corrupts the stack. Changes from v1: ---------------- - Correct the value written [Caught by David Laight] Fixes: fe56b9e6a8d95 ("qed: Add module with basic common support") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index a240f26344a4..f776a77794c5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1153,8 +1153,8 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, p_drv_version = &union_data.drv_version; p_drv_version->version = p_ver->version; - for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) { - val = cpu_to_be32(p_ver->name[i]); + for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) { + val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)])); *(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val; } From 5737f6c92681939e417579b421f81f035e57c582 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 19 Sep 2016 17:46:38 +0200 Subject: [PATCH 0514/1050] mlx4: add missed recycle opportunity for XDP_TX on TX failure Correct drop handling for XDP_TX on TX failure was recently added in commit 95357907ae73 ("mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full"). The change missed an opportunity for recycling the RX page, instead of going through the page allocator, like the regular XDP_DROP action does. This patch seizes the opportunity by going through the XDP_DROP case. Fixes: 95357907ae73 ("mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full") Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c80073e4947f..c46355bce613 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -906,11 +906,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, tx_index, &doorbell_pending)) goto consumed; - goto next; /* Drop on xmit failure */ + goto xdp_drop; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: case XDP_DROP: +xdp_drop: if (mlx4_en_rx_recycle(ring, frags)) goto consumed; goto next; From e535ec0899d1fe52ec3a84c9bc03457ac67ad6f7 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 20 Sep 2016 14:26:21 +0100 Subject: [PATCH 0515/1050] x86/mm/pat: Prevent hang during boot when mapping pages There's a mixture of signed 32-bit and unsigned 32-bit and 64-bit data types used for keeping track of how many pages have been mapped. This leads to hangs during boot when mapping large numbers of pages (multiple terabytes, as reported by Waiman) because those values are interpreted as being negative. commit 742563777e8d ("x86/mm/pat: Avoid truncation when converting cpa->numpages to address") fixed one of those bugs, but there is another lurking in __change_page_attr_set_clr(). Additionally, the return value type for the populate_*() functions can return negative values when a large number of pages have been mapped, triggering the error paths even though no error occurred. Consistently use 64-bit types on 64-bit platforms when counting pages. Even in the signed case this gives us room for regions 8PiB (pebibytes) in size whilst still allowing the usual negative value error checking idiom. 
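To make the overflow concrete, a 16 TiB region mapped with 4 KiB pages needs 2^32 page-table entries, which no longer fits in a signed (or even unsigned) 32-bit counter while fitting comfortably in a 64-bit long. A minimal userspace sketch of the arithmetic (illustrative only, not part of this patch; the 16 TiB figure is hypothetical):

	#include <limits.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned long long region = 16ULL << 40;	/* hypothetical 16 TiB mapping */
		unsigned long long pages  = region >> 12;	/* 4 KiB pages: 2^32 entries */

		printf("pages = %llu, INT_MAX = %d\n", pages, INT_MAX);
		printf("fits in int: %s\n", pages <= INT_MAX ? "yes" : "no");
		return 0;
	}

Stored in a signed 32-bit int, the same count is interpreted as a negative value, which is exactly the failure mode described above.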
Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 849dc09fa4f0..e3353c97d086 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -917,11 +917,11 @@ static void populate_pte(struct cpa_data *cpa, } } -static int populate_pmd(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pud_t *pud, pgprot_t pgprot) +static long populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) { - unsigned int cur_pages = 0; + long cur_pages = 0; pmd_t *pmd; pgprot_t pmd_pgprot; @@ -991,12 +991,12 @@ static int populate_pmd(struct cpa_data *cpa, return num_pages; } -static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, - pgprot_t pgprot) +static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) { pud_t *pud; unsigned long end; - int cur_pages = 0; + long cur_pages = 0; pgprot_t pud_pgprot; end = start + (cpa->numpages << PAGE_SHIFT); @@ -1052,7 +1052,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* Map trailing leftover */ if (start < end) { - int tmp; + long tmp; pud = pud_offset(pgd, start); if (pud_none(*pud)) @@ -1078,7 +1078,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgprot_t pgprot = __pgprot(_KERNPG_TABLE); pud_t *pud = NULL; /* shut up gcc */ pgd_t *pgd_entry; - int ret; + long ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1327,7 +1327,8 @@ static int cpa_process_alias(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) { - int ret, numpages = cpa->numpages; + unsigned long numpages = cpa->numpages; + int ret; while (numpages) { /* From 1297667083d5442aafe3e337b9413bf02b114edb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 19 Sep 2016 13:09:09 +0100 Subject: [PATCH 0516/1050] x86/efi: Only map RAM into EFI page tables if in mixed-mode Waiman reported that booting with CONFIG_EFI_MIXED enabled on his multi-terabyte HP machine results in boot crashes, because the EFI region mapping functions loop forever while trying to map those regions describing RAM. While this patch doesn't fix the underlying hang, there's really no reason to map EFI_CONVENTIONAL_MEMORY regions into the EFI page tables when mixed-mode is not in use at runtime. Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Cc: # v4.6+ Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 677e29e29473..8dd3784eb075 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -245,7 +245,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. 
*/ - if (!IS_ENABLED(CONFIG_EFI_MIXED)) + if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) return 0; /* From f1e1c9e5e357c05253affb13be29285c5cb56bf0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Sep 2016 15:12:21 +0200 Subject: [PATCH 0517/1050] perf/x86/intel/bts: Make sure debug store is valid Since commit 4d4c47412464 ("perf/x86/intel/bts: Fix BTS PMI detection") my box goes boom on boot: | .... node #0, CPUs: #1 #2 #3 #4 #5 #6 #7 | BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 | IP: [] intel_bts_interrupt+0x43/0x130 | Call Trace: | d [] intel_pmu_handle_irq+0x51/0x4b0 | [] perf_event_nmi_handler+0x27/0x40 This happens because the code introduced in this commit dereferences the debug store pointer unconditionally. The debug store is not guaranteed to be available, so a NULL pointer check, as in other places, is required. Fixes: 4d4c47412464 ("perf/x86/intel/bts: Fix BTS PMI detection") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: vince@deater.net Cc: eranian@google.com Link: http://lkml.kernel.org/r/20160920131220.xg5pbdjtznszuyzb@breakpoint.cc Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/bts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index bdcd6510992c..6ff66efa0feb 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -455,7 +455,7 @@ int intel_bts_interrupt(void) * The only surefire way of knowing if this NMI is ours is by checking * the write ptr against the PMI threshold. */ - if (ds->bts_index >= ds->bts_interrupt_threshold) + if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) handled = 1; /* From f5beeb1851ea6f8cfcf2657f26cb24c0582b4945 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 8 Sep 2016 09:57:07 +0200 Subject: [PATCH 0518/1050] fs/proc/kcore.c: Make bounce buffer global for read The next patch adds a bounce buffer for the ktext area, so it's convenient to have a single bounce buffer for both the vmalloc/module and ktext cases.
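The shape of the change is the standard per-open scratch-buffer pattern: allocate once in open(), stash the pointer in file->private_data, reuse it for every chunk in read(), and free it in release(). A minimal sketch of that pattern follows, with hypothetical foo_* names rather than the actual kcore functions:

	#include <linux/fs.h>
	#include <linux/kernel.h>
	#include <linux/slab.h>
	#include <linux/uaccess.h>

	static int foo_open(struct inode *inode, struct file *filp)
	{
		/* one PAGE_SIZE bounce buffer for the lifetime of this open file */
		filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!filp->private_data)
			return -ENOMEM;
		return 0;
	}

	static ssize_t foo_read(struct file *filp, char __user *ubuf,
				size_t len, loff_t *ppos)
	{
		char *buf = filp->private_data;	/* reused across chunks, no per-chunk alloc */
		size_t tsz = min_t(size_t, len, PAGE_SIZE);

		/* fill buf from the kernel-side source here, then copy it out */
		if (copy_to_user(ubuf, buf, tsz))
			return -EFAULT;
		*ppos += tsz;
		return tsz;
	}

	static int foo_release(struct inode *inode, struct file *filp)
	{
		kfree(filp->private_data);
		return 0;
	}

This avoids a kzalloc/kfree pair on every chunk of the read and gives the next patch a single buffer to reuse for the ktext case.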
Suggested-by: Linus Torvalds Signed-off-by: Jiri Olsa Acked-by: Kees Cook Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index a939f5ed7f89..bd3ac9dca252 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) static ssize_t read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) { + char *buf = file->private_data; ssize_t acc = 0; size_t size, tsz; size_t elf_buflen; @@ -500,18 +501,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (clear_user(buffer, tsz)) return -EFAULT; } else if (is_vmalloc_or_module_addr((void *)start)) { - char * elf_buf; - - elf_buf = kzalloc(tsz, GFP_KERNEL); - if (!elf_buf) - return -ENOMEM; - vread(elf_buf, (char *)start, tsz); + vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ - if (copy_to_user(buffer, elf_buf, tsz)) { - kfree(elf_buf); + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; - } - kfree(elf_buf); } else { if (kern_addr_valid(start)) { unsigned long n; @@ -549,6 +542,11 @@ static int open_kcore(struct inode *inode, struct file *filp) { if (!capable(CAP_SYS_RAWIO)) return -EPERM; + + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { @@ -559,10 +557,16 @@ static int open_kcore(struct inode *inode, struct file *filp) return 0; } +static int release_kcore(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} static const struct file_operations proc_kcore_operations = { .read = read_kcore, .open = open_kcore, + .release = release_kcore, .llseek = default_llseek, }; From df04abfd181acc276ba6762c8206891ae10ae00d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 8 Sep 2016 09:57:08 +0200 Subject: [PATCH 0519/1050] fs/proc/kcore.c: Add bounce buffer for ktext data We hit hardened usercopy feature check for kernel text access by reading kcore file: usercopy: kernel memory exposure attempt detected from ffffffff8179a01f () (4065 bytes) kernel BUG at mm/usercopy.c:75! Bypassing this check for kcore by adding bounce buffer for ktext data. Reported-by: Steve Best Fixes: f5509cc18daa ("mm: Hardened usercopy") Suggested-by: Kees Cook Signed-off-by: Jiri Olsa Acked-by: Kees Cook Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index bd3ac9dca252..5c89a07e3d7f 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -509,7 +509,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (kern_addr_valid(start)) { unsigned long n; - n = copy_to_user(buffer, (char *)start, tsz); + /* + * Using bounce buffer to bypass the + * hardened user copy kernel text checks. + */ + memcpy(buf, (char *) start, tsz); + n = copy_to_user(buffer, buf, tsz); /* * We cannot distinguish between fault on source * and fault on destination. 
When this happens From e875bd66dfb68f4e898e9a43ef42858c504a7f23 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 13 Sep 2016 17:53:35 +0100 Subject: [PATCH 0520/1050] irqchip/mips-gic: Fix local interrupts Since the device hierarchy domain was added by commit c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain"), GIC local interrupts have been broken. When users attempted to set up a per-cpu local IRQ, for example the GIC timer clock events code in drivers/clocksource/mips-gic-timer.c, setup_percpu_irq would refuse with -EINVAL, because the GIC irqchip driver never called irq_set_percpu_devid, so the IRQ_PER_CPU_DEVID flag was never set for the IRQ. This happened because irq_set_percpu_devid was being called from the gic_irq_domain_map function, which is no longer called. Fixing only that would run into further problems, because gic_dev_domain_alloc set the struct irq_chip for all interrupts, local or shared, to gic_level_irq_controller, despite that only being suitable for shared interrupts. The typical outcome is that gic_level_irq_controller callback functions are called for local interrupts, the hwirq number calculations then overflow, and the driver ends up attempting to access some invalid register with an address calculated from an invalid hwirq number. The best-case scenario is that this then leads to a bus error. This is fixed by abstracting the setup of the hwirq and chip to a new function gic_setup_dev_chip, which is used by both the root GIC IRQ domain and the device domain. Finally, decoding local interrupts failed because gic_dev_domain_alloc only called irq_domain_alloc_irqs_parent for shared interrupts. Local ones were therefore never associated with hwirqs in the root GIC IRQ domain, and the virq in gic_handle_local_int would always be 0. This is fixed by calling irq_domain_alloc_irqs_parent unconditionally and having gic_irq_domain_alloc handle both local and shared interrupts, which is easy due to the aforementioned abstraction of chip setup into gic_setup_dev_chip. This fixes use of the MIPS GIC timer for clock events, which has been broken since c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") but hadn't been noticed due to a silent fallback to the MIPS coprocessor 0 count/compare clock events device. Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Jason Cooper Cc: Qais Yousef Cc: stable@vger.kernel.org Cc: Marc Zyngier Link: http://lkml.kernel.org/r/20160913165335.31389-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mips-gic.c | 105 ++++++++++++++++----------------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 83f498393a7f..6185696405d5 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -638,27 +638,6 @@ static int gic_local_irq_domain_map(struct irq_domain *d, unsigned int virq, if (!gic_local_irq_is_routable(intr)) return -EPERM; - /* - * HACK: These are all really percpu interrupts, but the rest - * of the MIPS kernel code does not use the percpu IRQ API for - * the CP0 timer and performance counter interrupts.
- */ - switch (intr) { - case GIC_LOCAL_INT_TIMER: - case GIC_LOCAL_INT_PERFCTR: - case GIC_LOCAL_INT_FDC: - irq_set_chip_and_handler(virq, - &gic_all_vpes_local_irq_controller, - handle_percpu_irq); - break; - default: - irq_set_chip_and_handler(virq, - &gic_local_irq_controller, - handle_percpu_devid_irq); - irq_set_percpu_devid(virq); - break; - } - spin_lock_irqsave(&gic_lock, flags); for (i = 0; i < gic_vpes; i++) { u32 val = GIC_MAP_TO_PIN_MSK | gic_cpu_pin; @@ -724,16 +703,42 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, return 0; } -static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, - irq_hw_number_t hw) +static int gic_setup_dev_chip(struct irq_domain *d, unsigned int virq, + unsigned int hwirq) { - if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS) - return gic_local_irq_domain_map(d, virq, hw); + struct irq_chip *chip; + int err; - irq_set_chip_and_handler(virq, &gic_level_irq_controller, - handle_level_irq); + if (hwirq >= GIC_SHARED_HWIRQ_BASE) { + err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, + &gic_level_irq_controller, + NULL); + } else { + switch (GIC_HWIRQ_TO_LOCAL(hwirq)) { + case GIC_LOCAL_INT_TIMER: + case GIC_LOCAL_INT_PERFCTR: + case GIC_LOCAL_INT_FDC: + /* + * HACK: These are all really percpu interrupts, but + * the rest of the MIPS kernel code does not use the + * percpu IRQ API for them. + */ + chip = &gic_all_vpes_local_irq_controller; + irq_set_handler(virq, handle_percpu_irq); + break; - return gic_shared_irq_domain_map(d, virq, hw, 0); + default: + chip = &gic_local_irq_controller; + irq_set_handler(virq, handle_percpu_devid_irq); + irq_set_percpu_devid(virq); + break; + } + + err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, + chip, NULL); + } + + return err; } static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, @@ -744,15 +749,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, int cpu, ret, i; if (spec->type == GIC_DEVICE) { - /* verify that it doesn't conflict with an IPI irq */ - if (test_bit(spec->hwirq, ipi_resrv)) + /* verify that shared irqs don't conflict with an IPI irq */ + if ((spec->hwirq >= GIC_SHARED_HWIRQ_BASE) && + test_bit(GIC_HWIRQ_TO_SHARED(spec->hwirq), ipi_resrv)) return -EBUSY; - hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq); - - return irq_domain_set_hwirq_and_chip(d, virq, hwirq, - &gic_level_irq_controller, - NULL); + return gic_setup_dev_chip(d, virq, spec->hwirq); } else { base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs); if (base_hwirq == gic_shared_intrs) { @@ -821,7 +823,6 @@ int gic_irq_domain_match(struct irq_domain *d, struct device_node *node, } static const struct irq_domain_ops gic_irq_domain_ops = { - .map = gic_irq_domain_map, .alloc = gic_irq_domain_alloc, .free = gic_irq_domain_free, .match = gic_irq_domain_match, @@ -852,29 +853,20 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq, struct irq_fwspec *fwspec = arg; struct gic_irq_spec spec = { .type = GIC_DEVICE, - .hwirq = fwspec->param[1], }; int i, ret; - bool is_shared = fwspec->param[0] == GIC_SHARED; - if (is_shared) { - ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); - if (ret) - return ret; - } + if (fwspec->param[0] == GIC_SHARED) + spec.hwirq = GIC_SHARED_TO_HWIRQ(fwspec->param[1]); + else + spec.hwirq = GIC_LOCAL_TO_HWIRQ(fwspec->param[1]); + + ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); + if (ret) + return ret; for (i = 0; i < nr_irqs; i++) { - irq_hw_number_t hwirq; - - if (is_shared) - 
hwirq = GIC_SHARED_TO_HWIRQ(spec.hwirq + i); - else - hwirq = GIC_LOCAL_TO_HWIRQ(spec.hwirq + i); - - ret = irq_domain_set_hwirq_and_chip(d, virq + i, - hwirq, - &gic_level_irq_controller, - NULL); + ret = gic_setup_dev_chip(d, virq + i, spec.hwirq + i); if (ret) goto error; } @@ -896,7 +888,10 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, static void gic_dev_domain_activate(struct irq_domain *domain, struct irq_data *d) { - gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0); + if (GIC_HWIRQ_TO_LOCAL(d->hwirq) < GIC_NUM_LOCAL_INTRS) + gic_local_irq_domain_map(domain, d->irq, d->hwirq); + else + gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0); } static struct irq_domain_ops gic_dev_domain_ops = { From aa4f0601115319a52c80f468c8f007e5aa9277cb Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 20 Sep 2016 08:56:36 -0700 Subject: [PATCH 0521/1050] mm: usercopy: Check for module addresses While running a compile on arm64, I hit a memory exposure usercopy: kernel memory exposure attempt detected from fffffc0000f3b1a8 (buffer_head) (1 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:75! Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_broute bridge stp llc ebtable_nat ip6table_security ip6table_raw ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle iptable_security iptable_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle ebtable_filter ebtables ip6table_filter ip6_tables vfat fat xgene_edac xgene_enet edac_core i2c_xgene_slimpro i2c_core at803x realtek xgene_dma mdio_xgene gpio_dwapb gpio_xgene_sb xgene_rng mailbox_xgene_slimpro nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c sdhci_of_arasan sdhci_pltfm sdhci mmc_core xhci_plat_hcd gpio_keys CPU: 0 PID: 19744 Comm: updatedb Tainted: G W 4.8.0-rc3-threadinfo+ #1 Hardware name: AppliedMicro X-Gene Mustang Board/X-Gene Mustang Board, BIOS 3.06.12 Aug 12 2016 task: fffffe03df944c00 task.stack: fffffe00d128c000 PC is at __check_object_size+0x70/0x3f0 LR is at __check_object_size+0x70/0x3f0 ... [] __check_object_size+0x70/0x3f0 [] filldir64+0x158/0x1a0 [] __fat_readdir+0x4a0/0x558 [fat] [] fat_readdir+0x34/0x40 [fat] [] iterate_dir+0x190/0x1e0 [] SyS_getdents64+0x88/0x120 [] el0_svc_naked+0x24/0x28 fffffc0000f3b1a8 is a module address. Modules may have compiled in strings which could get copied to userspace. In this instance, it looks like "." which matches with a size of 1 byte. Extend the is_vmalloc_addr check to be is_vmalloc_or_module_addr to cover all possible cases. Signed-off-by: Laura Abbott Signed-off-by: Kees Cook --- mm/usercopy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 089328f2b920..3c8da0af9695 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -207,8 +207,11 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, * Some architectures (arm64) return true for virt_addr_valid() on * vmalloced addresses. Work around this by checking for vmalloc * first. 
+ * + * We also need to check for module addresses explicitly since we + * may copy static data from modules to userspace */ - if (is_vmalloc_addr(ptr)) + if (is_vmalloc_or_module_addr(ptr)) return NULL; if (!virt_addr_valid(ptr)) From e23d4159b109167126e5bcd7f3775c95de7fee47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Sep 2016 20:07:42 +0100 Subject: [PATCH 0522/1050] fix fault_in_multipages_...() on architectures with no-op access_ok() Switching iov_iter fault-in to multipages variants has exposed an old bug in underlying fault_in_multipages_...(); they break if the range passed to them wraps around. Normally access_ok() done by callers will prevent such (and it's a guaranteed EFAULT - ERR_PTR() values fall into such a range and they should not point to any valid objects). However, on architectures where userland and kernel live in different MMU contexts (e.g. s390) access_ok() is a no-op and on those a range with a wraparound can reach fault_in_multipages_...(). Since any wraparound means EFAULT there, the fix is trivial - turn those while (uaddr <= end) ... into if (unlikely(uaddr > end)) return -EFAULT; do ... while (uaddr <= end); Reported-by: Jan Stancek Tested-by: Jan Stancek Cc: stable@vger.kernel.org # v3.5+ Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 66a1260b33de..7e3d53753612 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -571,56 +571,56 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) */ static inline int fault_in_multipages_writeable(char __user *uaddr, int size) { - int ret = 0; char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; + if (unlikely(uaddr > end)) + return -EFAULT; /* * Writing zeroes into userspace here is OK, because we know that if * the zero gets there, we'll be overwriting it. */ - while (uaddr <= end) { - ret = __put_user(0, uaddr); - if (ret != 0) - return ret; + do { + if (unlikely(__put_user(0, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) - ret = __put_user(0, end); + return __put_user(0, end); - return ret; + return 0; } static inline int fault_in_multipages_readable(const char __user *uaddr, int size) { volatile char c; - int ret = 0; const char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; - while (uaddr <= end) { - ret = __get_user(c, uaddr); - if (ret != 0) - return ret; + if (unlikely(uaddr > end)) + return -EFAULT; + + do { + if (unlikely(__get_user(c, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) { - ret = __get_user(c, end); - (void)c; + return __get_user(c, end); } - return ret; + return 0; } int add_to_page_cache_locked(struct page *page, struct address_space *mapping, From ad9798967dd67f080bf0e8d611b382a5d292aae2 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 19 Sep 2016 20:15:24 +0100 Subject: [PATCH 0523/1050] 6pack: fix buffer length mishandling Dmitry Vyukov wrote: > different runs). 
Looking at code, the following looks suspicious -- we > limit copy by 512 bytes, but use the original count which can be > larger than 512: > > static void sixpack_receive_buf(struct tty_struct *tty, > const unsigned char *cp, char *fp, int count) > { > unsigned char buf[512]; > .... > memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); > .... > sixpack_decode(sp, buf, count1); With the sane tty locking we now have, I believe the following is safe, as we consume the bytes and move them into the decoded buffer before returning. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 5a1e98547031..470b3dcd54e5 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -127,7 +127,7 @@ struct sixpack { #define AX25_6PACK_HEADER_LEN 0 -static void sixpack_decode(struct sixpack *, unsigned char[], int); +static void sixpack_decode(struct sixpack *, const unsigned char[], int); static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char); /* @@ -428,7 +428,7 @@ out: /* * Handle the 'receiver data ready' interrupt. - * This function is called by the 'tty_io' module in the kernel when + * This function is called by the tty module in the kernel when * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ @@ -436,7 +436,6 @@ static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; - unsigned char buf[512]; int count1; if (!count) @@ -446,10 +445,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, if (!sp) return; - memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); - /* Read the characters out of the buffer */ - count1 = count; while (count) { count--; @@ -459,7 +455,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, continue; } } - sixpack_decode(sp, buf, count1); + sixpack_decode(sp, cp, count1); sp_put(sp); tty_unthrottle(tty); @@ -992,7 +988,7 @@ static void decode_std_command(struct sixpack *sp, unsigned char cmd) /* decode a 6pack packet */ static void -sixpack_decode(struct sixpack *sp, unsigned char *pre_rbuff, int count) +sixpack_decode(struct sixpack *sp, const unsigned char *pre_rbuff, int count) { unsigned char inbyte; int count1; From 190aa3e77880a05332ea1ccb382a51285d57adb5 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 19 Sep 2016 13:50:59 -0700 Subject: [PATCH 0524/1050] openvswitch: Fix Frame-size larger than 1024 bytes warning. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to declare a separate key on the stack; we can just use sw_flow->key to store the key directly. This commit fixes the following warning: net/openvswitch/datapath.c: In function ‘ovs_flow_cmd_new’: net/openvswitch/datapath.c:1080:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] Signed-off-by: Pravin B Shelar Signed-off-by: David S.
Miller --- net/openvswitch/datapath.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0536ab3504d5..474e7a6bfeb7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -928,7 +928,6 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -956,20 +955,24 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &new_flow->key, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &key, true, &mask); - /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], - &key, log); + &new_flow->key, log); if (error) goto err_kfree_flow; + /* unmasked key is needed to match when ufid is not used. */ + if (ovs_identifier_is_key(&new_flow->id)) + match.key = new_flow->id.unmasked_key; + + ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); + /* Validate actions. */ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); @@ -996,7 +999,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (ovs_identifier_is_ufid(&new_flow->id)) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); if (!flow) - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); From 2279994d07ab67ff7a1d09bfbd65588332dfb6d8 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 19 Sep 2016 13:51:00 -0700 Subject: [PATCH 0525/1050] openvswitch: avoid resetting flow key while installing new flow. Since commit db74a3335e0f6 ("openvswitch: use percpu flow stats"), flow alloc resets the flow-key, so there is no need to reset the flow-key again if OVS is using a newly allocated flow-key. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 8 ++++---- net/openvswitch/flow.c | 2 -- net/openvswitch/flow_netlink.c | 6 ++++-- net/openvswitch/flow_netlink.h | 3 ++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 474e7a6bfeb7..4d67ea856067 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -955,7 +955,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key.
*/ - ovs_match_init(&match, &new_flow->key, &mask); + ovs_match_init(&match, &new_flow->key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) @@ -1124,7 +1124,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &key, true, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); } else if (!ufid_present) { @@ -1241,7 +1241,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); } else if (!ufid_present) { @@ -1300,7 +1300,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (unlikely(err)) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0fa45439def1..634cc10d6dee 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -767,8 +767,6 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, { int err; - memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); - /* Extract metadata from netlink attributes. */ err = ovs_nla_get_flow_metadata(net, attr, key, log); if (err) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 8efa718ddb5e..ae25ded82b3b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1996,13 +1996,15 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, + bool reset_key, struct sw_flow_mask *mask) { memset(match, 0, sizeof(*match)); match->key = key; match->mask = mask; - memset(key, 0, sizeof(*key)); + if (reset_key) + memset(key, 0, sizeof(*key)); if (mask) { memset(&mask->key, 0, sizeof(mask->key)); @@ -2049,7 +2051,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct nlattr *a; int err = 0, start, opts_type; - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) return opts_type; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 47dd142eca1c..45f9769e5aac 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -41,7 +41,8 @@ size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); + struct sw_flow_key *key, bool reset_key, + struct sw_flow_mask *mask); int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); From cf714ac147e08bc13cd6bc79f2b090da905398ef Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 19 Sep 2016 13:56:29 -0700 Subject: [PATCH 0526/1050] ipvlan: Fix dependency issue kbuild-build-bot reported that if NETFILTER is not selected, the build fails 
pointing to netfilter symbols. Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8768a625350d..95c32f2d7601 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NETFILTER depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface From b399cf64e318ac8c5f10d36bb911e61c746b8788 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:12 +0200 Subject: [PATCH 0527/1050] bpf, verifier: enforce larger zero range for pkt on overloading stack buffs The current contract for the following two helper argument types is: * ARG_CONST_STACK_SIZE: passed argument pair must be (ptr, >0). * ARG_CONST_STACK_SIZE_OR_ZERO: passed argument pair can be either (NULL, 0) or (ptr, >0). With 6841de8b0d03 ("bpf: allow helpers access the packet directly"), we can also pass raw packet data to helpers, so depending on the argument type being PTR_TO_PACKET, we now either assert memory via check_packet_access() or check_stack_boundary(). As a result, the tests in check_packet_access() currently allow more than intended with regards to reg->imm. Back in 969bf05eb3ce ("bpf: direct packet access"), check_packet_access() was fine to ignore the size argument, since in check_mem_access() the size was derived via bpf_size_to_bytes() and guaranteed to be larger than zero prior to the call to check_packet_access(). However, for the above two argument types, it currently means we can have a size <= 0, breaking current guarantees for helpers. Enforce a check for size <= 0 and bail out if so. check_stack_boundary() doesn't have such an issue, since it already tests for access_size <= 0 and bails out, resp. access_size == 0 in case of a NULL pointer passed when allowed. Fixes: 6841de8b0d03 ("bpf: allow helpers access the packet directly") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 90493a66dddd..bc138f34e38c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -671,7 +671,7 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, struct reg_state *reg = &regs[regno]; off += reg->off; - if (off < 0 || off + size > reg->range) { + if (off < 0 || size <= 0 || off + size > reg->range) { verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; From 36bbef52c7eb646ed6247055a2acd3851e317857 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:13 +0200 Subject: [PATCH 0528/1050] bpf: direct packet write and access for helpers for clsact progs This work implements direct packet access for helpers and direct packet write in a similar fashion as already available for XDP types via commits 4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and 6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a complementary feature to the already available direct packet read for tc (cls/act) programs. For enabling this, we need to introduce two helpers, bpf_skb_pull_data() and bpf_csum_update().
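As a sketch of how the first of these is meant to be used from restricted C — a hypothetical tc program assembled from the helper semantics described in this message, not code shipped with the patch (the wrapper declaration follows the usual samples/bpf convention):

#include <linux/bpf.h>
#include <linux/pkt_cls.h>

/* Helper wrapper in the samples/bpf style; BPF_FUNC_skb_pull_data is the
 * helper id introduced by this patch. */
static int (*bpf_skb_pull_data)(void *skb, unsigned int len) =
	(void *) BPF_FUNC_skb_pull_data;

__attribute__((section("classifier"), used))
int cls_pull_and_retest(struct __sk_buff *skb)
{
	void *data = (void *)(unsigned long)skb->data;
	void *data_end = (void *)(unsigned long)skb->data_end;

	if (data + 14 > data_end) {
		/* Needed bytes may sit in non-linear parts (or the skb may
		 * be cloned): pull them in once, then redo the bounds test. */
		if (bpf_skb_pull_data(skb, 14))
			return TC_ACT_OK;
		data = (void *)(unsigned long)skb->data;
		data_end = (void *)(unsigned long)skb->data_end;
		if (data + 14 > data_end)
			return TC_ACT_OK;
	}

	/* Direct read/write on the first 14 bytes is now safe. */
	return TC_ACT_OK;
}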
The first is generally needed for both read and write, because programs would otherwise only be limited to the current linear skb head. Usually, when the data_end test fails, programs just bail out, or, in the direct read case, use bpf_skb_load_bytes() as an alternative to overcome this limitation. If such data sits in non-linear parts, we can just pull them in once with the new helper, retest and eventually access them. At the same time, this also makes sure the skb is uncloned, which is, of course, a necessary condition for direct write. As this needs to be an invariant for the write part only, the verifier detects writes and adds a prologue that calls bpf_skb_pull_data() to effectively unclone the skb from the very beginning in case it is indeed cloned. The heuristic makes use of a similar trick that was done in 233577a22089 ("net: filter: constify detection of pkt_type_offset"). This comes at zero cost for other programs that do not use the direct write feature. Should a program use this feature only sparsely, with read access for the most part (for example, drop return codes), then such write action can be delegated to a tail-called program, deferring the cost of potential uncloning to a late point in time where it would have been paid similarly with bpf_skb_store_bytes() anyway. The advantage of direct write is that the writes are inlined, whereas the helper cannot make any length assumptions and thus must generate a call to memcpy() even for small sizes; on top of that, the cost of the helper call itself, including its sanity checks, is avoided. Plus, when direct read is already used, we don't need to cache or perform rechecks on the data boundaries (due to the verifier invalidating previous checks for helpers that change skb->data), so more complex programs using rewrites can benefit from switching to direct read plus write. For direct packet access to helpers, we save the otherwise needed copy into a temp struct sitting on stack memory when the use case allows. Both facilities are enabled via may_access_direct_pkt_data() in the verifier. For now, we limit this to map helpers and csum_diff, and can successively enable other helpers where we find it makes sense. Helpers that definitely cannot be allowed for this are those part of bpf_helper_changes_skb_data(), since they can change the underlying data, and those that write into memory, as this could happen for packet-typed args when still cloned. The bpf_csum_update() helper accommodates the fact that we need to fix up checksum_complete when using direct write instead of bpf_skb_store_bytes(), meaning programs can use available helpers like bpf_csum_diff() and implement csum_add(), csum_sub(), csum_block_add() and csum_block_sub() equivalents in eBPF together with the new helper. A usage example will be provided for iproute2's examples/bpf/ directory. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/bpf.h | 4 +- include/linux/skbuff.h | 14 +++- include/uapi/linux/bpf.h | 21 ++++++ kernel/bpf/helpers.c | 3 + kernel/bpf/verifier.c | 54 ++++++++++++---- net/core/filter.c | 134 ++++++++++++++++++++++++++++++++++----- 6 files changed, 196 insertions(+), 34 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9a904f63f8c1..5691fdc83819 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_return_type { struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; + bool pkt_access; enum bpf_return_type ret_type; enum bpf_arg_type arg1_type; enum bpf_arg_type arg2_type; @@ -151,7 +152,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type); - + int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, + const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn, struct bpf_prog *prog); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4c5662f05bda..c6dab3f7457c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -676,13 +676,23 @@ struct sk_buff { */ kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; + +/* if you move cloned around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define CLONED_MASK (1 << 7) +#else +#define CLONED_MASK 1 +#endif +#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + + __u8 __cloned_offset[0]; __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, - xmit_more:1; - /* one bit hole */ + xmit_more:1, + __unused:1; /* one bit hole */ kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f896dfac4ac0..e07432b9f8b8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -398,6 +398,27 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_tail, + /** + * bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the + * skb is non-linear and not all of len are part of the + * linear section. Only needed for read/write with direct + * packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_pull_data, + + /** + * bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. 
+ * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + */ + BPF_FUNC_csum_update, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a5b8bf8cfcfd..39918402e6e9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -36,6 +36,7 @@ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) const struct bpf_func_proto bpf_map_lookup_elem_proto = { .func = bpf_map_lookup_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -51,6 +52,7 @@ BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, const struct bpf_func_proto bpf_map_update_elem_proto = { .func = bpf_map_update_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -67,6 +69,7 @@ BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) const struct bpf_func_proto bpf_map_delete_elem_proto = { .func = bpf_map_delete_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc138f34e38c..3a75ee3bdcd1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -196,6 +196,7 @@ struct verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool seen_direct_write; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -204,6 +205,7 @@ struct verifier_env { struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; + bool pkt_access; int regno; int access_size; }; @@ -654,10 +656,17 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_write_pkt_data(enum bpf_prog_type type) +static bool may_access_direct_pkt_data(struct verifier_env *env, + const struct bpf_call_arg_meta *meta) { - switch (type) { + switch (env->prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: + if (meta) + return meta->pkt_access; + + env->seen_direct_write = true; return true; default: return false; @@ -817,7 +826,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, err = check_stack_read(state, off, size, value_regno); } } else if (state->regs[regno].type == PTR_TO_PACKET) { - if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) { + if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) { verbose("cannot write into packet\n"); return -EACCES; } @@ -950,8 +959,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } - if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) { - verbose("helper access to the packet is not allowed for clsact\n"); + if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) { + verbose("helper access to the packet is not allowed\n"); return -EACCES; } @@ -1191,6 +1200,7 @@ static int check_call(struct verifier_env *env, int func_id) changes_data = bpf_helper_changes_skb_data(fn->func); memset(&meta, 0, sizeof(meta)); + meta.pkt_access = fn->pkt_access; /* We only support one arg being in raw mode at the moment, which * is sufficient for the helper functions we have right now. 
@@ -2675,18 +2685,35 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) */ static int convert_ctx_accesses(struct verifier_env *env) { - struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16]; + const struct bpf_verifier_ops *ops = env->prog->aux->ops; + struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i; + int i, insn_cnt, cnt; - if (!env->prog->aux->ops->convert_ctx_access) + if (ops->gen_prologue) { + cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, + env->prog); + if (cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (cnt) { + new_prog = bpf_patch_insn_single(env->prog, 0, + insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + } + } + + if (!ops->convert_ctx_access) return 0; + insn_cnt = env->prog->len; + insn = env->prog->insnsi; + for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta, cnt; + u32 insn_delta; if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) @@ -2703,9 +2730,8 @@ static int convert_ctx_accesses(struct verifier_env *env) continue; } - cnt = env->prog->aux->ops-> - convert_ctx_access(type, insn->dst_reg, insn->src_reg, - insn->off, insn_buf, env->prog); + cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, + insn->off, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index 298b146b47e7..0920c2ac1d00 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1362,6 +1362,11 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, return err; } +static int bpf_try_make_head_writable(struct sk_buff *skb) +{ + return bpf_try_make_writable(skb, skb_headlen(skb)); +} + static inline void bpf_push_mac_rcsum(struct sk_buff *skb) { if (skb_at_tc_ingress(skb)) @@ -1441,6 +1446,28 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; +BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) +{ + /* Idea is the following: should the needed direct read/write + * test fail during runtime, we can pull in more data and redo + * again, since implicitly, we invalidate previous checks here. + * + * Or, since we know how much we need to make read/writeable, + * this can be done once at the program beginning for direct + * access case. By this we overcome limitations of only current + * headroom being accessible. + */ + return bpf_try_make_writable(skb, len ? 
: skb_headlen(skb)); +} + +static const struct bpf_func_proto bpf_skb_pull_data_proto = { + .func = bpf_skb_pull_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, u64, from, u64, to, u64, flags) { @@ -1567,6 +1594,7 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, static const struct bpf_func_proto bpf_csum_diff_proto = { .func = bpf_csum_diff, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_STACK, .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO, @@ -1575,6 +1603,26 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum) +{ + /* The interface is to be used in combination with bpf_csum_diff() + * for direct packet writes. csum rotation for alignment as well + * as emulating csum_sub() can be done from the eBPF program. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + return (skb->csum = csum_add(skb->csum, csum)); + + return -ENOTSUPP; +} + +static const struct bpf_func_proto bpf_csum_update_proto = { + .func = bpf_csum_update, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { return dev_forward_skb(dev, skb); @@ -1602,6 +1650,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; + struct sk_buff *clone; + int ret; if (unlikely(flags & ~(BPF_F_INGRESS))) return -EINVAL; @@ -1610,14 +1660,25 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) if (unlikely(!dev)) return -EINVAL; - skb = skb_clone(skb, GFP_ATOMIC); - if (unlikely(!skb)) + clone = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!clone)) return -ENOMEM; - bpf_push_mac_rcsum(skb); + /* For direct write, we need to keep the invariant that the skbs + * we're dealing with need to be uncloned. Should uncloning fail + * here, we need to free the just generated clone to unclone once + * again. + */ + ret = bpf_try_make_head_writable(skb); + if (unlikely(ret)) { + kfree_skb(clone); + return -ENOMEM; + } + + bpf_push_mac_rcsum(clone); return flags & BPF_F_INGRESS ? 
- __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -2063,19 +2124,14 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = { bool bpf_helper_changes_skb_data(void *func) { - if (func == bpf_skb_vlan_push) - return true; - if (func == bpf_skb_vlan_pop) - return true; - if (func == bpf_skb_store_bytes) - return true; - if (func == bpf_skb_change_proto) - return true; - if (func == bpf_skb_change_tail) - return true; - if (func == bpf_l3_csum_replace) - return true; - if (func == bpf_l4_csum_replace) + if (func == bpf_skb_vlan_push || + func == bpf_skb_vlan_pop || + func == bpf_skb_store_bytes || + func == bpf_skb_change_proto || + func == bpf_skb_change_tail || + func == bpf_skb_pull_data || + func == bpf_l3_csum_replace || + func == bpf_l4_csum_replace) return true; return false; @@ -2440,8 +2496,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_store_bytes_proto; case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; case BPF_FUNC_csum_diff: return &bpf_csum_diff_proto; + case BPF_FUNC_csum_update: + return &bpf_csum_update_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: @@ -2533,6 +2593,45 @@ static bool sk_filter_is_valid_access(int off, int size, return __is_valid_access(off, size, type); } +static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + if (!direct_write) + return 0; + + /* if (!skb->cloned) + * goto start; + * + * (Fast-path, otherwise approximation that we might be + * a clone, do the rest in helper.) + */ + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); + /* ret = bpf_skb_pull_data(skb, 0); */ + *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); + *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_pull_data); + /* if (!ret) + * goto restore; + * return TC_ACT_SHOT; + */ + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); + *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT); + *insn++ = BPF_EXIT_INSN(); + + /* restore: */ + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + /* start: */ + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type) @@ -2810,6 +2909,7 @@ static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, + .gen_prologue = tc_cls_act_prologue, }; static const struct bpf_verifier_ops xdp_ops = { From 7d95b0ab5bbe2dc9bf3fd99c27e80ced5bfa8acf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:14 +0200 Subject: [PATCH 0529/1050] bpf: add test cases for direct packet access Add a couple of test cases for direct write and the negative size issue, and also adjust the direct packet access test4: it asserted that writes are not possible, but since we've just added support for writes, we need to invert the verdict to ACCEPT, of course. Summary: 133 PASSED, 0 FAILED.
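For readers not fluent in the raw BPF_* macro encoding, the idiom these tests exercise is the standard bounds check against data_end before any direct packet access. In restricted C, a tc classifier using the newly allowed direct write looks roughly like this (an illustrative sketch for the clang BPF target, not code from the patch; the section and function names are conventional, not mandated):

#include <linux/bpf.h>
#include <linux/pkt_cls.h>

/* A bounds-checked direct packet write, the pattern the ACCEPT tests encode
 * with BPF_LDX_MEM/BPF_JMP_REG macros. skb->data and skb->data_end are u32
 * fields that the verifier rewrites into real pointers, hence the casts. */
__attribute__((section("classifier"), used))
int cls_direct_write(struct __sk_buff *skb)
{
	unsigned char *data = (unsigned char *)(unsigned long)skb->data;
	unsigned char *data_end = (unsigned char *)(unsigned long)skb->data_end;

	/* The verifier rejects any packet access not dominated by this test. */
	if (data + 8 > data_end)
		return TC_ACT_OK;

	data[0] = 0x42;	/* direct write, now permitted for clsact programs */

	return TC_ACT_OK;
}

The REJECT cases above are precisely programs that skip or subvert such a check, or that hand an unchecked or zero-sized packet range to a helper.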
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/test_verifier.c | 433 +++++++++++++++++++++++++++++++++++- 1 file changed, 430 insertions(+), 3 deletions(-) diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 1f6cc9b6a23b..ac590d4b7f02 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -290,6 +290,29 @@ static struct bpf_test tests[] = { .errstr = "invalid read from stack", .result = REJECT, }, + { + "invalid argument register", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "non-invalid argument register", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "check valid spill/fill", .insns = { @@ -1209,6 +1232,54 @@ static struct bpf_test tests[] = { .errstr = "invalid read from stack off -8+0 size 8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "raw_stack: skb_load_bytes, negative len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, negative len 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, ~0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, zero len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "raw_stack: skb_load_bytes, no init", .insns = { @@ -1511,7 +1582,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, }, { - "direct packet access: test4", + "direct packet access: test4 (write)", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1524,8 +1595,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "cannot write", - .result = REJECT, + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -1630,6 +1700,26 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct 
packet access: test10 (write invalid)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -1736,6 +1826,343 @@ static struct bpf_test tests[] = { .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "helper access to packet: test6, cls valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {5}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test7, cls unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {1}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test8, cls variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {11}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test9, cls packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {7}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to 
packet: test10, cls packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {6}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test11, cls unsuitable helper 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 42), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test12, cls unsuitable helper 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test13, cls helper ok", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test14, cls helper fail sub", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = 
"type=inv expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test15, cls helper fail range 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test16, cls helper fail range 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, -9), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test17, cls helper fail range 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, ~0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test18, cls helper fail range zero", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test19, pkt end as input", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + 
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=pkt_end expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test20, wrong reg", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) From aecc5cefc389735b5327d234e11d1fe505e1c280 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 19 Sep 2016 19:02:51 -0400 Subject: [PATCH 0530/1050] net sched actions: fix GETing actions With the batch changes that translated transient actions into a temporary list, lost in the translation was the fact that tcf_action_destroy() will eventually delete the action from the permanent location if the refcount is zero. Example of what broke: ...add a gact action to drop sudo $TC actions add action drop index 10 ...now retrieve it, looks good sudo $TC actions get action gact index 10 ...retrieve it again and find it is gone! sudo $TC actions get action gact index 10 Fixes: 22dc13c837c3 ("net_sched: convert tcf_exts from list to pointer array") Fixes: 824a7e8863b3 ("net_sched: remove an unnecessary list_del()") Fixes: f07fed82ad79 ("net_sched: remove the leftover cleanup_a()") Acked-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S.
Miller --- net/sched/act_api.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d0aceb1740b1..c9102172ce3b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,6 +592,17 @@ err_out: return ERR_PTR(err); } +static void cleanup_a(struct list_head *actions, int ovr) +{ + struct tc_action *a; + + if (!ovr) + return; + + list_for_each_entry(a, actions, list) + a->tcfa_refcnt--; +} + int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, struct list_head *actions) { @@ -611,8 +622,15 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, goto err; } act->order = i; + if (ovr) + act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + + /* Remove the temp refcnt which was necessary to protect against + * destroying an existing action which was being replaced + */ + cleanup_a(actions, ovr); return 0; err: @@ -882,6 +900,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; + if (event == RTM_GETACTION) + act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } From b79331a5eb9f96e4dfd216974581168ec4c8a4d4 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 14 Sep 2016 16:37:17 +1000 Subject: [PATCH 0531/1050] powerpc/powernv/pci: Fix m64 checks for SR-IOV and window alignment Commit 5958d19a143e checks for prefetchable m64 BARs by comparing the addresses instead of using resource flags. This broke SR-IOV as the m64 check in pnv_pci_ioda_fixup_iov_resources() fails. The condition in pnv_pci_window_alignment() also changed to checking only IORESOURCE_MEM_64 instead of both IORESOURCE_MEM_64 and IORESOURCE_PREFETCH. Revert these cases to the previous behaviour, adding a new helper function to do so. This is named pnv_pci_is_m64_flags() to make it clear this function is only looking at resource flags and should not be relied on for non-SRIOV resources. Fixes: 5958d19a143e ("Fix incorrect PE reservation attempt on some 64-bit BARs") Reported-by: Alexey Kardashevskiy Signed-off-by: Russell Currey Tested-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index bc0c91e84ca0..38a5c657ffd3 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -124,6 +124,13 @@ static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); } +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; +} + static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) { phb->ioda.pe_array[pe_no].phb = phb; @@ -2871,7 +2878,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_m64(phb, res)) { + if (!pnv_pci_is_m64_flags(res->flags)) { dev_warn(&pdev->dev, "Don't support SR-IOV with" " non M64 VF BAR%d: %pR. \n", i, res); @@ -3096,7 +3103,7 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, * alignment for any 64-bit resource, PCIe doesn't care and * bridges only do 64-bit prefetchable anyway. 
*/ - if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64)) + if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; From 8847293992606677d5e446d1e712bd128ea7977f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 19 Sep 2016 19:56:11 -0400 Subject: [PATCH 0532/1050] net: dsa: mv88e6xxx: handle multiple ports in ATU An address can be loaded in the ATU with multiple ports, for instance when adding multiple ports to a Multicast group with "bridge mdb". The current code doesn't allow that. Add a helper to get a single entry from the ATU, then set or clear the requested port, before loading the entry back in the ATU. Note that the required _mv88e6xxx_atu_getnext function is defined below mv88e6xxx_port_db_load_purge, so forward-declare it for the moment. The ATU code will be isolated in future patches. Fixes: 83dabd1fa84c ("net: dsa: mv88e6xxx: make switchdev DB ops generic") Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 56 ++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 70a812d159c9..1d71802396fb 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2091,12 +2091,48 @@ static int _mv88e6xxx_atu_load(struct mv88e6xxx_chip *chip, return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB); } +static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, + struct mv88e6xxx_atu_entry *entry); + +static int mv88e6xxx_atu_get(struct mv88e6xxx_chip *chip, int fid, + const u8 *addr, struct mv88e6xxx_atu_entry *entry) +{ + struct mv88e6xxx_atu_entry next; + int err; + + eth_broadcast_addr(next.mac); + + err = _mv88e6xxx_atu_mac_write(chip, next.mac); + if (err) + return err; + + do { + err = _mv88e6xxx_atu_getnext(chip, fid, &next); + if (err) + return err; + + if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED) + break; + + if (ether_addr_equal(next.mac, addr)) { + *entry = next; + return 0; + } + } while (!is_broadcast_ether_addr(next.mac)); + + memset(entry, 0, sizeof(*entry)); + entry->fid = fid; + ether_addr_copy(entry->mac, addr); + + return 0; +} + static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, const unsigned char *addr, u16 vid, u8 state) { - struct mv88e6xxx_atu_entry entry = { 0 }; struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_atu_entry entry; int err; /* Null VLAN ID corresponds to the port private database */ @@ -2107,12 +2143,18 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (err) return err; - entry.fid = vlan.fid; - entry.state = state; - ether_addr_copy(entry.mac, addr); - if (state != GLOBAL_ATU_DATA_STATE_UNUSED) { - entry.trunk = false; - entry.portv_trunkid = BIT(port); + err = mv88e6xxx_atu_get(chip, vlan.fid, addr, &entry); + if (err) + return err; + + /* Purge the ATU entry only if no port is using it anymore */ + if (state == GLOBAL_ATU_DATA_STATE_UNUSED) { + entry.portv_trunkid &= ~BIT(port); + if (!entry.portv_trunkid) + entry.state = GLOBAL_ATU_DATA_STATE_UNUSED; + } else { + entry.portv_trunkid |= BIT(port); + entry.state = state; } return _mv88e6xxx_atu_load(chip, &entry); From 94d308d060cd3ee65152b8ebd7a1c24fa86eee82 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 20 Sep 2016 11:26:48 +0800 Subject: [PATCH 0533/1050] net:
ethernet: mediatek: enhance with avoiding superfluous assignment inside mtk_get_ethtool_stats data_src is unchanged inside the loop, so this patch moves the assignment outside the loop to avoid an unnecessary assignment Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 481f3609a1ea..ca6b5013e275 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2137,8 +2137,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } } + data_src = (u64 *)hwstats; + do { - data_src = (u64 *)hwstats; data_dst = data; start = u64_stats_fetch_begin_irq(&hwstats->syncp); From f78e73e27fdeab6f9317667f7e9676b59c1ec1fb Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:08 -0400 Subject: [PATCH 0534/1050] tcp: cdg: rename struct minmax in tcp_cdg.c to avoid a naming conflict The upcoming change "lib/win_minmax: windowed min or max estimator" introduces a struct called minmax, which is then included in include/linux/tcp.h in the upcoming change "tcp: use windowed min filter library for TCP min_rtt estimation". This would create a compilation error for tcp_cdg.c, which defines its own minmax struct. To avoid this naming conflict (and potentially others in the future), this commit renames the version used in tcp_cdg.c to cdg_minmax. Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Kenneth Klette Jonassen Acked-by: Kenneth Klette Jonassen Signed-off-by: David S. Miller --- net/ipv4/tcp_cdg.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 03725b294286..35b280361cb2 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -56,7 +56,7 @@ MODULE_PARM_DESC(use_shadow, "use shadow window heuristic"); module_param(use_tolerance, bool, 0644); MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic"); -struct minmax { +struct cdg_minmax { union { struct { s32 min; @@ -74,10 +74,10 @@ enum cdg_state { }; struct cdg { - struct minmax rtt; - struct minmax rtt_prev; - struct minmax *gradients; - struct minmax gsum; + struct cdg_minmax rtt; + struct cdg_minmax rtt_prev; + struct cdg_minmax *gradients; + struct cdg_minmax gsum; bool gfilled; u8 tail; u8 state; @@ -353,7 +353,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) { struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - struct minmax *gradients; + struct cdg_minmax *gradients; switch (ev) { case CA_EVENT_CWND_RESTART: From a4f1f9ac8153e22869b6408832b5a9bb9c762bf6 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:09 -0400 Subject: [PATCH 0535/1050] lib/win_minmax: windowed min or max estimator This commit introduces a generic library to estimate either the min or max value of a time-varying variable over a recent time window. This is code originally from Kathleen Nichols. The current form of the code is from Van Jacobson. A single struct minmax_sample will track the estimated windowed-max value of the series if you call minmax_running_max() or the estimated windowed-min value of the series if you call minmax_running_min(). Nearly equivalent code is already in place for minimum RTT estimation in the TCP stack.
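To make the windowed-filter semantics concrete before the patch itself, here is a hedged, self-contained userspace sketch (not kernel code; the WIN constant and the sample series are invented for illustration). It computes the exact windowed minimum by brute force, which is what the new library approximates with only three stored samples and O(1) work per update:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define WIN 8 /* window length, in time ticks */

int main(void)
{
	/* invented measurement series, one sample per tick */
	uint32_t v[] = { 50, 40, 42, 44, 41, 45, 46, 47,
			 48, 49, 60, 61, 62, 30, 31, 32 };
	uint32_t n = sizeof(v) / sizeof(v[0]);

	for (uint32_t t = 0; t < n; t++) {
		uint32_t min = UINT32_MAX;

		/* exact windowed min: scan samples at most WIN ticks old */
		for (uint32_t i = (t + 1 >= WIN) ? t + 1 - WIN : 0; i <= t; i++)
			if (v[i] < min)
				min = v[i];

		printf("t=%2" PRIu32 " meas=%2" PRIu32 " win_min=%2" PRIu32 "\n",
		       t, v[t], min);
	}
	return 0;
}

As noted above, nearly equivalent windowed-min code already lives in the TCP min_rtt estimator.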
This commit extracts that code and generalizes it to handle both min and max. Moving the code here reduces the footprint and complexity of the TCP code base and makes the filter generally available for other parts of the codebase, including an upcoming TCP congestion control module. This library works well for time series where the measurements are smoothly increasing or decreasing. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/win_minmax.h | 37 ++++++++++++++ lib/Makefile | 2 +- lib/win_minmax.c | 98 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 include/linux/win_minmax.h create mode 100644 lib/win_minmax.c diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h new file mode 100644 index 000000000000..56569604278f --- /dev/null +++ b/include/linux/win_minmax.h @@ -0,0 +1,37 @@ +/** + * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. + * + */ +#ifndef MINMAX_H +#define MINMAX_H + +#include + +/* A single data point for our parameterized min-max tracker */ +struct minmax_sample { + u32 t; /* time measurement was taken */ + u32 v; /* value measured */ +}; + +/* State for the parameterized min-max tracker */ +struct minmax { + struct minmax_sample s[3]; +}; + +static inline u32 minmax_get(const struct minmax *m) +{ + return m->s[0].v; +} + +static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + m->s[2] = m->s[1] = m->s[0] = val; + return m->s[0].v; +} + +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); + +#endif diff --git a/lib/Makefile b/lib/Makefile index 5dc77a8ec297..df747e5eeb7a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o + earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/win_minmax.c b/lib/win_minmax.c new file mode 100644 index 000000000000..c8420d404926 --- /dev/null +++ b/lib/win_minmax.c @@ -0,0 +1,98 @@ +/** + * lib/minmax.c: windowed min/max tracker + * + * Kathleen Nichols' algorithm for tracking the minimum (or maximum) + * value of a data stream over some fixed time interval. (E.g., + * the minimum RTT over the past five minutes.) It uses constant + * space and constant time per update yet almost always delivers + * the same minimum as an implementation that has to keep all the + * data in the window. + * + * The algorithm keeps track of the best, 2nd best & 3rd best min + * values, maintaining an invariant that the measurement time of + * the n'th best >= n-1'th best. It also makes sure that the three + * values are widely separated in the time window since that bounds + * the worse case error when that data is monotonically increasing + * over the window. + * + * Upon getting a new min, we can forget everything earlier because + * it has no value - the new min is <= everything else in the window + * by definition and it's the most recent. 
So we restart fresh on + * every new min and overwrites 2nd & 3rd choices. The same property + * holds for 2nd & 3rd best. + */ +#include +#include + +/* As time advances, update the 1st, 2nd, and 3rd choices. */ +static u32 minmax_subwin_update(struct minmax *m, u32 win, + const struct minmax_sample *val) +{ + u32 dt = val->t - m->s[0].t; + + if (unlikely(dt > win)) { + /* + * Passed entire window without a new val so make 2nd + * choice the new val & 3rd choice the new 2nd choice. + * we may have to iterate this since our 2nd choice + * may also be outside the window (we checked on entry + * that the third choice was in the window). + */ + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + if (unlikely(val->t - m->s[0].t > win)) { + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + } + } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) { + /* + * We've passed a quarter of the window without a new val + * so take a 2nd choice from the 2nd quarter of the window. + */ + m->s[2] = m->s[1] = *val; + } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) { + /* + * We've passed half the window without finding a new val + * so take a 3rd choice from the last half of the window + */ + m->s[2] = *val; + } + return m->s[0].v; +} + +/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */ +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v >= m->s[0].v) || /* found new max? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v >= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v >= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} +EXPORT_SYMBOL(minmax_running_max); + +/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */ +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v <= m->s[0].v) || /* found new min? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v <= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v <= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} From 6403389211e1f4d40ed963fe47a96fce1a3ba7a9 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:10 -0400 Subject: [PATCH 0536/1050] tcp: use windowed min filter library for TCP min_rtt estimation Refactor the TCP min_rtt code to reuse the new win_minmax library in lib/win_minmax.c to simplify the TCP code. This is a pure refactor: the functionality is exactly the same. We just moved the windowed min code to make TCP easier to read and maintain, and to allow other parts of the kernel to use the windowed min/max filter code. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 5 ++-- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 62 +++------------------------------------- net/ipv4/tcp_minisocks.c | 2 +- 5 files changed, 9 insertions(+), 64 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c723a465125d..6433cc8b4667 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -234,9 +235,7 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ - struct rtt_meas { - u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ - } rtt_min[3]; + struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ diff --git a/include/net/tcp.h b/include/net/tcp.h index fdfbedd61c67..2f1648af4d12 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -671,7 +671,7 @@ static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) /* Minimum RTT in usec. ~0 means not available. */ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) { - return tp->rtt_min[0].rtt; + return minmax_get(&tp->rtt_min); } /* Compute the actual receive window we are currently advertising. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7dae800092e6..e79ed17ccfd6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - tp->rtt_min[0].rtt = ~0U; + minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dad3e7eeed94..6886f386464f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2879,67 +2879,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, *rexmit = REXMIT_LOST; } -/* Kathleen Nichols' algorithm for tracking the minimum value of - * a data stream over some fixed time interval. (E.g., the minimum - * RTT over the past five minutes.) It uses constant space and constant - * time per update yet almost always delivers the same minimum as an - * implementation that has to keep all the data in the window. - * - * The algorithm keeps track of the best, 2nd best & 3rd best min - * values, maintaining an invariant that the measurement time of the - * n'th best >= n-1'th best. It also makes sure that the three values - * are widely separated in the time window since that bounds the worse - * case error when that data is monotonically increasing over the window. - * - * Upon getting a new min, we can forget everything earlier because it - * has no value - the new min is <= everything else in the window by - * definition and it's the most recent. So we restart fresh on every new min - * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd - * best. - */ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { - const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; - struct rtt_meas *m = tcp_sk(sk)->rtt_min; - struct rtt_meas rttm = { - .rtt = likely(rtt_us) ? 
rtt_us : jiffies_to_usecs(1), - .ts = now, - }; - u32 elapsed; + struct tcp_sock *tp = tcp_sk(sk); + u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; - /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ - if (unlikely(rttm.rtt <= m[0].rtt)) - m[0] = m[1] = m[2] = rttm; - else if (rttm.rtt <= m[1].rtt) - m[1] = m[2] = rttm; - else if (rttm.rtt <= m[2].rtt) - m[2] = rttm; - - elapsed = now - m[0].ts; - if (unlikely(elapsed > wlen)) { - /* Passed entire window without a new min so make 2nd choice - * the new min & 3rd choice the new 2nd. So forth and so on. - */ - m[0] = m[1]; - m[1] = m[2]; - m[2] = rttm; - if (now - m[0].ts > wlen) { - m[0] = m[1]; - m[1] = rttm; - if (now - m[0].ts > wlen) - m[0] = rttm; - } - } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { - /* Passed a quarter of the window without a new min so - * take 2nd choice from the 2nd quarter of the window. - */ - m[2] = m[1] = rttm; - } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { - /* Passed half the window without a new min so take the 3rd - * choice from the last half of the window. - */ - m[2] = rttm; - } + minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + rtt_us ? : jiffies_to_usecs(1)); } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f63c73dc0acb..568947110b60 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -464,7 +464,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - newtp->rtt_min[0].rtt = ~0U; + minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; From 77879147a3481babffd7e368d977ab682545a6bd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Sep 2016 23:39:11 -0400 Subject: [PATCH 0537/1050] net_sched: sch_fq: add low_rate_threshold parameter This commit adds to the fq module a low_rate_threshold parameter to insert a delay after all packets if the socket requests a pacing rate below the threshold. This helps achieve more precise control of the sending rate with low-rate paths, especially policers. The basic issue is that if a congestion control module detects a policer at a certain rate, it may want fq to be able to shape to that policed rate. That way the sender can avoid policer drops by having the packets arrive at the policer at or just under the policed rate. The default threshold of 550Kbps was chosen analytically so that for policers or links at 500Kbps or 512Kbps fq would very likely invoke this mechanism, even if the pacing rate was briefly slightly above the available bandwidth. This value was then empirically validated with two years of production testing on YouTube video servers. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_fq.c | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 2382eed50278..f8e39dbaa781 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -792,6 +792,8 @@ enum { TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + __TCA_FQ_MAX }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index dc52cc10d6ed..5dd929cc1423 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -94,6 +94,7 @@ struct fq_sched_data { u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ u32 orphan_mask; /* mask for orphaned skb */ + u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -433,7 +434,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; - u32 rate; + u32 rate, plen; skb = fq_dequeue_head(sch, &q->internal); if (skb) @@ -482,7 +483,7 @@ begin: prefetch(&skb->end); f->credit -= qdisc_pkt_len(skb); - if (f->credit > 0 || !q->rate_enable) + if (!q->rate_enable) goto out; /* Do not pace locally generated ack packets */ @@ -493,8 +494,15 @@ begin: if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); + if (rate <= q->low_rate_threshold) { + f->credit = 0; + plen = qdisc_pkt_len(skb); + } else { + plen = max(qdisc_pkt_len(skb), q->quantum); + if (f->credit > 0) + goto out; + } if (rate != ~0U) { - u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; if (likely(rate)) @@ -662,6 +670,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt) @@ -716,6 +725,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_MAX_RATE]) q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) + q->low_rate_threshold = + nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); + if (tb[TCA_FQ_RATE_ENABLE]) { u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); @@ -781,6 +794,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->fq_root = NULL; q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; + q->low_rate_threshold = 550000 / 8; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -811,6 +825,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || + nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, + q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; From b2d3ea4a730f812b9c0f67a67b6762ce66ddb17c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Sep 2016 23:39:12 -0400 Subject: [PATCH 0538/1050] tcp: switch back to proper tcp_skb_cb size check in tcp_init() Revert to the tcp_skb_cb size check that tcp_init() had before commit b4772ef879a8 ("net: use common macro for assering skb->cb[] available size in protocol families"). 
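The check being reverted to is a compile-time assertion. As a hedged illustration of the underlying idiom (invented struct names and sizes, plain userspace C rather than the kernel's BUILD_BUG_ON and FIELD_SIZEOF macros), a negative array size forces the compiler to reject the build whenever the control-block payload outgrows the buffer:

/* stand-ins for sk_buff and tcp_skb_cb; the sizes are invented */
struct skbuff_like { char cb[48]; };
struct cb_payload { char data[40]; };

/* classic trick: an array of size -1 is a compile-time error */
#define STATIC_ASSERT(cond, name) typedef char name[(cond) ? 1 : -1]

STATIC_ASSERT(sizeof(struct cb_payload) <=
	      sizeof(((struct skbuff_like *)0)->cb),
	      tcp_cb_payload_fits);

int main(void) { return 0; }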
As related commit 744d5a3e9fe2 ("net: move skb->dropcount to skb->cb[]") explains, the sock_skb_cb_check_size() mechanism was added to ensure that there is space for dropcount, "for protocol families using it". But TCP is not a protocol using dropcount, so tcp_init() doesn't need to provision space for dropcount in the skb->cb[], and thus we can revert to the older form of the tcp_skb_cb size check. Doing so allows TCP to use 4 more bytes of the skb->cb[] space. Fixes: b4772ef879a8 ("net: use common macro for assering skb->cb[] available size in protocol families") Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e79ed17ccfd6..de02fb4b1349 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3261,11 +3261,12 @@ static void __init tcp_init_mem(void) void __init tcp_init(void) { - unsigned long limit; int max_rshare, max_wshare, cnt; + unsigned long limit; unsigned int i; - sock_skb_cb_check_size(sizeof(struct tcp_skb_cb)); + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); From 0682e6902a52aca7caf6ad42551b16ea0f87bc31 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:13 -0400 Subject: [PATCH 0539/1050] tcp: count packets marked lost for a TCP connection Count the number of packets that a TCP connection marks lost. Congestion control modules can use this loss rate information for more intelligent decisions about how fast to send. Specifically, this is used in TCP BBR policer detection. BBR uses a high packet loss rate as one signal in its policer detection and policer bandwidth estimation algorithm. The BBR policer detection algorithm cannot simply track retransmits, because a retransmit can be (and often is) an indicator of packets lost long, long ago. This is particularly true in a long CA_Loss period that repairs the initial massive losses when a policer kicks in. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp_input.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6433cc8b4667..38590fbc0ac5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -267,6 +267,7 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 lost; /* Total data packets lost incl. rexmits */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6886f386464f..9413288c2778 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -899,12 +899,29 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } +/* Sum the number of packets on the wire we have marked as lost. + * There are two cases we care about here: + * a) Packet hasn't been marked lost (nor retransmitted), + * and this is the first loss. 
+ * b) Packet has been marked both lost and retransmitted, + * and this means we think it was lost again. + */ +static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb) +{ + __u8 sacked = TCP_SKB_CB(skb)->sacked; + + if (!(sacked & TCPCB_LOST) || + ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS))) + tp->lost += tcp_skb_pcount(skb); +} + static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) { if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tcp_verify_retransmit_hint(tp, skb); tp->lost_out += tcp_skb_pcount(skb); + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; } } @@ -913,6 +930,7 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); + tcp_sum_lost(tp, skb); if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -1890,6 +1908,7 @@ void tcp_enter_loss(struct sock *sk) struct sk_buff *skb; bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ + bool mark_lost; /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || @@ -1923,8 +1942,12 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; + mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || + is_reneg); + if (mark_lost) + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { + if (mark_lost) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); From b9f64820fb226a4e8ab10591f46cecd91ca56b30 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:14 -0400 Subject: [PATCH 0540/1050] tcp: track data delivery rate for a TCP connection This patch generates data delivery rate (throughput) samples on a per-ACK basis. These rate samples can be used by congestion control modules, and specifically will be used by TCP BBR in later patches in this series. Key state: tp->delivered: Tracks the total number of data packets (original or not) delivered so far. This is an already-existing field. tp->delivered_mstamp: the last time tp->delivered was updated. Algorithm: A rate sample is calculated as (d1 - d0)/(t1 - t0) on a per-ACK basis: d1: the current tp->delivered after processing the ACK t1: the current time after processing the ACK d0: the prior tp->delivered when the acked skb was transmitted t0: the prior tp->delivered_mstamp when the acked skb was transmitted When an skb is transmitted, we snapshot d0 and t0 in its control block in tcp_rate_skb_sent(). When an ACK arrives, it may SACK and ACK some skbs. For each SACKed or ACKed skb, tcp_rate_skb_delivered() updates the rate_sample struct to reflect the latest (d0, t0). Finally, tcp_rate_gen() generates a rate sample by storing (d1 - d0) in rs->delivered and (t1 - t0) in rs->interval_us. One caveat: if an skb was sent with no packets in flight, then tp->delivered_mstamp may be either invalid (if the connection is starting) or outdated (if the connection was idle). In that case, we'll re-stamp tp->delivered_mstamp. At first glance it seems t0 should always be the time when an skb was transmitted, but actually this could over-estimate the rate due to phase mismatch between transmit and ACK events. 
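Before the caveats are resolved below, the per-ACK sample arithmetic sketched above can be made concrete with a small worked example (a hedged userspace sketch; every value, including the 1460-byte packet size used to convert packets to bytes, is invented for illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t d0 = 1000;	  /* tp->delivered when the skb was sent */
	uint64_t t0_us = 5000000; /* tp->delivered_mstamp at that time */
	uint64_t d1 = 1010;	  /* tp->delivered after this ACK */
	uint64_t t1_us = 5010000; /* time after processing this ACK */
	uint64_t mss = 1460;	  /* assumed bytes per packet */

	uint64_t delivered = d1 - d0;	      /* 10 packets */
	uint64_t interval_us = t1_us - t0_us; /* 10,000 usec */
	uint64_t bw = delivered * mss * 1000000 / interval_us;

	/* 10 pkts * 1460 B over 10 ms -> 1,460,000 B/s (~11.7 Mbit/s) */
	printf("rate sample: %" PRIu64 " pkts / %" PRIu64 " us = %" PRIu64 " B/s\n",
	       delivered, interval_us, bw);
	return 0;
}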
To track the delivery rate, we ensure that if packets are in flight then t0 and t1 are times at which packets were marked delivered. If the initial and final RTTs are different then one may be corrupted by some sort of noise. The noise we see most often is sending gaps caused by delayed, compressed, or stretched acks. This either affects both RTTs equally or artificially reduces the final RTT. We approach this by recording the info we need to compute the initial RTT (duration of the "send phase" of the window) when we recorded the associated inflight. Then, for a filter to avoid bandwidth overestimates, we generalize the per-sample bandwidth computation from: bw = delivered / ack_phase_rtt to the following: bw = delivered / max(send_phase_rtt, ack_phase_rtt) In large-scale experiments, this filtering approach incorporating send_phase_rtt is effective at avoiding bandwidth overestimates due to ACK compression or stretched ACKs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 + include/net/tcp.h | 35 +++++++++- net/ipv4/Makefile | 2 +- net/ipv4/tcp_input.c | 46 +++++++++---- net/ipv4/tcp_output.c | 4 ++ net/ipv4/tcp_rate.c | 149 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 222 insertions(+), 16 deletions(-) create mode 100644 net/ipv4/tcp_rate.c diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 38590fbc0ac5..c50e6aec005a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,8 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + struct skb_mstamp first_tx_mstamp; /* start of window send phase */ + struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2f1648af4d12..b261c892605a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -763,8 +763,14 @@ struct tcp_skb_cb { __u32 ack_seq; /* Sequence number ACK'd */ union { struct { - /* There is space for up to 20 bytes */ + /* There is space for up to 24 bytes */ __u32 in_flight;/* Bytes in flight when packet sent */ + /* pkts S/ACKed so far upon tx of skb, incl retrans: */ + __u32 delivered; + /* start of send pipeline phase */ + struct skb_mstamp first_tx_mstamp; + /* when we reached the "delivered" count */ + struct skb_mstamp delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -860,6 +866,26 @@ struct ack_sample { u32 in_flight; }; +/* A rate sample measures the number of (original/retransmitted) data + * packets delivered "delivered" over an interval of time "interval_us". + * The tcp_rate.c code fills in the rate sample, and congestion + * control modules that define a cong_control function to run at the end + * of ACK processing can optionally chose to consult this sample when + * setting cwnd and pacing rate. + * A sample is invalid if "delivered" or "interval_us" is negative.
+ */ +struct rate_sample { + struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + s32 delivered; /* number of packets delivered over interval */ + long interval_us; /* time for tp->delivered to incr "delivered" */ + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + int losses; /* number of packets marked lost upon ACK */ + u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ + u32 prior_in_flight; /* in flight before this ACK */ + bool is_retrans; /* is sample from retransmission? */ +}; + struct tcp_congestion_ops { struct list_head list; u32 key; @@ -946,6 +972,13 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_rate.c */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs); +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs); + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 24629b6f57cc..9cfff1a0bf71 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - tcp_recovery.o \ + tcp_rate.o tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9413288c2778..d9ed4bb96f74 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1112,6 +1112,7 @@ struct tcp_sacktag_state { */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; + struct rate_sample *rate; int flag; }; @@ -1279,6 +1280,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1329,6 +1331,9 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); + if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64)) + TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0; + tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -1558,6 +1563,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack, tcp_skb_pcount(skb), &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1640,8 +1646,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); - if (found_dup_sack) + if (found_dup_sack) { state->flag |= FLAG_DSACKING_ACK; + tp->delivered++; /* A spurious retransmission is delivered */ + } /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -3071,10 +3079,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, */ static int 
tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_snd_una, int *acked, - struct tcp_sacktag_state *sack) + struct tcp_sacktag_state *sack, + struct skb_mstamp *now) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct skb_mstamp first_ackt, last_ackt, now; + struct skb_mstamp first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; @@ -3106,7 +3115,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, acked_pcount = tcp_tso_acked(sk, skb); if (!acked_pcount) break; - fully_acked = false; } else { /* Speedup tcp_unlink_write_queue() and next loop */ @@ -3142,6 +3150,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; + tcp_rate_skb_delivered(sk, skb, sack->rate); /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -3174,16 +3183,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - skb_mstamp_get(&now); if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { - seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); + ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); } if (sack->first_sackt.v64) { - sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); + sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); + ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt); } - + sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, ca_rtt_us); @@ -3211,7 +3219,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { + sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -3548,17 +3556,21 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_sacktag_state sack_state; + struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - u32 prior_delivered = tp->delivered; + u32 delivered = tp->delivered; + u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + struct skb_mstamp now; sack_state.first_sackt.v64 = 0; + sack_state.rate = &rs; /* We very likely will need to access write queue head. 
*/ prefetchw(sk->sk_write_queue.next); @@ -3581,6 +3593,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; + skb_mstamp_get(&now); + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -3591,6 +3605,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) } prior_fackets = tp->fackets_out; + rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3646,7 +3661,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + &sack_state, &now); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -3663,7 +3678,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag); + delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + lost = tp->lost - lost; /* freshly marked lost */ + tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_cong_control(sk, ack, delivered, flag); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8b45794eb6b2..e02c8ebf3ed4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -918,6 +918,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; + tcp_rate_skb_sent(sk, skb); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); @@ -1213,6 +1214,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); + /* Update delivered info for the new segment */ + TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx; + /* If this packet has been sent out already, we must * adjust the various packet counters. */ diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c new file mode 100644 index 000000000000..1daed6af6e80 --- /dev/null +++ b/net/ipv4/tcp_rate.c @@ -0,0 +1,149 @@ +#include + +/* The bandwidth estimator estimates the rate at which the network + * can currently deliver outbound data packets for this flow. At a high + * level, it operates by taking a delivery rate sample for each ACK. + * + * A rate sample records the rate at which the network delivered packets + * for this flow, calculated over the time interval between the transmission + * of a data packet and the acknowledgment of that packet. + * + * Specifically, over the interval between each transmit and corresponding ACK, + * the estimator generates a delivery rate sample. Typically it uses the rate + * at which packets were acknowledged. However, the approach of using only the + * acknowledgment rate faces a challenge under the prevalent ACK decimation or + * compression: packets can temporarily appear to be delivered much quicker + * than the bottleneck rate. 
Since it is physically impossible to do that in a + * sustained fashion, when the estimator notices that the ACK rate is faster + * than the transmit rate, it uses the latter: + * + * send_rate = #pkts_delivered/(last_snd_time - first_snd_time) + * ack_rate = #pkts_delivered/(last_ack_time - first_ack_time) + * bw = min(send_rate, ack_rate) + * + * Notice the estimator essentially estimates the goodput, not always the + * network bottleneck link rate when the sending or receiving is limited by + * other factors like applications or receiver window limits. The estimator + * deliberately avoids using the inter-packet spacing approach because that + * approach requires a large number of samples and sophisticated filtering. + */ + + +/* Snapshot the current delivery information in the skb, to generate + * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). + */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* In general we need to start delivery rate samples from the + * time we received the most recent ACK, to ensure we include + * the full time the network needs to deliver all in-flight + * packets. If there are no packets in flight yet, then we + * know that any ACKs after now indicate that the network was + * able to deliver those packets completely in the sampling + * interval between now and the next ACK. + * + * Note that we use packets_out instead of tcp_packets_in_flight(tp) + * because the latter is a guess based on RTO and loss-marking + * heuristics. We don't want spurious RTOs or loss markings to cause + * a spuriously small time interval, causing a spuriously high + * bandwidth estimate. + */ + if (!tp->packets_out) { + tp->first_tx_mstamp = skb->skb_mstamp; + tp->delivered_mstamp = skb->skb_mstamp; + } + + TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; + TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; + TCP_SKB_CB(skb)->tx.delivered = tp->delivered; +} + +/* When an skb is sacked or acked, we fill in the rate sample with the (prior) + * delivery information when the skb was last transmitted. + * + * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is + * called multiple times. We favor the information from the most recently + * sent skb, i.e., the skb with the highest prior_delivered count. + */ +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + + if (!scb->tx.delivered_mstamp.v64) + return; + + if (!rs->prior_delivered || + after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_delivered = scb->tx.delivered; + rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_retrans = scb->sacked & TCPCB_RETRANS; + + /* Find the duration of the "send phase" of this window: */ + rs->interval_us = skb_mstamp_us_delta( + &skb->skb_mstamp, + &scb->tx.first_tx_mstamp); + + /* Record send time of most recently ACKed packet: */ + tp->first_tx_mstamp = skb->skb_mstamp; + } + /* Mark off the skb delivered once it's sacked to avoid being + * used again when it's cumulatively acked. For acked packets + * we don't need to reset since it'll be freed soon. + */ + if (scb->sacked & TCPCB_SACKED_ACKED) + scb->tx.delivered_mstamp.v64 = 0; +} + +/* Update the connection delivery information and generate a rate sample. 
*/ +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 snd_us, ack_us; + + /* TODO: there are multiple places throughout tcp_ack() to get + * current time. Refactor the code using a new "tcp_acktag_state" + * to carry current time, flags, stats like "tcp_sacktag_state". + */ + if (delivered) + tp->delivered_mstamp = *now; + + rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ + rs->losses = lost; /* freshly marked lost */ + /* Return an invalid sample if no timing information is available. */ + if (!rs->prior_mstamp.v64) { + rs->delivered = -1; + rs->interval_us = -1; + return; + } + rs->delivered = tp->delivered - rs->prior_delivered; + + /* Model sending data and receiving ACKs as separate pipeline phases + * for a window. Usually the ACK phase is longer, but with ACK + * compression the send phase can be longer. To be safe we use the + * longer phase. + */ + snd_us = rs->interval_us; /* send phase */ + ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */ + rs->interval_us = max(snd_us, ack_us); + + /* Normally we expect interval_us >= min-rtt. + * Note that rate may still be over-estimated when a spuriously + * retransmistted skb was first (s)acked because "interval_us" + * is under-estimated (up to an RTT). However continuously + * measuring the delivery rate during loss recovery is crucial + * for connections suffer heavy or prolonged losses. + */ + if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { + rs->interval_us = -1; + if (!rs->is_retrans) + pr_debug("tcp rate: %ld %d %u %u %u\n", + rs->interval_us, rs->delivered, + inet_csk(sk)->icsk_ca_state, + tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + } +} From d7722e8570fc0f1e003cee7cf37694041828918b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:15 -0400 Subject: [PATCH 0541/1050] tcp: track application-limited rate samples This commit adds code to track whether the delivery rate represented by each rate_sample was limited by the application. Upon each transmit, we store in the is_app_limited field in the skb a boolean bit indicating whether there is a known "bubble in the pipe": a point in the rate sample interval where the sender was application-limited, and did not transmit even though the cwnd and pacing rate allowed it. This logic marks the flow app-limited on a write if *all* of the following are true: 1) There is less than 1 MSS of unsent data in the write queue available to transmit. 2) There is no packet in the sender's queues (e.g. in fq or the NIC tx queue). 3) The connection is not limited by cwnd. 4) There are no lost packets to retransmit. The tcp_rate_check_app_limited() code in tcp_rate.c determines whether the connection is application-limited at the moment. If the flow is application-limited, it sets the tp->app_limited field. If the flow is application-limited then that means there is effectively a "bubble" of silence in the pipe now, and this silence will be reflected in a lower bandwidth sample for any rate samples from now until we get an ACK indicating this bubble has exited the pipe: specifically, until we get an ACK for the next packet we transmit. When we send every skb we record in scb->tx.is_app_limited whether the resulting rate sample will be application-limited. The code in tcp_rate_gen() checks to see when it is safe to mark all known application-limited bubbles of silence as having exited the pipe. 
It does this by checking to see when the delivered count moves past the tp->app_limited marker. At this point it zeroes the tp->app_limited marker, as all known bubbles are out of the pipe. We make room for the tx.is_app_limited bit in the skb by borrowing a bit from the in_flight field used by NV to record the number of bytes in flight. The receive window in the TCP header is 16 bits, and the max receive window scaling shift factor is 14 (RFC 1323). So the max receive window offered by the TCP protocol is 2^(16+14) = 2^30. So we only need 30 bits for the tx.in_flight used by NV. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 6 +++++- net/ipv4/tcp.c | 8 ++++++++ net/ipv4/tcp_minisocks.c | 3 +++ net/ipv4/tcp_rate.c | 29 ++++++++++++++++++++++++++++- 5 files changed, 45 insertions(+), 2 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c50e6aec005a..fdcd00ffcb66 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,7 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b261c892605a..a69ed7f0030c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -764,7 +764,9 @@ struct tcp_skb_cb { union { struct { /* There is space for up to 24 bytes */ - __u32 in_flight;/* Bytes in flight when packet sent */ + __u32 in_flight:30,/* Bytes in flight at transmit */ + is_app_limited:1, /* cwnd not fully used? */ + unused:1; /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ @@ -883,6 +885,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ }; @@ -978,6 +981,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct skb_mstamp *now, struct rate_sample *rs); +void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index de02fb4b1349..2250f891f931 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -396,6 +396,9 @@ void tcp_init_sock(struct sock *sk) */ tp->snd_cwnd = TCP_INIT_CWND; + /* There's a bubble in the pipe until at least the first ACK. */ + tp->app_limited = ~0U; + /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. */ @@ -1014,6 +1017,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); + + tcp_rate_check_app_limited(sk); /* is sending application-limited? 
*/ + res = do_tcp_sendpages(sk, page, offset, size, flags); release_sock(sk); return res; @@ -1115,6 +1121,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 568947110b60..6234ebaa7db1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -487,6 +487,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; + /* There's a bubble in the pipe until at least the first ACK. */ + newtp->app_limited = ~0U; + tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 1daed6af6e80..52ff84be59ab 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -26,9 +26,13 @@ * other factors like applications or receiver window limits. The estimator * deliberately avoids using the inter-packet spacing approach because that * approach requires a large number of samples and sophisticated filtering. + * + * TCP flows can often be application-limited in request/response workloads. + * The estimator marks a bandwidth sample as application-limited if there + * was some moment during the sampled window of packets when there was no data + * ready to send in the write queue. */ - /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -58,6 +62,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; + TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -80,6 +85,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, after(scb->tx.delivered, rs->prior_delivered)) { rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; /* Find the duration of the "send phase" of this window: */ @@ -105,6 +111,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; + /* Clear app limited if bubble is acked and gone. */ + if (tp->app_limited && after(tp->delivered, tp->app_limited)) + tp->app_limited = 0; + /* TODO: there are multiple places throughout tcp_ack() to get * current time. Refactor the code using a new "tcp_acktag_state" * to carry current time, flags, stats like "tcp_sacktag_state". @@ -147,3 +157,20 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); } } + +/* If a gap is detected between sends, mark the socket application-limited. */ +void tcp_rate_check_app_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (/* We have less than one packet to send. */ + tp->write_seq - tp->snd_nxt < tp->mss_cache && + /* Nothing in sending host's qdisc queues or NIC tx queue. */ + sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && + /* We are not limited by CWND. 
*/ + tcp_packets_in_flight(tp) < tp->snd_cwnd && + /* All lost packets have been retransmitted. */ + tp->lost_out <= tp->retrans_out) + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; +} From eb8329e0a04db0061f714f033b4454326ba147f4 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:16 -0400 Subject: [PATCH 0542/1050] tcp: export data delivery rate This commit exports two new fields in struct tcp_info: tcpi_delivery_rate: The most recent goodput, as measured by tcp_rate_gen(). If the socket is limited by the sending application (e.g., no data to send), it reports the highest measurement instead of the most recent. The unit is bytes per second (like other rate fields in tcp_info). tcpi_delivery_rate_app_limited: A boolean indicating if the goodput was measured when the socket's throughput was limited by the sending application. This delivery rate information can be useful for applications that want to know the current throughput the TCP connection is seeing, e.g. adaptive bitrate video streaming. It can also be very useful for debugging or troubleshooting. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ++++- include/uapi/linux/tcp.h | 3 +++ net/ipv4/tcp.c | 11 ++++++++++- net/ipv4/tcp_rate.c | 12 +++++++++++- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fdcd00ffcb66..a17ae7b85218 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -213,7 +213,8 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 unused; + u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:7; u8 nonagle : 4,/* Disable Nagle algorithm?
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -271,6 +272,8 @@ struct tcp_sock { u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 482898fc433a..73ac0db487f8 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -167,6 +167,7 @@ struct tcp_info { __u8 tcpi_backoff; __u8 tcpi_options; __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; + __u8 tcpi_delivery_rate_app_limited:1; __u32 tcpi_rto; __u32 tcpi_ato; @@ -211,6 +212,8 @@ struct tcp_info { __u32 tcpi_min_rtt; __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ + + __u64 tcpi_delivery_rate; }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2250f891f931..f253e5019d22 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2712,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_time_stamp, intv; unsigned int start; int notsent_bytes; u64 rate64; @@ -2802,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; + + info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; + rate = READ_ONCE(tp->rate_delivered); + intv = READ_ONCE(tp->rate_interval_us); + if (rate && intv) { + rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; + do_div(rate64, intv); + put_unaligned(rate64, &info->tcpi_delivery_rate); + } } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 52ff84be59ab..9be1581a5a08 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -149,12 +149,22 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * for connections suffer heavy or prolonged losses. */ if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { - rs->interval_us = -1; if (!rs->is_retrans) pr_debug("tcp rate: %ld %d %u %u %u\n", rs->interval_us, rs->delivered, inet_csk(sk)->icsk_ca_state, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + rs->interval_us = -1; + return; + } + + /* Record the last non-app-limited or the highest app-limited bw */ + if (!rs->is_app_limited || + ((u64)rs->delivered * tp->rate_interval_us >= + (u64)tp->rate_delivered * rs->interval_us)) { + tp->rate_delivered = rs->delivered; + tp->rate_interval_us = rs->interval_us; + tp->rate_app_limited = rs->is_app_limited; } } From ed6e7268b930e0a9a65d895d368eac79a438d992 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:17 -0400 Subject: [PATCH 0543/1050] tcp: allow congestion control module to request TSO skb segment count Add the tso_segs_goal() function in tcp_congestion_ops to allow the congestion control module to specify the number of segments that should be in a TSO skb sent by tcp_write_xmit() and tcp_xmit_retransmit_queue(). 
The congestion control module can either request a particular number of segments in the TSO skbs that we transmit, or return 0 if it doesn't care. This allows the upcoming BBR congestion control module to select small TSO skb sizes if the module detects that the bottleneck bandwidth is very low, or that the connection is policed to a low rate. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a69ed7f0030c..f8f581fd05f5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -913,6 +913,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); + /* suggest number of segments for each skb to transmit (optional) */ + u32 (*tso_segs_goal)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e02c8ebf3ed4..01379567a732 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1566,6 +1566,17 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) return min_t(u32, segs, sk->sk_gso_max_segs); } +/* Return the number of segments we want in the skb we are transmitting. + * See if congestion control module wants to decide; otherwise, autosize. + */ +static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) +{ + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + + return tso_segs ? : tcp_tso_autosize(sk, mss_now); +} + /* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, @@ -2061,7 +2072,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } } - max_segs = tcp_tso_autosize(sk, mss_now); + max_segs = tcp_tso_segs(sk, mss_now); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2778,7 +2789,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } - max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); + max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked; int segs; From 1b3878ca1551f3baab2c408d1e703b5ef785a1b2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:18 -0400 Subject: [PATCH 0544/1050] tcp: export tcp_tso_autosize() and parameterize minimum number of TSO segments To allow congestion control modules to use the default TSO auto-sizing algorithm as one of the ingredients in their own decision about TSO sizing: 1) Export tcp_tso_autosize() so that CC modules can use it. 2) Change tcp_tso_autosize() to allow callers to specify a minimum number of segments per TSO skb, in case the congestion control module has a different notion of the best floor for TSO skbs for the connection right now. For very low-rate paths or policed connections it can be appropriate to use smaller TSO skbs.
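As a rough illustration of how these two changes compose (a sketch only, not part of this patch: the function name and the exact thresholds are invented here, though the BBR module introduced later in this series uses the same pattern), a congestion control module could implement the hook like this:

/* Hypothetical sketch: pick a TSO segment goal from the pacing rate.
 * Registered via the .tso_segs_goal member of tcp_congestion_ops.
 */
static u32 example_tso_segs_goal(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 min_segs;

	/* Below ~150 KByte/sec of pacing, allow skbs as small as one
	 * segment so pacing stays smooth; otherwise keep a floor of 2.
	 */
	min_segs = sk->sk_pacing_rate < 150000 ? 1 : 2;

	/* Let the exported autosizer do the rest, capped at 127 segments. */
	return min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), 0x7FU);
}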
Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index f8f581fd05f5..349204130d84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -533,6 +533,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 01379567a732..0bf3d481fa85 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1549,7 +1549,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1561,10 +1562,11 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) * This preserves ACK clocking and is consistent * with tcp_tso_should_defer() heuristic. */ - segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs); + segs = max_t(u32, bytes / mss_now, min_tso_segs); return min_t(u32, segs, sk->sk_gso_max_segs); } +EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1574,7 +1576,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; - return tso_segs ? : tcp_tso_autosize(sk, mss_now); + return tso_segs ? : + tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); } /* Returns the portion of skb which can be sent right away */ From 556c6b46d194cc0dbb6a5b22f1d2bbc699c86d8e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:19 -0400 Subject: [PATCH 0545/1050] tcp: export tcp_mss_to_mtu() for congestion control modules Export tcp_mss_to_mtu(), so that congestion control modules can use this to help calculate a pacing rate. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bf3d481fa85..7d025a7804b5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1362,6 +1362,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) } return mtu; } +EXPORT_SYMBOL(tcp_mss_to_mtu); /* MTU probing init per socket */ void tcp_mtup_init(struct sock *sk) From 77bfc174c38e558a3425d3b069aa2762b2fedfdd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:20 -0400 Subject: [PATCH 0546/1050] tcp: allow congestion control to expand send buffer differently Currently the TCP send buffer expands to twice cwnd, in order to allow limited transmits in the CA_Recovery state. 
This assumes that cwnd does not increase in CA_Recovery. For some congestion control algorithms, like the upcoming BBR module, if the losses in recovery do not indicate congestion then we may continue to raise cwnd multiplicatively in recovery. In such cases the current multiplier will falsely limit the sending rate, much as if it were limited by the application. This commit adds an optional congestion control callback to use a different multiplier to expand the TCP send buffer. For congestion control modules that do not specify this callback, TCP continues to use the previous default of 2. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_input.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 349204130d84..1aa9628ae608 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -917,6 +917,8 @@ struct tcp_congestion_ops { void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); /* suggest number of segments for each skb to transmit (optional) */ u32 (*tso_segs_goal)(struct sock *sk); + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ + u32 (*sndbuf_expand)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d9ed4bb96f74..13a2e70141f5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -289,6 +289,7 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr static void tcp_sndbuf_expand(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; int sndmem, per_mss; u32 nr_segs; @@ -309,7 +310,8 @@ static void tcp_sndbuf_expand(struct sock *sk) * Cubic needs 1.7 factor, rounded to 2 to include * extra cushion (application might react slowly to POLLOUT) */ - sndmem = 2 * nr_segs * per_mss; + sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; + sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); From c0402760f565ae066621ebf8720a32fba074d538 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:21 -0400 Subject: [PATCH 0547/1050] tcp: new CC hook to set sending rate with rate_sample in any CA state This commit introduces an optional new "omnipotent" hook, cong_control(), for congestion control modules. The cong_control() function is called at the end of processing an ACK (i.e., after updating sequence numbers, the SACK scoreboard, and loss detection). At that moment we have precise delivery rate information the congestion control module can use to control the sending behavior (using cwnd, TSO skb size, and pacing rate) in any CA state. This function can also be used by a congestion control that prefers not to use the default cwnd reduction approach (i.e., the PRR algorithm) during CA_Recovery to control the cwnd and sending rate during loss recovery. We take advantage of the fact that recent changes defer the retransmission or transmission of new data (e.g. by F-RTO) in recovery until the new tcp_cong_control() function is run.
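To make the shape of the new hook concrete, here is a minimal sketch (not part of this patch; the module name and the cwnd/pacing policy are invented, and a real module such as BBR is far more careful):

/* Hypothetical sketch of a cong_control implementation. A module that
 * sets .cong_control owns cwnd and sk_pacing_rate on every ACK, in any
 * CA state. The policy below is invented purely for illustration.
 */
static void example_cong_control(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u64 bw, bdp;

	if (rs->delivered < 0 || rs->interval_us <= 0)
		return;	/* invalid sample: no delivery rate information */

	/* Delivery rate of this sample, in packets per second. */
	bw = (u64)rs->delivered * USEC_PER_SEC;
	do_div(bw, rs->interval_us);

	/* Invented policy: pace at the measured rate and keep roughly one
	 * bandwidth-delay product in flight, with a floor of 4 packets.
	 */
	sk->sk_pacing_rate = min_t(u64, bw * tp->mss_cache,
				   sk->sk_max_pacing_rate);
	bdp = bw * tcp_min_rtt(tp);	/* min RTT in usec (~0U if unknown) */
	do_div(bdp, USEC_PER_SEC);
	bdp = min_t(u64, bdp, tp->snd_cwnd_clamp);	/* cap before narrowing */
	tp->snd_cwnd = max_t(u32, bdp, 4U);
}

A module like this must still supply ssthresh, which remains a required op in the registration check below.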
With this commit, we only run tcp_update_pacing_rate() if the congestion control is not using this new API. New congestion controls which use the new API do not want the TCP stack to run the default pacing rate calculation and overwrite whatever pacing rate they have chosen at initialization time. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_cong.c | 2 +- net/ipv4/tcp_input.c | 17 ++++++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1aa9628ae608..f83b7f220a65 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -919,6 +919,10 @@ struct tcp_congestion_ops { u32 (*tso_segs_goal)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); + /* call when packets are delivered to update cwnd and pacing rate, + * after all the ca_state processing. (optional) + */ + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 882caa4e72bc..1294af4e0127 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -69,7 +69,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !ca->cong_avoid) { + if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 13a2e70141f5..980a83edfa63 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2536,6 +2536,9 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + if (inet_csk(sk)->icsk_ca_ops->cong_control) + return; + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { @@ -3312,8 +3315,15 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * information. All transmissions or retransmissions are delayed afterwards.
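 * If the attached congestion control implements the new cong_control hook,
 * the hook is handed the rate_sample and takes over cwnd and pacing
 * entirely; the PRR and cong_avoid paths below are then skipped.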
*/ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, - int flag) + int flag, const struct rate_sample *rs) { + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cong_control) { + icsk->icsk_ca_ops->cong_control(sk, rs); + return; + } + if (tcp_in_cwnd_reduction(sk)) { /* Reduce cwnd if state mandates */ tcp_cwnd_reduction(sk, acked_sacked, flag); @@ -3683,7 +3693,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, &now, &rs); - tcp_cong_control(sk, ack, delivered, flag); + tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; @@ -5982,7 +5992,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } else tcp_init_metrics(sk); - tcp_update_pacing_rate(sk); + if (!inet_csk(sk)->icsk_ca_ops->cong_control) + tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; From 7e744171386ae6da1248d3d27d10b6dbdc54f0ff Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:22 -0400 Subject: [PATCH 0548/1050] tcp: increase ICSK_CA_PRIV_SIZE from 64 bytes to 88 The TCP CUBIC module already uses 64 bytes. The upcoming TCP BBR module uses 88 bytes. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 49dcad4fe99e..197a30d221e9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -134,8 +134,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[64 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) + u64 icsk_ca_priv[88 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ From 0f8782ea14974ce992618b55f0c041ef43ed0b78 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:23 -0400 Subject: [PATCH 0549/1050] tcp_bbr: add BBR congestion control This commit implements a new TCP congestion control algorithm: BBR (Bottleneck Bandwidth and RTT). A detailed description of BBR will be published in ACM Queue, Vol. 14 No. 5, September-October 2016, as "BBR: Congestion-Based Congestion Control". BBR has significantly increased throughput and reduced latency for connections on Google's internal backbone networks and google.com and YouTube Web servers. BBR requires only changes on the sender side, not in the network or the receiver side. Thus it can be incrementally deployed on today's Internet, or in datacenters. The Internet has predominantly used loss-based congestion control (largely Reno or CUBIC) since the 1980s, relying on packet loss as the signal to slow down. While this worked well for many years, loss-based congestion control is unfortunately out-dated in today's networks. On today's Internet, loss-based congestion control causes the infamous bufferbloat problem, often causing seconds of needless queuing delay, since it fills the bloated buffers in many last-mile links. 
On today's high-speed long-haul links using commodity switches with shallow buffers, loss-based congestion control has abysmal throughput because it over-reacts to losses caused by transient traffic bursts. In 1981 Kleinrock and Gale showed that the optimal operating point for a network maximizes delivered bandwidth while minimizing delay and loss, not only for single connections but for the network as a whole. Finding that optimal operating point has been elusive, since any single network measurement is ambiguous: network measurements are the result of both bandwidth and propagation delay, and those two cannot be measured simultaneously. While it is impossible to disambiguate any single bandwidth or RTT measurement, a connection's behavior over time tells a clearer story. BBR uses a measurement strategy designed to resolve this ambiguity. It combines these measurements with a robust servo loop using recent control systems advances to implement a distributed congestion control algorithm that reacts to actual congestion, not packet loss or transient queue delay, and is designed to converge with high probability to a point near the optimal operating point. In a nutshell, BBR creates an explicit model of the network pipe by sequentially probing the bottleneck bandwidth and RTT. On the arrival of each ACK, BBR derives the current delivery rate of the last round trip, and feeds it through a windowed max-filter to estimate the bottleneck bandwidth. Conversely it uses a windowed min-filter to estimate the round trip propagation delay. The max-filtered bandwidth and min-filtered RTT estimates form BBR's model of the network pipe. Using its model, BBR sets control parameters to govern sending behavior. The primary control is the pacing rate: BBR applies a gain multiplier to transmit faster or slower than the observed bottleneck bandwidth. The conventional congestion window (cwnd) is now the secondary control; the cwnd is set to a small multiple of the estimated BDP (bandwidth-delay product) in order to allow full utilization and bandwidth probing while bounding the potential amount of queue at the bottleneck. When a BBR connection starts, it enters STARTUP mode and applies a high gain to perform an exponential search to quickly probe the bottleneck bandwidth (doubling its sending rate each round trip, like slow start). However, instead of continuing until it fills up the buffer (i.e. a loss), or until delay or ACK spacing reaches some threshold (like Hystart), it uses its model of the pipe to estimate when that pipe is full: it estimates the pipe is full when it notices the estimated bandwidth has stopped growing. At that point it exits STARTUP and enters DRAIN mode, where it reduces its pacing rate to drain the queue it estimates it has created. Then BBR enters steady state. In steady state, PROBE_BW mode cycles between first pacing faster to probe for more bandwidth, then pacing slower to drain any queue it created if no more bandwidth was available, and then cruising at the estimated bandwidth to utilize the pipe without creating excess queue. Occasionally, on an as-needed basis, it sends significantly slower to probe for RTT (PROBE_RTT mode). BBR has been fully deployed on Google's wide-area backbone networks and we're experimenting with BBR on Google.com and YouTube on a global scale. Replacing CUBIC with BBR has resulted in significant improvements in network latency and application (RPC, browser, and video) metrics. For more details please refer to our upcoming ACM Queue publication.
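As a worked example of the model (the numbers here are invented for illustration, not measurements): if the max-filter estimates the bottleneck bandwidth at 2500 packets/sec (30 Mbit/s with 1500-byte packets) and the min-filter reports a 40 ms round-trip propagation delay, the estimated BDP is 2500 * 0.040 = 100 packets. While cruising in PROBE_BW, BBR then paces at about 1.0 * 30 Mbit/s and holds the cwnd near cwnd_gain * BDP = 2 * 100 = 200 packets, with periodic excursions to a 1.25 pacing gain to probe for more bandwidth and a 0.75 gain to drain any queue that probing created.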
Example performance results, to illustrate the difference between BBR and CUBIC: Resilience to random loss (e.g. from shallow buffers): Consider a netperf TCP_STREAM test lasting 30 secs on an emulated path with a 10Gbps bottleneck, 100ms RTT, and 1% packet loss rate. CUBIC gets 3.27 Mbps, and BBR gets 9150 Mbps (2798x higher). Low latency with the bloated buffers common in today's last-mile links: Consider a netperf TCP_STREAM test lasting 120 secs on an emulated path with a 10Mbps bottleneck, 40ms RTT, and 1000-packet bottleneck buffer. Both fully utilize the bottleneck bandwidth, but BBR achieves this with a median RTT 25x lower (43 ms instead of 1.09 secs). Our long-term goal is to improve the congestion control algorithms used on the Internet. We are hopeful that BBR can help advance the efforts toward this goal, and motivate the community to do further research. Test results, performance evaluations, feedback, and BBR-related discussions are very welcome in the public e-mail list for BBR: https://groups.google.com/forum/#!forum/bbr-dev NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, since pacing is integral to the BBR design and implementation. BBR without pacing would not function properly, and may incur unnecessary high packet loss rates. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 13 + net/ipv4/Kconfig | 18 + net/ipv4/Makefile | 1 + net/ipv4/tcp_bbr.c | 896 +++++++++++++++++++++++++++++++++ 4 files changed, 928 insertions(+) create mode 100644 net/ipv4/tcp_bbr.c diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index b5c366f87b3e..509cd961068d 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -124,6 +124,7 @@ enum { INET_DIAG_PEERS, INET_DIAG_PAD, INET_DIAG_MARK, + INET_DIAG_BBRINFO, __INET_DIAG_MAX, }; @@ -157,8 +158,20 @@ struct tcp_dctcp_info { __u32 dctcp_ab_tot; }; +/* INET_DIAG_BBRINFO */ + +struct tcp_bbr_info { + /* u64 bw: max-filtered BW (app throughput) estimate in Byte per sec: */ + __u32 bbr_bw_lo; /* lower 32 bits of bw */ + __u32 bbr_bw_hi; /* upper 32 bits of bw */ + __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ + __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ + __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ +}; + union tcp_cc_info { struct tcpvegas_info vegas; struct tcp_dctcp_info dctcp; + struct tcp_bbr_info bbr; }; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 50d6a9b49f6c..300b06888fdf 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -640,6 +640,21 @@ config TCP_CONG_CDG D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg +config TCP_CONG_BBR + tristate "BBR TCP" + default n + ---help--- + + BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to + maximize network utilization and minimize queues. It builds an explicit + model of the bottleneck delivery rate and path round-trip + propagation delay. It tolerates packet loss and delay unrelated to + congestion. It can operate over LAN, WAN, cellular, wifi, or cable + modem links.
It can coexist with flows that use loss-based congestion + control, and can operate with shallow buffers, deep buffers, + bufferbloat, policers, or AQM schemes that do not provide a delay + signal. It requires the fq ("Fair Queue") pacing packet scheduler. + choice prompt "Default TCP congestion control" default DEFAULT_CUBIC @@ -674,6 +689,9 @@ choice config DEFAULT_CDG bool "CDG" if TCP_CONG_CDG=y + config DEFAULT_BBR + bool "BBR" if TCP_CONG_BBR=y + config DEFAULT_RENO bool "Reno" endchoice diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9cfff1a0bf71..bc6a6c8b9bcd 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o +obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c new file mode 100644 index 000000000000..0ea66c2c9344 --- /dev/null +++ b/net/ipv4/tcp_bbr.c @@ -0,0 +1,896 @@ +/* Bottleneck Bandwidth and RTT (BBR) congestion control + * + * BBR congestion control computes the sending rate based on the delivery + * rate (throughput) estimated from ACKs. In a nutshell: + * + * On each ACK, update our model of the network path: + * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) + * min_rtt = windowed_min(rtt, 10 seconds) + * pacing_rate = pacing_gain * bottleneck_bandwidth + * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) + * + * The core algorithm does not react directly to packet losses or delays, + * although BBR may adjust the size of next send per ACK when loss is + * observed, or adjust the sending rate if it estimates there is a + * traffic policer, in order to keep the drop rate reasonable. + * + * BBR is described in detail in: + * "BBR: Congestion-Based Congestion Control", + * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, + * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. + * + * There is a public e-mail list for discussing BBR development and testing: + * https://groups.google.com/forum/#!forum/bbr-dev + * + * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, + * since pacing is integral to the BBR design and implementation. + * BBR without pacing would not function properly, and may incur unnecessary + * high packet loss rates. + */ +#include <linux/module.h> +#include <net/tcp.h> +#include <linux/inet_diag.h> +#include <linux/inet.h> +#include <linux/random.h> +#include <linux/win_minmax.h> + /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth + * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. + * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. + * Since the minimum window is >=4 packets, the lower bound isn't + * an issue. The upper bound isn't an issue with existing technologies. + */ +#define BW_SCALE 24 +#define BW_UNIT (1 << BW_SCALE) + +#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g.
gains) */ +#define BBR_UNIT (1 << BBR_SCALE) + +/* BBR has the following modes for deciding how fast to send: */ +enum bbr_mode { + BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ + BBR_DRAIN, /* drain any queue created during startup */ + BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ + BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */ +}; + +/* BBR congestion control block */ +struct bbr { + u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ + u32 min_rtt_stamp; /* timestamp of min_rtt_us */ + u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ + struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ + u32 rtt_cnt; /* count of packet-timed rounds elapsed */ + u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ + struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */ + u32 mode:3, /* current bbr_mode in state machine */ + prev_ca_state:3, /* CA state on previous ACK */ + packet_conservation:1, /* use packet conservation? */ + restore_cwnd:1, /* decided to revert cwnd to old value */ + round_start:1, /* start of packet-timed tx->ack round? */ + tso_segs_goal:7, /* segments we want in each skb we send */ + idle_restart:1, /* restarting after idle? */ + probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ + unused:5, + lt_is_sampling:1, /* taking long-term ("LT") samples now? */ + lt_rtt_cnt:7, /* round trips in long-term interval */ + lt_use_bw:1; /* use lt_bw as our bw estimate? */ + u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ + u32 lt_last_delivered; /* LT intvl start: tp->delivered */ + u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ + u32 lt_last_lost; /* LT intvl start: tp->lost */ + u32 pacing_gain:10, /* current gain for setting pacing rate */ + cwnd_gain:10, /* current gain for setting cwnd */ + full_bw_cnt:3, /* number of rounds without large bw gains */ + cycle_idx:3, /* current index in pacing_gain cycle array */ + unused_b:6; + u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ + u32 full_bw; /* recent bw, to estimate if pipe is full */ +}; + +#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ + +/* Window length of bw filter (in rounds): */ +static const int bbr_bw_rtts = CYCLE_LEN + 2; +/* Window length of min_rtt filter (in sec): */ +static const u32 bbr_min_rtt_win_sec = 10; +/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ +static const u32 bbr_probe_rtt_mode_ms = 200; +/* Skip TSO below the following bandwidth (bits/sec): */ +static const int bbr_min_tso_rate = 1200000; + +/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain + * that will allow a smoothly increasing pacing rate that will double each RTT + * and send the same number of packets per RTT that an un-paced, slow-starting + * Reno or CUBIC flow would: + */ +static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain + * the queue created in BBR_STARTUP in a single round: + */ +static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; +/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */ +static const int bbr_cwnd_gain = BBR_UNIT * 2; +/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ +static const int bbr_pacing_gain[] = { + BBR_UNIT * 5 / 4, /* probe for more available bw */ + BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ + BBR_UNIT, BBR_UNIT, BBR_UNIT, /* 
cruise at 1.0*bw to utilize pipe, */ + BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ +}; +/* Randomize the starting gain cycling phase over N phases: */ +static const u32 bbr_cycle_rand = 7; + +/* Try to keep at least this many packets in flight, if things go smoothly. For + * smooth functioning, a sliding window protocol ACKing every other packet + * needs at least 4 packets in flight: + */ +static const u32 bbr_cwnd_min_target = 4; + +/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* If bw has increased significantly (1.25x), there may be more bw available: */ +static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; +/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ +static const u32 bbr_full_bw_cnt = 3; + +/* "long-term" ("LT") bandwidth estimator parameters... */ +/* The minimum number of rounds in an LT bw sampling interval: */ +static const u32 bbr_lt_intvl_min_rtts = 4; +/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ +static const u32 bbr_lt_loss_thresh = 50; +/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ +static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; +/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ +static const u32 bbr_lt_bw_diff = 4000 / 8; +/* If we estimate we're policed, use lt_bw for this many round trips: */ +static const u32 bbr_lt_bw_max_rtts = 48; + +/* Do we estimate that STARTUP filled the pipe? */ +static bool bbr_full_bw_reached(const struct sock *sk) +{ + const struct bbr *bbr = inet_csk_ca(sk); + + return bbr->full_bw_cnt >= bbr_full_bw_cnt; +} + +/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ +static u32 bbr_max_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return minmax_get(&bbr->bw); +} + +/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ +static u32 bbr_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); +} + +/* Return rate in bytes per second, optionally with a gain. + * The order here is chosen carefully to avoid overflow of u64. This should + * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. + */ +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) +{ + rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache); + rate *= gain; + rate >>= BBR_SCALE; + rate *= USEC_PER_SEC; + return rate >> BW_SCALE; +} + +/* Pace using current bw estimate and a gain factor. In order to help drive the + * network toward lower queues while maintaining high utilization and low + * latency, the average pacing rate aims to be slightly (~1%) lower than the + * estimated bandwidth. This is an important aspect of the design. In this + * implementation this slightly lower pacing rate is achieved implicitly by not + * including link-layer headers in the packet size used for the pacing rate. + */ +static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; +} + +/* Return count of segments we want in the skbs we send, or 0 for default. 
*/ +static u32 bbr_tso_segs_goal(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->tso_segs_goal; +} + +static void bbr_set_tso_segs_goal(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 min_segs; + + min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; + bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), + 0x7FU); +} + +/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ +static void bbr_save_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) + bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ + bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); +} + +static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (event == CA_EVENT_TX_START && tp->app_limited) { + bbr->idle_restart = 1; + /* Avoid pointless buffer overflows: pace at est. bw if we don't + * need more speed (we're restarting from idle and app-limited). + */ + if (bbr->mode == BBR_PROBE_BW) + bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + } +} + +/* Find target cwnd. Right-size the cwnd based on min RTT and the + * estimated bottleneck bandwidth: + * + * cwnd = bw * min_rtt * gain = BDP * gain + * + * The key factor, gain, controls the amount of queue. While a small gain + * builds a smaller queue, it becomes more vulnerable to noise in RTT + * measurements (e.g., delayed ACKs or other ACK compression effects). This + * noise may cause BBR to under-estimate the rate. + * + * To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd; + u64 w; + + /* If we've never had a valid RTT sample, cap cwnd at the initial + * default. This should only happen when the connection is not using TCP + * timestamps and has retransmitted all of the SYN/SYNACK/data packets + * ACKed so far. In this case, an RTO can cut cwnd to 1, in which + * case we need to slow-start up toward something safe: TCP_INIT_CWND. + */ + if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ + return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ + + w = (u64)bw * bbr->min_rtt_us; + + /* Apply a gain to the given value, then remove the BW_SCALE shift. */ + cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + /* Allow enough full-sized skbs in flight to utilize end systems. */ + cwnd += 3 * bbr->tso_segs_goal; + + /* Reduce delayed ACKs by rounding up cwnd to the next even number. 
*/ + cwnd = (cwnd + 1) & ~1U; + + return cwnd; +} + +/* An optimization in BBR to reduce losses: On the first round of recovery, we + * follow the packet conservation principle: send P packets per P packets acked. + * After that, we slow-start and send at most 2*P packets per P packets acked. + * After recovery finishes, or upon undo, we restore the cwnd we had when + * recovery started (capped by the target cwnd based on estimated BDP). + * + * TODO(ycheng/ncardwell): implement a rate-based approach. + */ +static bool bbr_set_cwnd_to_recover_or_restore( + struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; + u32 cwnd = tp->snd_cwnd; + + /* An ACK for P pkts should release at most 2*P packets. We do this + * in two steps. First, here we deduct the number of lost packets. + * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. + */ + if (rs->losses > 0) + cwnd = max_t(s32, cwnd - rs->losses, 1); + + if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { + /* Starting 1st round of Recovery, so do packet conservation. */ + bbr->packet_conservation = 1; + bbr->next_rtt_delivered = tp->delivered; /* start round now */ + /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ + cwnd = tcp_packets_in_flight(tp) + acked; + } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { + /* Exiting loss recovery; restore cwnd saved before recovery. */ + bbr->restore_cwnd = 1; + bbr->packet_conservation = 0; + } + bbr->prev_ca_state = state; + + if (bbr->restore_cwnd) { + /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ + cwnd = max(cwnd, bbr->prior_cwnd); + bbr->restore_cwnd = 0; + } + + if (bbr->packet_conservation) { + *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); + return true; /* yes, using packet conservation */ + } + *new_cwnd = cwnd; + return false; +} + +/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss + * has drawn us down below target), or snap down to target if we're above it. + */ +static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, + u32 acked, u32 bw, int gain) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd = 0, target_cwnd = 0; + + if (!acked) + return; + + if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) + goto done; + + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ + target_cwnd = bbr_target_cwnd(sk, bw, gain); + if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ + cwnd = min(cwnd + acked, target_cwnd); + else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) + cwnd = cwnd + acked; + cwnd = max(cwnd, bbr_cwnd_min_target); + +done: + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ + tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target); +} + +/* End cycle phase if it's time and/or we hit the phase's in-flight target. 
*/ +static bool bbr_is_next_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool is_full_length = + skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) > + bbr->min_rtt_us; + u32 inflight, bw; + + /* The pacing_gain of 1.0 paces at the estimated bw to try to fully + * use the pipe without increasing the queue. + */ + if (bbr->pacing_gain == BBR_UNIT) + return is_full_length; /* just use wall clock time */ + + inflight = rs->prior_in_flight; /* what was in-flight before ACK? */ + bw = bbr_max_bw(sk); + + /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at + * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is + * small (e.g. on a LAN). We do not persist if packets are lost, since + * a path with small buffers may not hold that much. + */ + if (bbr->pacing_gain > BBR_UNIT) + return is_full_length && + (rs->losses || /* perhaps pacing_gain*BDP won't fit */ + inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain)); + + /* A pacing_gain < 1.0 tries to drain extra queue we added if bw + * probing didn't find more bw. If inflight falls to match BDP then we + * estimate queue is drained; persisting would underutilize the pipe. + */ + return is_full_length || + inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT); +} + +static void bbr_advance_cycle_phase(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); + bbr->cycle_mstamp = tp->delivered_mstamp; + bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; +} + +/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ +static void bbr_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && + bbr_is_next_cycle_phase(sk, rs)) + bbr_advance_cycle_phase(sk); +} + +static void bbr_reset_startup_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_STARTUP; + bbr->pacing_gain = bbr_high_gain; + bbr->cwnd_gain = bbr_high_gain; +} + +static void bbr_reset_probe_bw_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_PROBE_BW; + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = bbr_cwnd_gain; + bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand); + bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ +} + +static void bbr_reset_mode(struct sock *sk) +{ + if (!bbr_full_bw_reached(sk)) + bbr_reset_startup_mode(sk); + else + bbr_reset_probe_bw_mode(sk); +} + +/* Start a new long-term sampling interval. */ +static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies; + bbr->lt_last_delivered = tp->delivered; + bbr->lt_last_lost = tp->lost; + bbr->lt_rtt_cnt = 0; +} + +/* Completely reset long-term bandwidth sampling. */ +static void bbr_reset_lt_bw_sampling(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_bw = 0; + bbr->lt_use_bw = 0; + bbr->lt_is_sampling = false; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Long-term bw sampling interval is done. Estimate whether we're policed. */ +static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 diff; + + if (bbr->lt_bw) { /* do we have bw from a previous interval? 
*/ + /* Is new bw close to the lt_bw from the previous interval? */ + diff = abs(bw - bbr->lt_bw); + if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || + (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= + bbr_lt_bw_diff)) { + /* All criteria are met; estimate we're policed. */ + bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ + bbr->lt_use_bw = 1; + bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ + bbr->lt_rtt_cnt = 0; + return; + } + } + bbr->lt_bw = bw; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of + * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and + * explicitly models their policed rate, to reduce unnecessary losses. We + * estimate that we're policed if we see 2 consecutive sampling intervals with + * consistent throughput and high packet loss. If we think we're being policed, + * set lt_bw to the "long-term" average delivery rate from those 2 intervals. + */ +static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 lost, delivered; + u64 bw; + s32 t; + + if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ + if (bbr->mode == BBR_PROBE_BW && bbr->round_start && + ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { + bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ + bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ + } + return; + } + + /* Wait for the first loss before sampling, to let the policer exhaust + * its tokens and estimate the steady-state rate allowed by the policer. + * Starting samples earlier includes bursts that over-estimate the bw. + */ + if (!bbr->lt_is_sampling) { + if (!rs->losses) + return; + bbr_reset_lt_bw_sampling_interval(sk); + bbr->lt_is_sampling = true; + } + + /* To avoid underestimates, reset sampling if we run out of data. */ + if (rs->is_app_limited) { + bbr_reset_lt_bw_sampling(sk); + return; + } + + if (bbr->round_start) + bbr->lt_rtt_cnt++; /* count round trips in this interval */ + if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) + return; /* sampling interval needs to be longer */ + if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { + bbr_reset_lt_bw_sampling(sk); /* interval is too long */ + return; + } + + /* End sampling interval when a packet is lost, so we estimate the + * policer tokens were exhausted. Stopping the sampling before the + * tokens are exhausted under-estimates the policed rate. + */ + if (!rs->losses) + return; + + /* Calculate packets lost and delivered in sampling interval. */ + lost = tp->lost - bbr->lt_last_lost; + delivered = tp->delivered - bbr->lt_last_delivered; + /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ + if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) + return; + + /* Find average delivery rate in this sampling interval. */ + t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp); + if (t < 1) + return; /* interval is less than one jiffy, so wait */ + t = jiffies_to_usecs(t); + /* Interval long enough for jiffies_to_usecs() to return a bogus 0? 
*/ + if (t < 1) { + bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ + return; + } + bw = (u64)delivered * BW_UNIT; + do_div(bw, t); + bbr_lt_bw_interval_done(sk, bw); +} + +/* Estimate the bandwidth based on how fast packets are delivered */ +static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->round_start = 0; + if (rs->delivered < 0 || rs->interval_us <= 0) + return; /* Not a valid observation */ + + /* See if we've reached the next RTT */ + if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { + bbr->next_rtt_delivered = tp->delivered; + bbr->rtt_cnt++; + bbr->round_start = 1; + bbr->packet_conservation = 0; + } + + bbr_lt_bw_sampling(sk, rs); + + /* Divide delivered by the interval to find a (lower bound) bottleneck + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. + */ + bw = (u64)rs->delivered * BW_UNIT; + do_div(bw, rs->interval_us); + + /* If this sample is application-limited, it is likely to have a very + * low delivered count that represents application behavior rather than + * the available network rate. Such a sample could drag down estimated + * bw, causing needless slow-down. Thus, to continue to send at the + * last measured network rate, we filter out app-limited samples unless + * they describe the path bw at least as well as our bw model. + * + * So the goal during app-limited phase is to proceed with the best + * network rate no matter how long. We automatically leave this + * phase when app writes faster than the network can deliver :) + */ + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { + /* Incorporate new sample into our max bw filter. */ + minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); + } +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh; + + if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) + return; + + bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; + if (bbr_max_bw(sk) >= bw_thresh) { + bbr->full_bw = bbr_max_bw(sk); + bbr->full_bw_cnt = 0; + return; + } + ++bbr->full_bw_cnt; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. 
*/ +static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ + bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + tcp_packets_in_flight(tcp_sk(sk)) <= + bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) + bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ +} + +/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and + * periodically drain the bottleneck queue, to converge to measure the true + * min_rtt (unloaded propagation delay). This allows the flows to keep queues + * small (reducing queuing delay and packet loss) and achieve fairness among + * BBR flows. + * + * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, + * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. + * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed + * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and + * re-enter the previous mode. BBR uses 200ms to approximately bound the + * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). + * + * Note that flows need only pay 2% if they are busy sending over the last 10 + * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have + * natural silences or low-rate periods within 10 seconds where the rate is low + * enough for long enough to drain its queue in the bottleneck. We pick up + * these min RTT measurements opportunistically with our min_rtt filter. :-) + */ +static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool filter_expired; + + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + filter_expired = after(tcp_time_stamp, + bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); + if (rs->rtt_us >= 0 && + (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { + bbr->min_rtt_us = rs->rtt_us; + bbr->min_rtt_stamp = tcp_time_stamp; + } + + if (bbr_probe_rtt_mode_ms > 0 && filter_expired && + !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { + bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + bbr_save_cwnd(sk); /* note cwnd so we can restore it */ + bbr->probe_rtt_done_stamp = 0; + } + + if (bbr->mode == BBR_PROBE_RTT) { + /* Ignore low rate samples during this mode. */ + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; + /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ + if (!bbr->probe_rtt_done_stamp && + tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { + bbr->probe_rtt_done_stamp = tcp_time_stamp + + msecs_to_jiffies(bbr_probe_rtt_mode_ms); + bbr->probe_rtt_round_done = 0; + bbr->next_rtt_delivered = tp->delivered; + } else if (bbr->probe_rtt_done_stamp) { + if (bbr->round_start) + bbr->probe_rtt_round_done = 1; + if (bbr->probe_rtt_round_done && + after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) { + bbr->min_rtt_stamp = tcp_time_stamp; + bbr->restore_cwnd = 1; /* snap to prior_cwnd */ + bbr_reset_mode(sk); + } + } + } + bbr->idle_restart = 0; +} + +static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) +{ + bbr_update_bw(sk, rs); + bbr_update_cycle_phase(sk, rs); + bbr_check_full_bw_reached(sk, rs); + bbr_check_drain(sk, rs); + bbr_update_min_rtt(sk, rs); +} + +static void bbr_main(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw; + + bbr_update_model(sk, rs); + + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_tso_segs_goal(sk); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); +} + +static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->prior_cwnd = 0; + bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ + bbr->rtt_cnt = 0; + bbr->next_rtt_delivered = 0; + bbr->prev_ca_state = TCP_CA_Open; + bbr->packet_conservation = 0; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_time_stamp; + + minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + + /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC); + sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ + bbr_set_pacing_rate(sk, bw, bbr_high_gain); + + bbr->restore_cwnd = 0; + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp.v64 = 0; + bbr->cycle_idx = 0; + bbr_reset_lt_bw_sampling(sk); + bbr_reset_startup_mode(sk); +} + +static u32 bbr_sndbuf_expand(struct sock *sk) +{ + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; +} + +/* In theory BBR does not need to undo the cwnd since it does not + * always reduce cwnd on losses (see bbr_main()). Keep it for now. + */ +static u32 bbr_undo_cwnd(struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +/* Entering loss recovery, so save cwnd for when we exit or undo recovery. 
*/ +static u32 bbr_ssthresh(struct sock *sk) +{ + bbr_save_cwnd(sk); + return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */ +} + +static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || + ext & (1 << (INET_DIAG_VEGASINFO - 1))) { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = bbr_bw(sk); + + bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE; + memset(&info->bbr, 0, sizeof(info->bbr)); + info->bbr.bbr_bw_lo = (u32)bw; + info->bbr.bbr_bw_hi = (u32)(bw >> 32); + info->bbr.bbr_min_rtt = bbr->min_rtt_us; + info->bbr.bbr_pacing_gain = bbr->pacing_gain; + info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; + *attr = INET_DIAG_BBRINFO; + return sizeof(info->bbr); + } + return 0; +} + +static void bbr_set_state(struct sock *sk, u8 new_state) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (new_state == TCP_CA_Loss) { + struct rate_sample rs = { .losses = 1 }; + + bbr->prev_ca_state = TCP_CA_Loss; + bbr->full_bw = 0; + bbr->round_start = 1; /* treat RTO like end of a round */ + bbr_lt_bw_sampling(sk, &rs); + } +} + +static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { + .flags = TCP_CONG_NON_RESTRICTED, + .name = "bbr", + .owner = THIS_MODULE, + .init = bbr_init, + .cong_control = bbr_main, + .sndbuf_expand = bbr_sndbuf_expand, + .undo_cwnd = bbr_undo_cwnd, + .cwnd_event = bbr_cwnd_event, + .ssthresh = bbr_ssthresh, + .tso_segs_goal = bbr_tso_segs_goal, + .get_info = bbr_get_info, + .set_state = bbr_set_state, +}; + +static int __init bbr_register(void) +{ + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_bbr_cong_ops); +} + +static void __exit bbr_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_bbr_cong_ops); +} + +module_init(bbr_register); +module_exit(bbr_unregister); + +MODULE_AUTHOR("Van Jacobson "); +MODULE_AUTHOR("Neal Cardwell "); +MODULE_AUTHOR("Yuchung Cheng "); +MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); From 4bdcc6ca2158f43b1770e020f9b71ab8a808594f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 20 Sep 2016 08:14:08 +0300 Subject: [PATCH 0550/1050] mlxsw: spectrum: Make offload stats functions static The offload stats functions are local to this file, so make them static. Fixes: fc1bbb0f1831 ('mlxsw: spectrum: Implement offload stats ndo [..]') Signed-off-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index fa312610c2d2..43d508111b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -823,7 +823,7 @@ err_span_port_mtu_update: return err; } -int +static int mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -855,7 +855,7 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, return 0; } -bool mlxsw_sp_port_has_offload_stats(int attr_id) +static bool mlxsw_sp_port_has_offload_stats(int attr_id) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -865,8 +865,8 @@ bool mlxsw_sp_port_has_offload_stats(int attr_id) return false; } -int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +static int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: From 9b86a8d19bd6406a10de5f924bf2a003a502d427 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 20 Sep 2016 12:00:52 +0530 Subject: [PATCH 0551/1050] cxgb4/cxgb4vf: Allocate more queues for 25G and 100G adapters We were missing checks for 25G and 100G while checking the port speed, which led to fewer queues being allocated for 25G and 100G adapters and thus to low throughput. Add the missing checks to both the NIC and vNIC drivers. Also fix the port advertisement for 25G and 100G in the ethtool output. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 4 ++-- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 15 +++++++++++++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 7 ++++++- drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 6 ++++++ .../net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 15 +++++++++++---- drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 9 +++++++-- 6 files changed, 45 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 2e2aa9fec9bb..edd23386b47d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -419,8 +419,8 @@ struct link_config { unsigned short supported; /* link capabilities */ unsigned short advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c762a8c8c954..3ceafb55d6da 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4305,10 +4305,17 @@ static const struct pci_error_handlers cxgb4_eeh = { .resume = eeh_resume, }; +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s).
+ */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, @@ -4756,8 +4763,12 @@ static void print_port_info(const struct net_device *dev) bufp += sprintf(bufp, "1000/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) bufp += sprintf(bufp, "10G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) + bufp += sprintf(bufp, "25G/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) bufp += sprintf(bufp, "40G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) + bufp += sprintf(bufp, "100G/"); if (bufp != buf) --bufp; sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index dc92c80a75f4..660204bff726 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3627,7 +3627,8 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf) } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ FW_PORT_CAP_ANEG) /** @@ -7196,8 +7197,12 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; lc = &pi->link_cfg; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a89b30720e38..30507d44422c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -2265,6 +2265,12 @@ enum fw_port_cap { FW_PORT_CAP_802_3_ASM_DIR = 0x8000, }; +#define FW_PORT_CAP_SPEED_S 0 +#define FW_PORT_CAP_SPEED_M 0x3f +#define FW_PORT_CAP_SPEED_V(x) ((x) << FW_PORT_CAP_SPEED_S) +#define FW_PORT_CAP_SPEED_G(x) \ + (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M) + enum fw_port_mdi { FW_PORT_CAP_MDI_UNCHANGED, FW_PORT_CAP_MDI_AUTO, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 8ee541431e8b..17a2bbcf93f0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -108,8 +108,8 @@ struct link_config { unsigned int supported; /* link capabilities */ unsigned int advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? 
*/ @@ -271,10 +271,17 @@ static inline bool is_10g_port(const struct link_config *lc) return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0; } +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). + */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline unsigned int core_ticks_per_usec(const struct adapter *adapter) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 427bfa71388b..b5622b1689e9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -314,8 +314,9 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ - FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG) + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ + FW_PORT_CAP_ANEG) /** * init_link_config - initialize a link's SW state @@ -1712,8 +1713,12 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; /* * Scan all of our "ports" (Virtual Interfaces) looking for From e6449539828ac3b7c74b648793291640bcca8259 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:05 +0800 Subject: [PATCH 0552/1050] r8152: move some functions Move the following functions forward. r8152_mmd_indirect() r8152_mmd_read() r8152_mmd_write() r8152_eee_en() r8152b_enable_eee() r8153_eee_en() r8153_enable_eee() r8152b_enable_fc() r8153_aldps_en() Signed-off-by: Hayes Wang Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 224 ++++++++++++++++++++-------------------- 1 file changed, 112 insertions(+), 112 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f41a8ad4740e..ae7db460baa2 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2552,6 +2552,77 @@ static void r8152_aldps_en(struct r8152 *tp, bool enable) } } +static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) +{ + ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); + ocp_reg_write(tp, OCP_EEE_DATA, reg); + ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); +} + +static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) +{ + u16 data; + + r8152_mmd_indirect(tp, dev, reg); + data = ocp_reg_read(tp, OCP_EEE_DATA); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); + + return data; +} + +static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) +{ + r8152_mmd_indirect(tp, dev, reg); + ocp_reg_write(tp, OCP_EEE_DATA, data); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); +} + +static void r8152_eee_en(struct r8152 *tp, bool enable) +{ + u16 config1, config2, config3; + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; + config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); + config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; + config1 |= sd_rise_time(1); + config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; + config3 |= fast_snr(42); + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | + RX_QUIET_EN); + config1 |= sd_rise_time(7); + config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); + config3 |= fast_snr(511); + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); + ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); + ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); +} + +static void r8152b_enable_eee(struct r8152 *tp) +{ + r8152_eee_en(tp, true); + r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); +} + +static void r8152b_enable_fc(struct r8152 *tp) +{ + u16 anar; + + anar = r8152_mdio_read(tp, MII_ADVERTISE); + anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + r8152_mdio_write(tp, MII_ADVERTISE, anar); +} + static void rtl8152_disable(struct r8152 *tp) { r8152_aldps_en(tp, false); @@ -2701,6 +2772,47 @@ static void r8152b_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } +static void r8153_aldps_en(struct r8152 *tp, bool enable) +{ + u16 data; + + data = ocp_reg_read(tp, OCP_POWER_CFG); + if (enable) { + data |= EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + } else { + data &= ~EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + msleep(20); + } +} + +static void r8153_eee_en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + u16 config; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config = ocp_reg_read(tp, OCP_EEE_CFG); + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config |= EEE10_EN; + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config &= ~EEE10_EN; + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CFG, config); +} + +static void r8153_enable_eee(struct r8152 *tp) +{ + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); +} + static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 
ocp_data; @@ -2866,21 +2978,6 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } -static void r8153_aldps_en(struct r8152 *tp, bool enable) -{ - u16 data; - - data = ocp_reg_read(tp, OCP_POWER_CFG); - if (enable) { - data |= EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - } else { - data &= ~EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - msleep(20); - } -} - static void rtl8153_disable(struct r8152 *tp) { r8153_aldps_en(tp, false); @@ -3246,103 +3343,6 @@ static int rtl8152_close(struct net_device *netdev) return res; } -static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) -{ - ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); - ocp_reg_write(tp, OCP_EEE_DATA, reg); - ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); -} - -static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) -{ - u16 data; - - r8152_mmd_indirect(tp, dev, reg); - data = ocp_reg_read(tp, OCP_EEE_DATA); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); - - return data; -} - -static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) -{ - r8152_mmd_indirect(tp, dev, reg); - ocp_reg_write(tp, OCP_EEE_DATA, data); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); -} - -static void r8152_eee_en(struct r8152 *tp, bool enable) -{ - u16 config1, config2, config3; - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; - config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); - config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; - config1 |= sd_rise_time(1); - config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; - config3 |= fast_snr(42); - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | - RX_QUIET_EN); - config1 |= sd_rise_time(7); - config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); - config3 |= fast_snr(511); - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); - ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); - ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); -} - -static void r8152b_enable_eee(struct r8152 *tp) -{ - r8152_eee_en(tp, true); - r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); -} - -static void r8153_eee_en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - u16 config; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config = ocp_reg_read(tp, OCP_EEE_CFG); - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config |= EEE10_EN; - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config &= ~EEE10_EN; - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CFG, config); -} - -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - -static void r8152b_enable_fc(struct r8152 *tp) -{ - u16 anar; - - anar = r8152_mdio_read(tp, MII_ADVERTISE); - anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; - r8152_mdio_write(tp, MII_ADVERTISE, anar); -} - static void rtl_tally_reset(struct r8152 *tp) { u32 ocp_data; From 2dd436daac7848dbf3fe799cf59c1408871a14e3 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:06 +0800 Subject: [PATCH 0553/1050] r8152: move enabling PHY Move enabling PHY to init(), otherwise some other settings may fail. 
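In other words, the PHY must be brought out of power-down (BMCR_PDOWN cleared) before the remaining PHY settings are applied, or those later writes may fail. A minimal sketch of that ordering, reusing the MDIO helpers already present in the driver (the wrapper name example_phy_power_up is hypothetical, for illustration only):

/* Illustrative sketch, not part of the patch: take the PHY out of
 * power-down first, then apply the remaining PHY configuration.
 */
static void example_phy_power_up(struct r8152 *tp)
{
	u16 data = r8152_mdio_read(tp, MII_BMCR);

	if (data & BMCR_PDOWN) {
		data &= ~BMCR_PDOWN;	/* clear power-down */
		r8152_mdio_write(tp, MII_BMCR, data);
	}
	/* only after this point is it safe to configure EEE, ALDPS, etc. */
}

This is exactly the ordering the patch below establishes by moving the BMCR_PDOWN handling into init().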
Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 43 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ae7db460baa2..dbf11ba9d91c 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,14 +2632,6 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { - u16 data; - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - set_bit(PHY_RESET, &tp->flags); } @@ -2818,16 +2810,6 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; - if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || - tp->version == RTL_VER_05) - ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3355,10 +3337,17 @@ static void rtl_tally_reset(struct r8152 *tp) static void r8152b_init(struct r8152 *tp) { u32 ocp_data; + u16 data; if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } + r8152_aldps_en(tp, false); if (tp->version == RTL_VER_01) { @@ -3394,6 +3383,7 @@ static void r8152b_init(struct r8152 *tp) static void r8153_init(struct r8152 *tp) { u32 ocp_data; + u16 data; int i; if (test_bit(RTL8152_UNPLUG, &tp->flags)) @@ -3416,6 +3406,23 @@ static void r8153_init(struct r8152 *tp) msleep(20); } + if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || + tp->version == RTL_VER_05) + ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); + + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } + + for (i = 0; i < 500; i++) { + ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK; + if (ocp_data == PHY_STAT_LAN_ON) + break; + msleep(20); + } + usb_disable_lpm(tp->udev); r8153_u2p3en(tp, false); From ef39df8eaba48c0de779440f41a648b17a560953 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:07 +0800 Subject: [PATCH 0554/1050] r8152: move PHY settings to hw_phy_cfg Move the PHY-related settings together into hw_phy_cfg(). Signed-off-by: Hayes Wang Signed-off-by: David S.
Miller --- drivers/net/usb/r8152.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index dbf11ba9d91c..9ce5bd549482 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,6 +2632,10 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { + r8152b_enable_eee(tp); + r8152_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -2839,6 +2843,10 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); + r8153_enable_eee(tp); + r8153_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -3369,9 +3377,6 @@ static void r8152b_init(struct r8152 *tp) SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK; ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data); - r8152b_enable_eee(tp); - r8152_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); /* enable rx aggregation */ @@ -3490,9 +3495,6 @@ static void r8153_init(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); - r8153_enable_eee(tp); - r8153_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); r8153_u2p3en(tp, true); } From af0287ec10c62c84cc5cd1bad4fd37644a1ac41d Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:08 +0800 Subject: [PATCH 0555/1050] r8152: remove r8153_enable_eee Remove r8153_enable_eee(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 9ce5bd549482..e7a05dd2be97 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2803,12 +2803,6 @@ static void r8153_eee_en(struct r8152 *tp, bool enable) ocp_reg_write(tp, OCP_EEE_CFG, config); } -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 ocp_data; @@ -2843,7 +2837,9 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); - r8153_enable_eee(tp); + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); + r8153_aldps_en(tp, true); r8152b_enable_fc(tp); From d768c61bc353a0e0de3f839e1de99eee7d4eca10 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:09 +0800 Subject: [PATCH 0556/1050] r8152: disable ALDPS and EEE before setting PHY Disable ALDPS and EEE to avoid the possible failure when setting the PHY. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e7a05dd2be97..c254248863d4 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "5" +#define NET_VERSION "6" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." 
NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -2808,6 +2808,13 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; + /* disable ALDPS before updating the PHY parameters */ + r8153_aldps_en(tp, false); + + /* disable EEE before updating the PHY parameters */ + r8153_eee_en(tp, false); + ocp_reg_write(tp, OCP_EEE_ADV, 0); + if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3390,7 +3397,6 @@ static void r8153_init(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; - r8153_aldps_en(tp, false); r8153_u1u2en(tp, false); for (i = 0; i < 500; i++) { From b5036cd4ed3173ab8cdbc85e2ba74acf46bafb51 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 20 Sep 2016 16:17:22 +0200 Subject: [PATCH 0557/1050] ipmr, ip6mr: return lastuse relative to now When I introduced the lastuse member I made a subtle error: it was returned as an absolute value, but that is meaningless to user-space, as it doesn't allow one to see exactly how old an entry is. Let's make it similar to how the bridge returns such values and make it relative to "now" (jiffies). This allows us to show the actual age of the entries and is much more useful (e.g. user-space daemons can age out entries, iproute2 can display the lastuse properly). Fixes: 43b9e1274060 ("net: ipmr/ip6mr: add support for keeping an entry age") Reported-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 7 +++++-- net/ipv6/ip6mr.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 26253328d227..a87bcd2d4a94 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2076,6 +2076,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2105,12 +2106,14 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6122f9c5cc49..fccb5dd91902 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2239,6 +2239,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2269,12 +2270,14 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ?
jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; From 9f7f797c1df40ee6c0329e9e53ea9ca0d224f55d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:49 +0200 Subject: [PATCH 0558/1050] mlxsw: pci: Add lag related resources to resource query Add the max lag and max ports in lag resources to the resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 6 +++++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index d2e32979319c..51f27a35749e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -269,8 +269,12 @@ struct mlxsw_driver { }; struct mlxsw_resources { - u8 max_span_valid:1; + u8 max_span_valid:1, + max_lag_valid:1, + max_ports_in_lag_valid:1; u8 max_span; + u8 max_lag; + u8 max_ports_in_lag; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 1d1360c178bb..cb284eaeec39 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1156,6 +1156,8 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_RESOURCES_TABLE_END_ID 0xffff #define MLXSW_MAX_SPAN_ID 0x2420 +#define MLXSW_MAX_LAG_ID 0x2520 +#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1167,6 +1169,14 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_span = val; resources->max_span_valid = 1; break; + case MLXSW_MAX_LAG_ID: + resources->max_lag = val; + resources->max_lag_valid = 1; + break; + case MLXSW_MAX_PORTS_IN_LAG_ID: + resources->max_ports_in_lag = val; + resources->max_ports_in_lag_valid = 1; + break; default: break; } From ce0bd2b0c57a2d97ea89f87f61b9f5758139bcb8 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:50 +0200 Subject: [PATCH 0559/1050] mlxsw: spectrum: lag resources: use resource data instead of consts Use the max lag and max ports in lag resources returned by the resource query instead of storing them as consts. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 26 ++++++------ drivers/net/ethernet/mellanox/mlxsw/core.h | 4 -- drivers/net/ethernet/mellanox/mlxsw/pci.c | 12 ------ .../net/ethernet/mellanox/mlxsw/spectrum.c | 41 +++++++++++++++---- .../net/ethernet/mellanox/mlxsw/spectrum.h | 5 +-- .../mellanox/mlxsw/spectrum_switchdev.c | 4 +- .../net/ethernet/mellanox/mlxsw/switchx2.c | 4 -- 7 files changed, 50 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 068ee65a960b..aa33d58b9f81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1100,10 +1100,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } - if (mlxsw_driver->profile->used_max_lag && - mlxsw_driver->profile->used_max_port_per_lag) { - alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * - mlxsw_driver->profile->max_port_per_lag; + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->resources); + if (err) + goto err_bus_init; + + if (mlxsw_core->resources.max_lag_valid && + mlxsw_core->resources.max_ports_in_lag_valid) { + alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag * + mlxsw_core->resources.max_ports_in_lag; mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_core->lag.mapping) { err = -ENOMEM; @@ -1111,11 +1116,6 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, - &mlxsw_core->resources); - if (err) - goto err_bus_init; - err = mlxsw_emad_init(mlxsw_core); if (err) goto err_emad_init; @@ -1146,10 +1146,10 @@ err_hwmon_init: err_devlink_register: mlxsw_emad_fini(mlxsw_core); err_emad_init: - mlxsw_bus->fini(bus_priv); -err_bus_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: + mlxsw_bus->fini(bus_priv); +err_bus_init: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: devlink_free(devlink); @@ -1615,7 +1615,7 @@ EXPORT_SYMBOL(mlxsw_core_skb_receive); static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 port_index) { - return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + return mlxsw_core->resources.max_ports_in_lag * lag_id + port_index; } @@ -1644,7 +1644,7 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, { int i; - for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) { int index = mlxsw_core_lag_mapping_index(mlxsw_core, lag_id, i); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 51f27a35749e..558d1ce89531 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -179,8 +179,6 @@ struct mlxsw_swid_config { struct mlxsw_config_profile { u16 used_max_vepa_channels:1, - used_max_lag:1, - used_max_port_per_lag:1, used_max_mid:1, used_max_pgt:1, used_max_system_port:1, @@ -194,8 +192,6 @@ struct mlxsw_config_profile { used_adaptive_routing_group_cap:1, used_kvd_sizes:1; u8 max_vepa_channels; - u16 max_lag; - u16 max_port_per_lag; u16 max_mid; u16 max_pgt; u16 max_system_port; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index cb284eaeec39..57c2d3474bb6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1232,18 +1232,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_max_vepa_channels_set( mbox, profile->max_vepa_channels); } - if (profile->used_max_lag) { - mlxsw_cmd_mbox_config_profile_set_max_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_lag_set( - mbox, profile->max_lag); - } - if (profile->used_max_port_per_lag) { - mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_port_per_lag_set( - mbox, profile->max_port_per_lag); - } if (profile->used_max_mid) { mlxsw_cmd_mbox_config_profile_set_max_mid_set( mbox, 1); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 43d508111b17..0f96d1108568 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2887,7 +2887,9 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char slcr_pl[MLXSW_REG_SLCR_LEN]; + int err; mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | MLXSW_REG_SLCR_LAG_HASH_DMAC | @@ -2898,7 +2900,26 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) MLXSW_REG_SLCR_LAG_HASH_SPORT | MLXSW_REG_SLCR_LAG_HASH_DPORT | MLXSW_REG_SLCR_LAG_HASH_IPPROTO); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + if (err) + return err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid)) + return -EIO; + + mlxsw_sp->lags = kcalloc(resources->max_lag, + sizeof(struct mlxsw_sp_upper), + GFP_KERNEL); + if (!mlxsw_sp->lags) + return -ENOMEM; + + return 0; +} + +static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->lags); } static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, @@ -2982,6 +3003,7 @@ err_span_init: err_router_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: + mlxsw_sp_lag_fini(mlxsw_sp); err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: @@ -3001,6 +3023,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); @@ -3013,10 +3036,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = MLXSW_SP_LAG_MAX, - .used_max_port_per_lag = 1, - .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, @@ -3683,12 +3702,14 @@ static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u16 lag_id = mlxsw_sp_port->lag_id; + struct mlxsw_resources *resources; int i, count = 0; if (!mlxsw_sp_port->lagged) return true; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { struct mlxsw_sp_port *lag_port; lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); @@ 
-3894,11 +3915,13 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, u16 *p_lag_id) { + struct mlxsw_resources *resources; struct mlxsw_sp_upper *lag; int free_lag_id = -1; int i; - for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_lag; i++) { lag = mlxsw_sp_lag_get(mlxsw_sp, i); if (lag->ref_count) { if (lag->dev == lag_dev) { @@ -3932,9 +3955,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 *p_port_index) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { *p_port_index = i; return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 49f4cafce148..a056aaa6c584 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -56,9 +56,6 @@ #define MLXSW_SP_RFID_BASE 15360 #define MLXSW_SP_RIF_MAX 800 -#define MLXSW_SP_LAG_MAX 64 -#define MLXSW_SP_PORT_PER_LAG_MAX 16 - #define MLXSW_SP_MID_MAX 7000 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 @@ -290,7 +287,7 @@ struct mlxsw_sp { #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; struct mlxsw_sp_upper master_bridge; - struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; + struct mlxsw_sp_upper *lags; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7186c4810785..2b04b76b503e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1205,9 +1205,11 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); if (mlxsw_sp_port) return mlxsw_sp_port; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 377daa4d509c..8b15bf0c744d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1512,10 +1512,6 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) static struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = 64, - .used_max_port_per_lag = 1, - .max_port_per_lag = 16, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, From 2acd10c51bd2ce3a39c75fa3ff113e32e2413c6f Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:51 +0200 Subject: [PATCH 0560/1050] mlxsw: pci: Add KVD size related resources Add the KVD size, and the minimum sizes for the single and double sections, to the resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 8 +++++++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 558d1ce89531..76ad566a0421 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -267,10 +267,16 @@ struct mlxsw_driver { struct mlxsw_resources { u8 max_span_valid:1, max_lag_valid:1, - max_ports_in_lag_valid:1; + max_ports_in_lag_valid:1, + kvd_size_valid:1, + kvd_single_min_size_valid:1, + kvd_double_min_size_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; + u32 kvd_size; + u32 kvd_single_min_size; + u32 kvd_double_min_size; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 57c2d3474bb6..7b2ab1ec1290 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1158,6 +1158,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_MAX_SPAN_ID 0x2420 #define MLXSW_MAX_LAG_ID 0x2520 #define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 +#define MLXSW_KVD_SIZE_ID 0x1001 +#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 +#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1177,6 +1180,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_ports_in_lag = val; resources->max_ports_in_lag_valid = 1; break; + case MLXSW_KVD_SIZE_ID: + resources->kvd_size = val; + resources->kvd_size_valid = 1; + break; + case MLXSW_KVD_SINGLE_MIN_SIZE_ID: + resources->kvd_single_min_size = val; + resources->kvd_single_min_size_valid = 1; + break; + case MLXSW_KVD_DOUBLE_MIN_SIZE_ID: + resources->kvd_double_min_size = val; + resources->kvd_double_min_size_valid = 1; + break; default: break; } From 403547d38d0b99f589a0d87f3a5f352895e54aae Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:52 +0200 Subject: [PATCH 0561/1050] mlxsw: profile: Add KVD resources to profile config Use the resources from the resource query to determine the values for the profile configuration. Add the KVD section sizes determined by the driver to the resources struct. Change the profile struct and values to match these changes. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 15 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 73 +++++++++++++++---- .../net/ethernet/mellanox/mlxsw/spectrum.c | 7 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 3 +- 4 files changed, 76 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 76ad566a0421..1193bdcc4407 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -190,7 +190,8 @@ struct mlxsw_config_profile { used_max_pkey:1, used_ar_sec:1, used_adaptive_routing_group_cap:1, - used_kvd_sizes:1; + used_kvd_split_data:1; /* indicates the kvd split values are used */ + u8 max_vepa_channels; u16 max_mid; u16 max_pgt; @@ -210,8 +211,9 @@ struct mlxsw_config_profile { u16 adaptive_routing_group_cap; u8 arn; u32 kvd_linear_size; - u32 kvd_hash_single_size; - u32 kvd_hash_double_size; + u16 kvd_hash_granularity; + u8 kvd_hash_single_parts; + u8 kvd_hash_double_parts; u8 resource_query_enable; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -277,6 +279,13 @@ struct mlxsw_resources { u32 kvd_size; u32 kvd_single_min_size; u32 kvd_double_min_size; + + /* Internal resources. + * Determined by the SW, not queried from the HW. + */ + u32 kvd_single_size; + u32 kvd_double_size; + u32 kvd_linear_size; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7b2ab1ec1290..c2d2fa1a1d3a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1234,10 +1234,52 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, return -EIO; } +static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) +{ + u32 singles_size, doubles_size, linear_size; + + if (!resources->kvd_single_min_size_valid || + !resources->kvd_double_min_size_valid || + !profile->used_kvd_split_data) + return -EIO; + + linear_size = profile->kvd_linear_size; + + /* The hash part is what is left of the kvd after the + * linear part. It is split into the single size and + * double size according to the parts ratio from the + * profile. Both sizes must be multiples of the + * granularity from the profile. + */ + doubles_size = (resources->kvd_size - linear_size); + doubles_size *= profile->kvd_hash_double_parts; + doubles_size /= (profile->kvd_hash_double_parts + + profile->kvd_hash_single_parts); + doubles_size /= profile->kvd_hash_granularity; + doubles_size *= profile->kvd_hash_granularity; + singles_size = resources->kvd_size - doubles_size - + linear_size; + + /* Check that the results are legal.
*/ + if (singles_size < resources->kvd_single_min_size || + doubles_size < resources->kvd_double_min_size || + resources->kvd_size < linear_size) + return -EIO; + + resources->kvd_single_size = singles_size; + resources->kvd_double_size = doubles_size; + resources->kvd_linear_size = linear_size; + + return 0; +} + static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, - const struct mlxsw_config_profile *profile) + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) { int i; + int err; mlxsw_cmd_mbox_zero(mbox); @@ -1323,19 +1365,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } - if (profile->used_kvd_sizes) { - mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( - mbox, profile->kvd_linear_size); - mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( - mbox, profile->kvd_hash_single_size); + if (resources->kvd_size_valid) { + err = mlxsw_pci_profile_get_kvd_sizes(profile, resources); + if (err) + return err; + + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox, + resources->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox, + 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox, + resources->kvd_single_size); mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( - mbox, profile->kvd_hash_double_size); + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox, + resources->kvd_double_size); } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) @@ -1537,7 +1582,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_query_resources; - err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile); + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources); if (err) goto err_config_profile; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 0f96d1108568..d8e3da2f0667 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3057,10 +3057,11 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, - .used_kvd_sizes = 1, + .used_kvd_split_data = 1, + .kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY, + .kvd_hash_single_parts = 2, + .kvd_hash_double_parts = 1, .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, - .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, - .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a056aaa6c584..208dfeedde44 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -74,8 +74,7 @@ #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ -#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ -#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ +#define MLXSW_SP_KVD_GRANULARITY 128 /* Maximum delay buffer needed in case of 
PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. From b8a09f0a0938acee81c386e3c27e7aca78b087eb Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:53 +0200 Subject: [PATCH 0562/1050] mlxsw: pci: Add max virtual routers resource Add the max number of virtual routers to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 1193bdcc4407..e936dc9bdb9b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -272,13 +272,15 @@ struct mlxsw_resources { max_ports_in_lag_valid:1, kvd_size_valid:1, kvd_single_min_size_valid:1, - kvd_double_min_size_valid:1; + kvd_double_min_size_valid:1, + max_virtual_routers_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; u32 kvd_size; u32 kvd_single_min_size; u32 kvd_double_min_size; + u16 max_virtual_routers; /* Internal resources. * Determined by the SW, not queried from the HW. diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c2d2fa1a1d3a..cb95f9a07200 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1161,6 +1161,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_KVD_SIZE_ID 0x1001 #define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 #define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 +#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1192,6 +1193,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->kvd_double_min_size = val; resources->kvd_double_min_size_valid = 1; break; + case MLXSW_MAX_VIRTUAL_ROUTERS_ID: + resources->max_virtual_routers = val; + resources->max_virtual_routers_valid = 1; + break; default: break; } From 9497c042bfa9012cdefaddd8588b031bfae59a7c Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:54 +0200 Subject: [PATCH 0563/1050] mlxsw: spectrum: Implement max virtual routers resource Replace max virtual routers const with the result from the resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 43 ++++++++++++++++--- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 208dfeedde44..352079ef96b6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -64,8 +64,6 @@ #define MLXSW_SP_LPM_TREE_MAX 22 #define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) -#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256 - #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ #define MLXSW_SP_BYTES_PER_CELL 96 @@ -249,7 +247,7 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_router { struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; - struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; struct { struct delayed_work dw; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4afb49854eec..ded19f00ee72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -372,10 +372,12 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (!vr->used) return vr; @@ -417,11 +419,14 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, enum mlxsw_sp_l3proto proto) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (vr->used && vr->proto == proto && vr->tb_id == tb_id) return vr; @@ -555,15 +560,33 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) &vr->fib->prefix_usage); } -static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_virtual_routers_valid) + return -EIO; + + mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers, + sizeof(struct mlxsw_sp_vr), + GFP_KERNEL); + if (!mlxsw_sp->router.vrs) + return -ENOMEM; + + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; vr->id = i; } + + return 0; +} + +static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->router.vrs); } struct mlxsw_sp_neigh_key { @@ -1523,14 +1546,21 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) err = __mlxsw_sp_router_init(mlxsw_sp); if (err) return err; + mlxsw_sp_lpm_init(mlxsw_sp); - mlxsw_sp_vrs_init(mlxsw_sp); - err = mlxsw_sp_neigh_init(mlxsw_sp); + err = mlxsw_sp_vrs_init(mlxsw_sp); + if (err) + goto err_vrs_init; + + err = mlxsw_sp_neigh_init(mlxsw_sp); if (err) goto err_neigh_init; + return 0; err_neigh_init: + mlxsw_sp_vrs_fini(mlxsw_sp); 
+err_vrs_init: __mlxsw_sp_router_fini(mlxsw_sp); return err; } @@ -1538,6 +1568,7 @@ err_neigh_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_vrs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); } From e44d49cbbc5fb1a310b71212acc4e1f378a8fd91 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:55 +0200 Subject: [PATCH 0564/1050] mlxsw: pci: Add some miscellaneous resources Add max system ports, max regions and max vlan groups to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 10 ++++++++-- drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ------ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index e936dc9bdb9b..097e56014c0c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -267,13 +267,16 @@ struct mlxsw_driver { }; struct mlxsw_resources { - u8 max_span_valid:1, + u32 max_span_valid:1, max_lag_valid:1, max_ports_in_lag_valid:1, kvd_size_valid:1, kvd_single_min_size_valid:1, kvd_double_min_size_valid:1, - max_virtual_routers_valid:1; + max_virtual_routers_valid:1, + max_system_ports_valid:1, + max_vlan_groups_valid:1, + max_regions_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; @@ -281,6 +284,9 @@ struct mlxsw_resources { u32 kvd_single_min_size; u32 kvd_double_min_size; u16 max_virtual_routers; + u16 max_system_ports; + u16 max_vlan_groups; + u16 max_regions; /* Internal resources. * Determined by the SW, not queried from the HW. 
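A queried limit is only trustworthy once firmware has actually reported it, which is what the per-field valid bits encode. A consumer-side sketch of the guard pattern (a hypothetical fragment modeled on the mlxsw_sp_vrs_init() check earlier in the series; assumes a struct mlxsw_core *core in scope):

	struct mlxsw_resources *res = mlxsw_core_resources_get(core);

	if (!res->max_system_ports_valid)
		return -EIO;	/* firmware never reported this resource */
	/* only now is res->max_system_ports safe to use for sizing tables */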
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index cb95f9a07200..826b5020c01a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1162,6 +1162,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 #define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 #define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 +#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 +#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 +#define MLXSW_MAX_REGIONS_ID 0x2901 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1197,6 +1200,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_virtual_routers = val; resources->max_virtual_routers_valid = 1; break; + case MLXSW_MAX_SYSTEM_PORT_ID: + resources->max_system_ports = val; + resources->max_system_ports_valid = 1; + break; + case MLXSW_MAX_VLAN_GROUPS_ID: + resources->max_vlan_groups = val; + resources->max_vlan_groups_valid = 1; + break; + case MLXSW_MAX_REGIONS_ID: + resources->max_regions = val; + resources->max_regions_valid = 1; + break; default: break; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d8e3da2f0667..9acc3e022543 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3040,12 +3040,6 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, .max_pgt = 0, - .used_max_system_port = 1, - .max_system_port = 64, - .used_max_vlan_groups = 1, - .max_vlan_groups = 127, - .used_max_regions = 1, - .max_regions = 400, .used_flood_tables = 1, .used_flood_mode = 1, .flood_mode = 3, From 274df7fb77fffd243336c6affa2f9469e8f11122 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:56 +0200 Subject: [PATCH 0565/1050] mlxsw: pci: Add max router interface resource Add the max number of rif (router interfaces) to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 097e56014c0c..c4f550b6f783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -276,7 +276,8 @@ struct mlxsw_resources { max_virtual_routers_valid:1, max_system_ports_valid:1, max_vlan_groups_valid:1, - max_regions_valid:1; + max_regions_valid:1, + max_rif_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; @@ -287,6 +288,7 @@ struct mlxsw_resources { u16 max_system_ports; u16 max_vlan_groups; u16 max_regions; + u16 max_rif; /* Internal resources. * Determined by the SW, not queried from the HW. 
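As the pci.c hunk below shows, wiring up a queried resource is a fixed two-step recipe: define the firmware resource ID, then add a parse case that records the value and sets the valid bit. A sketch with a deliberately invented resource (MLXSW_MAX_FOO_ID and the max_foo fields are hypothetical, for illustration only):

	#define MLXSW_MAX_FOO_ID 0x2F00	/* hypothetical ID, not a real firmware resource */

	/* fragment of mlxsw_pci_resources_query_parse() */
	switch (id) {
	case MLXSW_MAX_FOO_ID:
		resources->max_foo = val;	/* hypothetical field */
		resources->max_foo_valid = 1;	/* hypothetical valid bit */
		break;
	default:
		break;
	}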
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 826b5020c01a..e742bd4e8894 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1165,6 +1165,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 #define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 #define MLXSW_MAX_REGIONS_ID 0x2901 +#define MLXSW_MAX_RIF_ID 0x2C02 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1212,6 +1213,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_regions = val; resources->max_regions_valid = 1; break; + case MLXSW_MAX_RIF_ID: + resources->max_rif = val; + resources->max_rif_valid = 1; + break; default: break; } From 8f8a62d462492a043349a08abc51e2ad65b1f49a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:57 +0200 Subject: [PATCH 0566/1050] mlxsw: spectrum: Implement max rif resource Replace max rif const with using the result from resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 13 ++++---- .../net/ethernet/mellanox/mlxsw/spectrum.h | 9 ++++-- .../ethernet/mellanox/mlxsw/spectrum_router.c | 31 +++++++++++++++++-- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9acc3e022543..80f27b504444 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3017,7 +3017,6 @@ err_rx_listener_register: static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - int i; mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); @@ -3029,8 +3028,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); WARN_ON(!list_empty(&mlxsw_sp->fids)); - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -3172,13 +3169,15 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) if (!mlxsw_sp->rifs[i]) return i; - return MLXSW_SP_RIF_MAX; + return MLXSW_SP_INVALID_RIF; } static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, @@ -3258,7 +3257,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return ERR_PTR(-ERANGE); err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); @@ -3490,7 +3489,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return -ERANGE; err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 352079ef96b6..73cae211a5ce 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -54,7 +54,7 @@ #define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ #define MLXSW_SP_RFID_BASE 15360 -#define MLXSW_SP_RIF_MAX 800 +#define MLXSW_SP_INVALID_RIF 0xffff #define MLXSW_SP_MID_MAX 7000 @@ -269,7 +269,7 @@ struct mlxsw_sp { DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); } br_mids; struct list_head fids; /* VLAN-aware bridge FIDs */ - struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX]; + struct mlxsw_sp_rif **rifs; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -477,9 +477,12 @@ static inline struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + + for (i = 0; i < resources->max_rif; i++) if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) return mlxsw_sp->rifs[i]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ded19f00ee72..cc653ac2d7d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1522,19 +1522,46 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_rif_valid) + return -EIO; + + mlxsw_sp->rifs = kcalloc(resources->max_rif, + sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; } static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; mlxsw_reg_rgcr_pack(rgcr_pl, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); } int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) From 63c43787d35e45562a6b5927e2edc8f4783d95b8 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 19 Sep 2016 16:17:57 +0200 Subject: [PATCH 0567/1050] vti6: fix input path Since commit 1625f4529957, vti6 is broken: all input packets are dropped (LINUX_MIB_XFRMINNOSTATES is incremented). XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 is set by vti6_rcv() before calling xfrm6_rcv()/xfrm6_rcv_spi(), thus we cannot reset that value to NULL in xfrm6_rcv_spi(). A new function, xfrm6_rcv_tnl(), is added that makes it possible to pass a value to xfrm6_rcv_spi(), so that xfrm6_rcv() is not touched (this function is used in several handlers).
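In outline, the tunnel pointer is now threaded through the new entry point while the legacy entry point passes NULL; a sketch of the resulting call chain, condensed from the diff below (not a drop-in):

	int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
	{
		/* the tunnel context set by vti6_rcv() survives into xfrm_input() */
		return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
				     0, t);
	}

	int xfrm6_rcv(struct sk_buff *skb)
	{
		/* legacy callers: no tunnel context */
		return xfrm6_rcv_tnl(skb, NULL);
	}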
CC: Alexey Kodanev Fixes: 1625f4529957 ("net/xfrm_input: fix possible NULL deref of tunnel.ip6->parms.i_key") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 +++- net/ipv6/ip6_vti.c | 4 +--- net/ipv6/xfrm6_input.c | 16 +++++++++++----- net/ipv6/xfrm6_tunnel.c | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6f243c..17934312eecb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1540,8 +1540,10 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t); int xfrm6_transport_finish(struct sk_buff *skb, int async); +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 52a2f735881f..5bd3afdcc771 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -321,11 +321,9 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; - rcu_read_unlock(); - return xfrm6_rcv(skb); + return xfrm6_rcv_tnl(skb, t); } rcu_read_unlock(); return -EINVAL; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 00a2d40677d6..b5789562aded 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -21,9 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm6_extract_header(skb); } -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t) { - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); @@ -49,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return -1; } -int xfrm6_rcv(struct sk_buff *skb) +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], - 0); + 0, t); +} +EXPORT_SYMBOL(xfrm6_rcv_tnl); + +int xfrm6_rcv(struct sk_buff *skb) +{ + return xfrm6_rcv_tnl(skb, NULL); } EXPORT_SYMBOL(xfrm6_rcv); - int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5743044cd660..e1c0bbe7996c 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -236,7 +236,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi); + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, From 8d58790b832e13d6006d842037732304af357c3c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 19 Sep 2016 21:34:01 +0200 Subject: [PATCH 0568/1050] net: can: ifi: Configure transmitter delay Configure the transmitter delay register at +0x1c to correctly handle the CAN FD bitrate switch (BRS). 
This moves the SSP (secondary sample point) to a proper offset, so that the TDC mechanism works and won't generate error frames on the CAN link. Signed-off-by: Marek Vasut Cc: Marc Kleine-Budde Cc: Mark Rutland Cc: Oliver Hartkopp Cc: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 2d1d22eec750..368bb0710d8f 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -81,6 +81,10 @@ #define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6 BIT(15) #define IFI_CANFD_TDELAY 0x1c +#define IFI_CANFD_TDELAY_DEFAULT 0xb +#define IFI_CANFD_TDELAY_MASK 0x3fff +#define IFI_CANFD_TDELAY_ABS BIT(14) +#define IFI_CANFD_TDELAY_EN BIT(15) #define IFI_CANFD_ERROR 0x20 #define IFI_CANFD_ERROR_TX_OFFSET 0 @@ -641,7 +645,7 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) struct ifi_canfd_priv *priv = netdev_priv(ndev); const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u16 brp, sjw, tseg1, tseg2; + u16 brp, sjw, tseg1, tseg2, tdc; /* Configure bit timing */ brp = bt->brp - 2; @@ -664,6 +668,11 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) (brp << IFI_CANFD_TIME_PRESCALE_OFF) | (sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8), priv->base + IFI_CANFD_FTIME); + + /* Configure transmitter delay */ + tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK; + writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc, + priv->base + IFI_CANFD_TDELAY); } static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id, From c2f672fc94642bae96821a393f342edcfa9794a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Sep 2016 15:45:26 +0200 Subject: [PATCH 0569/1050] xfrm: state lookup can be lockless This is called from the packet input path, we get lock contention if many cpus handle ipsec in parallel. After recent rcu conversion it is safe to call __xfrm_state_lookup without the spinlock. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ba8bf518ba14..a38fdead38ea 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1431,9 +1431,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 { struct xfrm_state *x; - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); return x; } EXPORT_SYMBOL(xfrm_state_lookup); From 554af0c396380baf416f54c439b99b495180b2f4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 7 Sep 2016 13:37:01 +0100 Subject: [PATCH 0570/1050] MIPS: vDSO: Fix Malta EVA mapping to vDSO page structs The page structures associated with the vDSO pages in the kernel image are calculated using virt_to_page(), which uses __pa() under the hood to find the pfn associated with the virtual address. The vDSO data pointers however point to kernel symbols, so __pa_symbol() should really be used instead. Since there is no equivalent to virt_to_page() which uses __pa_symbol(), fix init_vdso_image() to work directly with pfns, calculated with __phys_to_pfn(__pa_symbol(...)). 
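The distinction matters because __pa() assumes the kernel's linear RAM mapping, while __pa_symbol() is the translation defined for kernel-image symbols. A sketch of the corrected lookup, condensed from the diff below (assumes image->data is page aligned, which the BUG_ON()s enforce):

	unsigned long data_pfn = __phys_to_pfn(__pa_symbol(image->data));

	for (i = 0; i < num_pages; i++)
		image->mapping.pages[i] = pfn_to_page(data_pfn + i);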
This issue broke the Malta Enhanced Virtual Addressing (EVA) configuration which has a non-default implementation of __pa_symbol(). This is because it uses a physical alias so that the kernel executes from KSeg0 (VA 0x80000000 -> PA 0x00000000), while RAM is provided to the kernel in the KUSeg range (VA 0x00000000 -> PA 0x80000000) which uses the same underlying RAM. Since there are no page structures associated with the low physical address region, some arbitrary kernel memory would be interpreted as a page structure for the vDSO pages and badness ensues. Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Signed-off-by: James Hogan Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Cc: # 4.4.x- Patchwork: https://patchwork.linux-mips.org/patch/14229/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/vdso.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 9abe447a4b48..f9dbfb14af33 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -39,16 +39,16 @@ static struct vm_special_mapping vdso_vvar_mapping = { static void __init init_vdso_image(struct mips_vdso_image *image) { unsigned long num_pages, i; + unsigned long data_pfn; BUG_ON(!PAGE_ALIGNED(image->data)); BUG_ON(!PAGE_ALIGNED(image->size)); num_pages = image->size / PAGE_SIZE; - for (i = 0; i < num_pages; i++) { - image->mapping.pages[i] = - virt_to_page(image->data + (i * PAGE_SIZE)); - } + data_pfn = __phys_to_pfn(__pa_symbol(image->data)); + for (i = 0; i < num_pages; i++) + image->mapping.pages[i] = pfn_to_page(data_pfn + i); } static int __init init_vdso(void) From 371a015344b6e270e7e3632107d9554ec6d27a6b Mon Sep 17 00:00:00 2001 From: "Yadi.hu" Date: Sun, 18 Sep 2016 18:52:31 +0800 Subject: [PATCH 0571/1050] i2c-eg20t: fix race between i2c init and interrupt enable The eg20t driver calls request_irq() before pch_base_address, the base address of the I2C controller's registers, is assigned an effective value. One possible scenario is that an interrupt which is not from the eg20t arrives immediately after request_irq() is executed, when the I2C controller shares an interrupt number with other devices. Since the interrupt handler pch_i2c_handler() is already active as a shared action, it will be called and will read its own registers to determine whether the interrupt came from itself. At that moment, since the base address of the I2C registers has not yet been remapped into kernel space, the handler will access an illegal address and an error occurs.
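The fix is purely an ordering change in probe; a condensed sketch (simplified from the diff below, with error handling and adapter registration omitted):

	/* 1) finish all per-channel setup, including register base addresses */
	for (i = 0; i < adap_info->ch_num; i++) {
		/* ...initialize adap_info->pch_data[i]... */
	}

	/* 2) only then expose the shared handler */
	ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
			  KBUILD_MODNAME, adap_info);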
Signed-off-by: Yadi.hu Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-eg20t.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 137125b5eae7..5ce71ce7b6c4 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -773,13 +773,6 @@ static int pch_i2c_probe(struct pci_dev *pdev, /* Set the number of I2C channel instance */ adap_info->ch_num = id->driver_data; - ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED, - KBUILD_MODNAME, adap_info); - if (ret) { - pch_pci_err(pdev, "request_irq FAILED\n"); - goto err_request_irq; - } - for (i = 0; i < adap_info->ch_num; i++) { pch_adap = &adap_info->pch_data[i].pch_adapter; adap_info->pch_i2c_suspended = false; @@ -797,6 +790,17 @@ static int pch_i2c_probe(struct pci_dev *pdev, pch_adap->dev.of_node = pdev->dev.of_node; pch_adap->dev.parent = &pdev->dev; + } + + ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED, + KBUILD_MODNAME, adap_info); + if (ret) { + pch_pci_err(pdev, "request_irq FAILED\n"); + goto err_request_irq; + } + + for (i = 0; i < adap_info->ch_num; i++) { + pch_adap = &adap_info->pch_data[i].pch_adapter; pch_i2c_init(&adap_info->pch_data[i]); From ecfb6d8a041cc2ca80bc69ffc20c00067d190df5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Sep 2016 09:22:33 -0700 Subject: [PATCH 0572/1050] libnvdimm: fix devm_nvdimm_memremap() error path The internal alloc_nvdimm_map() helper might fail, particularly if the memory region is already busy. Report request_mem_region() failures and check for the failure. Reported-by: Ryan Chen Signed-off-by: Dan Williams --- drivers/nvdimm/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 715583f69d28..4d7bbd2df5c0 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -99,8 +99,11 @@ static struct nvdimm_map *alloc_nvdimm_map(struct device *dev, nvdimm_map->size = size; kref_init(&nvdimm_map->kref); - if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) + if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) { + dev_err(&nvdimm_bus->dev, "failed to request %pa + %zd for %s\n", + &offset, size, dev_name(dev)); goto err_request_region; + } if (flags) nvdimm_map->mem = memremap(offset, size, flags); @@ -171,6 +174,9 @@ void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, kref_get(&nvdimm_map->kref); nvdimm_bus_unlock(dev); + if (!nvdimm_map) + return NULL; + if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map)) return NULL; From 11294d63ac915230a36b0603c62134ef7b173d0a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Sep 2016 09:21:26 -0700 Subject: [PATCH 0573/1050] nfit: fail DSMs that return non-zero status by default For the DSMs where the kernel knows the format of the output buffer and originates those DSMs from within the kernel, return -EIO for any non-zero status. If the BIOS is indicating a status that we do not know how to handle, fail the DSM. 
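Reduced to its core, the policy is a default-deny translation: decode the few statuses with defined meanings, then treat any other non-zero status as a hard failure. A sketch (not the full xlat_status() from the diff below; only the ARS start case is shown):

	static int xlat_status_sketch(unsigned int cmd, u32 status)
	{
		switch (cmd) {
		case ND_CMD_ARS_START:
			if ((status & 0xffff) == NFIT_ARS_START_BUSY)
				return -EBUSY;	/* known: scan already running */
			break;
		default:
			break;
		}
		if (status)	/* anything else non-zero: fail the DSM */
			return -EIO;
		return 0;
	}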
Cc: Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 48 +++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 80cc7c089a15..e1d5ea6d5e40 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -94,54 +94,50 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } -static int xlat_status(void *buf, unsigned int cmd) +static int xlat_status(void *buf, unsigned int cmd, u32 status) { struct nd_cmd_clear_error *clear_err; struct nd_cmd_ars_status *ars_status; - struct nd_cmd_ars_start *ars_start; - struct nd_cmd_ars_cap *ars_cap; u16 flags; switch (cmd) { case ND_CMD_ARS_CAP: - ars_cap = buf; - if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + if ((status & 0xffff) == NFIT_ARS_CAP_NONE) return -ENOTTY; /* Command failed */ - if (ars_cap->status & 0xffff) + if (status & 0xffff) return -EIO; /* No supported scan types for this range */ flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; - if ((ars_cap->status >> 16 & flags) == 0) + if ((status >> 16 & flags) == 0) return -ENOTTY; break; case ND_CMD_ARS_START: - ars_start = buf; /* ARS is in progress */ - if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + if ((status & 0xffff) == NFIT_ARS_START_BUSY) return -EBUSY; /* Command failed */ - if (ars_start->status & 0xffff) + if (status & 0xffff) return -EIO; break; case ND_CMD_ARS_STATUS: ars_status = buf; /* Command failed */ - if (ars_status->status & 0xffff) + if (status & 0xffff) return -EIO; /* Check extended status (Upper two bytes) */ - if (ars_status->status == NFIT_ARS_STATUS_DONE) + if (status == NFIT_ARS_STATUS_DONE) return 0; /* ARS is in progress */ - if (ars_status->status == NFIT_ARS_STATUS_BUSY) + if (status == NFIT_ARS_STATUS_BUSY) return -EBUSY; /* No ARS performed for the current boot */ - if (ars_status->status == NFIT_ARS_STATUS_NONE) + if (status == NFIT_ARS_STATUS_NONE) return -EAGAIN; /* @@ -149,19 +145,19 @@ static int xlat_status(void *buf, unsigned int cmd) * agent wants the scan to stop. If we didn't overflow * then just continue with the returned results. */ - if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (status == NFIT_ARS_STATUS_INTR) { if (ars_status->flags & NFIT_ARS_F_OVERFLOW) return -ENOSPC; return 0; } /* Unknown status */ - if (ars_status->status >> 16) + if (status >> 16) return -EIO; break; case ND_CMD_CLEAR_ERROR: clear_err = buf; - if (clear_err->status & 0xffff) + if (status & 0xffff) return -EIO; if (!clear_err->cleared) return -EIO; @@ -172,6 +168,9 @@ static int xlat_status(void *buf, unsigned int cmd) break; } + /* all other non-zero status results in an error */ + if (status) + return -EIO; return 0; } @@ -186,10 +185,10 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nd_cmd_pkg *call_pkg = NULL; const char *cmd_name, *dimm_name; unsigned long cmd_mask, dsm_mask; + u32 offset, fw_status = 0; acpi_handle handle; unsigned int func; const u8 *uuid; - u32 offset; int rc, i; func = cmd; @@ -317,6 +316,15 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, out_obj->buffer.pointer + offset, out_size); offset += out_size; } + + /* + * Set fw_status for all the commands with a known format to be + * later interpreted by xlat_status(). 
+ */ + if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR) || (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR))) + fw_status = *(u32 *) out_obj->buffer.pointer; + if (offset + in_buf.buffer.length < buf_len) { if (i >= 1) { /* @@ -325,7 +333,7 @@ */ rc = buf_len - offset - in_buf.buffer.length; if (cmd_rc) - *cmd_rc = xlat_status(buf, cmd); + *cmd_rc = xlat_status(buf, cmd, fw_status); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -335,7 +343,7 @@ } else { rc = 0; if (cmd_rc) - *cmd_rc = xlat_status(buf, cmd); + *cmd_rc = xlat_status(buf, cmd, fw_status); } out: From 463e8f845cbf1c01e4cc8aeef1703212991d8e1e Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 14 Sep 2016 15:24:12 +0200 Subject: [PATCH 0574/1050] i2c: mux: pca954x: retry updating the mux selection on failure The cached value of the last selected channel prevents retries on the next call, even on failure to update the selected channel. Fix that. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/muxes/i2c-mux-pca954x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 528e755c468f..3278ebf1cc5c 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -164,7 +164,7 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) /* Only select the channel if its different from the last channel */ if (data->last_chan != regval) { ret = pca954x_reg_write(muxc->parent, client, regval); - data->last_chan = regval; + data->last_chan = ret ? 0 : regval; } return ret; From 332ae8e2f6ecda5e50c5c62ed62894963e3a83f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:53 +0100 Subject: [PATCH 0575/1050] net: cls_bpf: add hardware offload This patch adds hardware offload capability to cls_bpf classifier, similar to what has been done with U32 and flower. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 14 ++++++++ net/sched/cls_bpf.c | 70 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a10d8d18ce19..69f242c71865 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -789,6 +789,7 @@ enum { TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_MATCHALL, + TC_SETUP_CLSBPF, }; struct tc_cls_u32_offload; @@ -800,6 +801,7 @@ struct tc_to_netdev { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; + struct tc_cls_bpf_offload *cls_bpf; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a459be5fe1c2..41e8071dff87 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -486,4 +486,18 @@ struct tc_cls_matchall_offload { unsigned long cookie; }; +enum tc_clsbpf_command { + TC_CLSBPF_ADD, + TC_CLSBPF_REPLACE, + TC_CLSBPF_DESTROY, +}; + +struct tc_cls_bpf_offload { + enum tc_clsbpf_command command; + struct tcf_exts *exts; + struct bpf_prog *prog; + const char *name; + bool exts_integrated; +}; + #endif diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c6f7a47541eb..6523c5b4c0a5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -39,6 +39,7 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; + bool offloaded; struct tcf_exts exts; u32 handle; union { @@ -138,6 +139,71 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) return !prog->bpf_ops; } +static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, + enum tc_clsbpf_command cmd) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_bpf_offload bpf_offload = {}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSBPF; + offload.cls_bpf = &bpf_offload; + + bpf_offload.command = cmd; + bpf_offload.exts = &prog->exts; + bpf_offload.prog = prog->filter; + bpf_offload.name = prog->bpf_name; + bpf_offload.exts_integrated = prog->exts_integrated; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); +} + +static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct cls_bpf_prog *obj = prog; + enum tc_clsbpf_command cmd; + + if (oldprog && oldprog->offloaded) { + if (tc_should_offload(dev, tp, 0)) { + cmd = TC_CLSBPF_REPLACE; + } else { + obj = oldprog; + cmd = TC_CLSBPF_DESTROY; + } + } else { + if (!tc_should_offload(dev, tp, 0)) + return; + cmd = TC_CLSBPF_ADD; + } + + if (cls_bpf_offload_cmd(tp, obj, cmd)) + return; + + obj->offloaded = true; + if (oldprog) + oldprog->offloaded = false; +} + +static void cls_bpf_stop_offload(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + int err; + + if (!prog->offloaded) + return; + + err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + if (err) { + pr_err("Stopping hardware offload failed: %d\n", err); + return; + } + + prog->offloaded = false; +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -177,6 +243,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -193,6 +260,7 @@ static bool cls_bpf_destroy(struct tcf_proto 
*tp, bool force) return false; list_for_each_entry_safe(prog, tmp, &head->plist, link) { + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -415,6 +483,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; + cls_bpf_offload(tp, prog, oldprog); + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); From 0d01d45f1b251448590c710baa32f722e43c62c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:54 +0100 Subject: [PATCH 0576/1050] net: cls_bpf: limit hardware offload by software-only flag Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_HW flag. Unlike U32 and flower cls_bpf already has some netlink flags defined. Create a new attribute to be able to use the same flag values as the above. Unlike U32 and flower reject unknown flags. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + include/uapi/linux/pkt_cls.h | 1 + net/sched/cls_bpf.c | 22 ++++++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41e8071dff87..57af9f3032ff 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -498,6 +498,7 @@ struct tc_cls_bpf_offload { struct bpf_prog *prog; const char *name; bool exts_integrated; + u32 gen_flags; }; #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8915b61bbf83..8fd715f806a2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -396,6 +396,7 @@ enum { TCA_BPF_FD, TCA_BPF_NAME, TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, __TCA_BPF_MAX, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6523c5b4c0a5..ebf01f7c1470 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann "); MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 +#define CLS_BPF_SUPPORTED_GEN_FLAGS \ + TCA_CLS_FLAGS_SKIP_HW struct cls_bpf_head { struct list_head plist; @@ -40,6 +42,7 @@ struct cls_bpf_prog { struct tcf_result res; bool exts_integrated; bool offloaded; + u32 gen_flags; struct tcf_exts exts; u32 handle; union { @@ -55,6 +58,7 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, @@ -154,6 +158,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.prog = prog->filter; bpf_offload.name = prog->bpf_name; bpf_offload.exts_integrated = prog->exts_integrated; + bpf_offload.gen_flags = prog->gen_flags; return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); @@ -167,14 +172,14 @@ static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd; if (oldprog && oldprog->offloaded) { - if (tc_should_offload(dev, tp, 0)) { + if (tc_should_offload(dev, tp, prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; } else { obj = oldprog; cmd = TC_CLSBPF_DESTROY; } } else { - if (!tc_should_offload(dev, tp, 0)) + if (!tc_should_offload(dev, tp, prog->gen_flags)) return; cmd = TC_CLSBPF_ADD; } @@ -370,6 +375,7 @@ static int cls_bpf_modify_existing(struct net *net, struct 
tcf_proto *tp, { bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; + u32 gen_flags = 0; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; @@ -394,8 +400,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } + if (tb[TCA_BPF_FLAGS_GEN]) { + gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); + if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || + !tc_flags_valid(gen_flags)) { + ret = -EINVAL; + goto errout; + } + } prog->exts_integrated = have_exts; + prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); @@ -568,6 +583,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) goto nla_put_failure; + if (prog->gen_flags && + nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags)) + goto nla_put_failure; nla_nest_end(skb, nest); From eadb41489fd2249e71fd14b36fb488ed7217ca4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:55 +0100 Subject: [PATCH 0577/1050] net: cls_bpf: add support for marking filters as hardware-only Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_SW flag. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index ebf01f7c1470..1becc2fe1bc5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -28,7 +28,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 #define CLS_BPF_SUPPORTED_GEN_FLAGS \ - TCA_CLS_FLAGS_SKIP_HW + (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW) struct cls_bpf_head { struct list_head plist; @@ -96,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, qdisc_skb_cb(skb)->tc_classid = prog->res.classid; - if (at_ingress) { + if (tc_skip_sw(prog->gen_flags)) { + filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0; + } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); bpf_compute_data_end(skb); @@ -164,32 +166,42 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, tp->protocol, &offload); } -static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, - struct cls_bpf_prog *oldprog) +static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) { struct net_device *dev = tp->q->dev_queue->dev; struct cls_bpf_prog *obj = prog; enum tc_clsbpf_command cmd; + bool skip_sw; + int ret; + + skip_sw = tc_skip_sw(prog->gen_flags) || + (oldprog && tc_skip_sw(oldprog->gen_flags)); if (oldprog && oldprog->offloaded) { if (tc_should_offload(dev, tp, prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; - } else { + } else if (!tc_skip_sw(prog->gen_flags)) { obj = oldprog; cmd = TC_CLSBPF_DESTROY; + } else { + return -EINVAL; } } else { if (!tc_should_offload(dev, tp, prog->gen_flags)) - return; + return skip_sw ? -EINVAL : 0; cmd = TC_CLSBPF_ADD; } - if (cls_bpf_offload_cmd(tp, obj, cmd)) - return; + ret = cls_bpf_offload_cmd(tp, obj, cmd); + if (ret) + return skip_sw ? 
ret : 0; obj->offloaded = true; if (oldprog) oldprog->offloaded = false; + + return 0; } static void cls_bpf_stop_offload(struct tcf_proto *tp, @@ -498,7 +510,11 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; - cls_bpf_offload(tp, prog, oldprog); + ret = cls_bpf_offload(tp, prog, oldprog); + if (ret) { + cls_bpf_delete_prog(tp, prog); + return ret; + } if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); From 3df126f35f88dc76eea33769f85a3c3bb8ce6c6b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:56 +0100 Subject: [PATCH 0578/1050] bpf: don't (ab)use instructions to store state Storing state in reserved fields of instructions makes it impossible to run verifier on programs already marked as read-only. Allocate and use an array of per-instruction state instead. While touching the error path rename and move existing jump target. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 70 ++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a75ee3bdcd1..a9542d89f293 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -181,6 +181,10 @@ struct verifier_stack_elem { struct verifier_stack_elem *next; }; +struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ /* single container for all structs @@ -197,6 +201,7 @@ struct verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -2344,7 +2349,7 @@ static int do_check(struct verifier_env *env) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type src_reg_type; + enum bpf_reg_type *prev_src_type, src_reg_type; /* check for reserved fields is already done */ @@ -2374,16 +2379,18 @@ static int do_check(struct verifier_env *env) continue; } - if (insn->imm == 0) { + prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_src_type == NOT_INIT) { /* saw a valid insn * dst_reg = *(u32 *)(src_reg + off) - * use reserved 'imm' field to mark this insn + * save type to validate intersecting paths */ - insn->imm = src_reg_type; + *prev_src_type = src_reg_type; - } else if (src_reg_type != insn->imm && + } else if (src_reg_type != *prev_src_type && (src_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_src_type == PTR_TO_CTX)) { /* ABuser program is trying to use the same insn * dst_reg = *(u32*) (src_reg + off) * with different pointer types: @@ -2396,7 +2403,7 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { - enum bpf_reg_type dst_reg_type; + enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); @@ -2424,11 +2431,13 @@ static int do_check(struct verifier_env *env) if (err) return err; - if (insn->imm == 0) { - insn->imm = dst_reg_type; - } else if (dst_reg_type != insn->imm && + prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_dst_type == NOT_INIT) { + *prev_dst_type = dst_reg_type; + } else if (dst_reg_type != *prev_dst_type && (dst_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + 
*prev_dst_type == PTR_TO_CTX)) { verbose("same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -2686,10 +2695,11 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) static int convert_ctx_accesses(struct verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, insn_cnt, cnt; + int i, cnt, delta = 0; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -2703,18 +2713,16 @@ static int convert_ctx_accesses(struct verifier_env *env) if (!new_prog) return -ENOMEM; env->prog = new_prog; + delta += cnt - 1; } } if (!ops->convert_ctx_access) return 0; - insn_cnt = env->prog->len; - insn = env->prog->insnsi; + insn = env->prog->insnsi + delta; for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; @@ -2724,11 +2732,8 @@ static int convert_ctx_accesses(struct verifier_env *env) else continue; - if (insn->imm != PTR_TO_CTX) { - /* clear internal mark */ - insn->imm = 0; + if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) continue; - } cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, insn->off, insn_buf, env->prog); @@ -2737,18 +2742,16 @@ static int convert_ctx_accesses(struct verifier_env *env) return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt); + new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, + cnt); if (!new_prog) return -ENOMEM; - insn_delta = cnt - 1; + delta += cnt - 1; /* keep walking new program and skip insns we just inserted */ env->prog = new_prog; - insn = new_prog->insnsi + i + insn_delta; - - insn_cnt += insn_delta; - i += insn_delta; + insn = new_prog->insnsi + i + delta; } return 0; @@ -2792,6 +2795,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!env) return -ENOMEM; + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + (*prog)->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; env->prog = *prog; /* grab the mutex to protect few globals used by verifier */ @@ -2810,12 +2818,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* log_* values have to be sane */ if (log_size < 128 || log_size > UINT_MAX >> 8 || log_level == 0 || log_ubuf == NULL) - goto free_env; + goto err_unlock; ret = -ENOMEM; log_buf = vmalloc(log_size); if (!log_buf) - goto free_env; + goto err_unlock; } else { log_level = 0; } @@ -2884,14 +2892,16 @@ skip_full_check: free_log_buf: if (log_level) vfree(log_buf); -free_env: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. */ release_maps(env); *prog = env->prog; - kfree(env); +err_unlock: mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); return ret; } From 58e2af8b3a6b587e4ac8414343581da4349d3c0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:57 +0100 Subject: [PATCH 0579/1050] bpf: expose internal verifier structures Move verifier's internal structures to a header file and prefix their names with bpf_ to avoid potential namespace conflicts. Those structures will soon be used by external analyzers. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- include/linux/bpf_verifier.h | 79 +++++++++++ kernel/bpf/verifier.c | 266 ++++++++++++++--------------------- 2 files changed, 182 insertions(+), 163 deletions(-) create mode 100644 include/linux/bpf_verifier.h diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h new file mode 100644 index 000000000000..9457a22fc6e0 --- /dev/null +++ b/include/linux/bpf_verifier.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _LINUX_BPF_VERIFIER_H +#define _LINUX_BPF_VERIFIER_H 1 + +#include /* for enum bpf_reg_type */ +#include /* for MAX_BPF_STACK */ + +struct bpf_reg_state { + enum bpf_reg_type type; + union { + /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ + s64 imm; + + /* valid when type == PTR_TO_PACKET* */ + struct { + u32 id; + u16 off; + u16 range; + }; + + /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | + * PTR_TO_MAP_VALUE_OR_NULL + */ + struct bpf_map *map_ptr; + }; +}; + +enum bpf_stack_slot_type { + STACK_INVALID, /* nothing was stored in this stack slot */ + STACK_SPILL, /* register spilled into stack */ + STACK_MISC /* BPF program wrote some data into this slot */ +}; + +#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + +/* state of the program: + * type of all registers and stack info + */ +struct bpf_verifier_state { + struct bpf_reg_state regs[MAX_BPF_REG]; + u8 stack_slot_type[MAX_BPF_STACK]; + struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; +}; + +/* linked list of verifier states used to prune search */ +struct bpf_verifier_state_list { + struct bpf_verifier_state state; + struct bpf_verifier_state_list *next; +}; + +struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + +#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ + +/* single container for all structs + * one verifier_env per bpf_check() call + */ +struct bpf_verifier_env { + struct bpf_prog *prog; /* eBPF program being verified */ + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ + struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; + bool seen_direct_write; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +}; + +#endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a9542d89f293..dca2b9b1d02e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -126,82 +127,16 @@ * are set to NOT_INIT to indicate that they are no longer readable. 
*/ -struct reg_state { - enum bpf_reg_type type; - union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u32 id; - u16 off; - u16 range; - }; - - /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | - * PTR_TO_MAP_VALUE_OR_NULL - */ - struct bpf_map *map_ptr; - }; -}; - -enum bpf_stack_slot_type { - STACK_INVALID, /* nothing was stored in this stack slot */ - STACK_SPILL, /* register spilled into stack */ - STACK_MISC /* BPF program wrote some data into this slot */ -}; - -#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ - -/* state of the program: - * type of all registers and stack info - */ -struct verifier_state { - struct reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; -}; - -/* linked list of verifier states used to prune search */ -struct verifier_state_list { - struct verifier_state state; - struct verifier_state_list *next; -}; - /* verifier_state + insn_idx are pushed to stack when branch is encountered */ -struct verifier_stack_elem { +struct bpf_verifier_stack_elem { /* verifer state is 'st' * before processing instruction 'insn_idx' * and after processing instruction 'prev_insn_idx' */ - struct verifier_state st; + struct bpf_verifier_state st; int insn_idx; int prev_insn_idx; - struct verifier_stack_elem *next; -}; - -struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ -}; - -#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ - -/* single container for all structs - * one verifier_env per bpf_check() call - */ -struct verifier_env { - struct bpf_prog *prog; /* eBPF program being verified */ - struct verifier_stack_elem *head; /* stack of verifier states to be processed */ - int stack_size; /* number of states to be processed */ - struct verifier_state cur_state; /* current verifier state */ - struct verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ - u32 used_map_cnt; /* number of used maps */ - u32 id_gen; /* used to generate unique reg IDs */ - bool allow_ptr_leaks; - bool seen_direct_write; - struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ + struct bpf_verifier_stack_elem *next; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -254,9 +189,9 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; -static void print_verifier_state(struct verifier_state *state) +static void print_verifier_state(struct bpf_verifier_state *state) { - struct reg_state *reg; + struct bpf_reg_state *reg; enum bpf_reg_type t; int i; @@ -432,9 +367,9 @@ static void print_bpf_insn(struct bpf_insn *insn) } } -static int pop_stack(struct verifier_env *env, int *prev_insn_idx) +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; int insn_idx; if (env->head == NULL) @@ -451,12 +386,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx) return insn_idx; } -static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, - int prev_insn_idx) +static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; - elem = kmalloc(sizeof(struct 
verifier_stack_elem), GFP_KERNEL); + elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); if (!elem) goto err; @@ -482,7 +417,7 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void init_reg_state(struct reg_state *regs) +static void init_reg_state(struct bpf_reg_state *regs) { int i; @@ -498,7 +433,7 @@ static void init_reg_state(struct reg_state *regs) regs[BPF_REG_1].type = PTR_TO_CTX; } -static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) +static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; @@ -511,7 +446,7 @@ enum reg_arg_type { DST_OP_NO_MARK /* same as above, check only, don't mark */ }; -static int check_reg_arg(struct reg_state *regs, u32 regno, +static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, enum reg_arg_type t) { if (regno >= MAX_BPF_REG) { @@ -571,8 +506,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct bpf_verifier_state *state, int off, + int size, int value_regno) { int i; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -597,7 +532,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } else { /* regular write of data into stack */ state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + (struct bpf_reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -605,7 +540,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, return 0; } -static int check_stack_read(struct verifier_state *state, int off, int size, +static int check_stack_read(struct bpf_verifier_state *state, int off, int size, int value_regno) { u8 *slot_type; @@ -646,7 +581,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size, } /* check read/write into map element returned by bpf_map_lookup_elem() */ -static int check_map_access(struct verifier_env *env, u32 regno, int off, +static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { struct bpf_map *map = env->cur_state.regs[regno].map_ptr; @@ -661,7 +596,7 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_access_direct_pkt_data(struct verifier_env *env, +static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta) { switch (env->prog->type) { @@ -678,11 +613,11 @@ static bool may_access_direct_pkt_data(struct verifier_env *env, } } -static int check_packet_access(struct verifier_env *env, u32 regno, int off, +static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *reg = &regs[regno]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *reg = &regs[regno]; off += reg->off; if (off < 0 || size <= 0 || off + size > reg->range) { @@ -694,7 +629,7 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, } /* check access to 'struct bpf_context' fields */ -static int check_ctx_access(struct verifier_env *env, int off, int size, +static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { if (env->prog->aux->ops->is_valid_access && @@ -709,7 +644,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, return -EACCES; } -static bool is_pointer_value(struct verifier_env *env, int regno) +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { if (env->allow_ptr_leaks) return false; @@ -723,12 +658,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno) } } -static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, - int off, int size) +static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) { if (reg->type != PTR_TO_PACKET) { if (off % size != 0) { - verbose("misaligned access off %d size %d\n", off, size); + verbose("misaligned access off %d size %d\n", + off, size); return -EACCES; } else { return 0; @@ -769,12 +705,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { - struct verifier_state *state = &env->cur_state; - struct reg_state *reg = &state->regs[regno]; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *reg = &state->regs[regno]; int size, err = 0; if (reg->type == PTR_TO_STACK) @@ -860,9 +796,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || @@ -896,12 +832,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ -static int check_stack_boundary(struct verifier_env *env, int regno, +static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs; int off, i; if (regs[regno].type != PTR_TO_STACK) { @@ -940,11 +876,11 @@ static int check_stack_boundary(struct verifier_env *env, int regno, return 0; } -static int check_func_arg(struct verifier_env *env, u32 regno, +static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno]; + struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno]; enum bpf_reg_type expected_type, type = reg->type; int err = 0; @@ -1149,10 +1085,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn) return count > 1 ? -EINVAL : 0; } -static void clear_all_pkt_pointers(struct verifier_env *env) +static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs, *reg; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs, *reg; int i; for (i = 0; i < MAX_BPF_REG; i++) @@ -1172,12 +1108,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env) } } -static int check_call(struct verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; - struct reg_state *regs = state->regs; - struct reg_state *reg; + struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1280,12 +1216,13 @@ static int check_call(struct verifier_env *env, int func_id) return 0; } -static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) +static int check_packet_ptr_add(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = &regs[insn->dst_reg]; - struct reg_state *src_reg = &regs[insn->src_reg]; - struct reg_state tmp_reg; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; + struct bpf_reg_state *src_reg = &regs[insn->src_reg]; + struct bpf_reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1353,10 +1290,10 @@ add_imm: return 0; } -static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = &regs[insn->dst_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; u8 opcode = BPF_OP(insn->code); s64 imm_log2; @@ -1366,7 +1303,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) */ if (BPF_SRC(insn->code) == BPF_X) { - struct reg_state *src_reg = &regs[insn->src_reg]; + struct bpf_reg_state *src_reg = &regs[insn->src_reg]; if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 && dst_reg->imm && opcode == BPF_ADD) { @@ -1455,11 +1392,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = &regs[insn->dst_reg]; - struct reg_state *src_reg = &regs[insn->src_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; + struct bpf_reg_state *src_reg = &regs[insn->src_reg]; u8 opcode = BPF_OP(insn->code); /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
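The constant folding described in that comment reduces to a sketch like the following (condensed, not the literal kernel code): when the destination holds a known constant, the verifier simply executes the add at verification time rather than degrading the register to an unknown value:

	if (opcode == BPF_ADD) {
		if (BPF_SRC(insn->code) == BPF_K)
			dst_reg->imm += insn->imm;	/* reg += imm */
		else if (src_reg->type == CONST_IMM)
			dst_reg->imm += src_reg->imm;	/* reg += const reg */
	}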
@@ -1476,9 +1414,9 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) } /* check validity of 32-bit and 64-bit arithmetic operations */ -static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) +static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1652,10 +1590,10 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static void find_good_pkt_pointers(struct verifier_state *state, - const struct reg_state *dst_reg) +static void find_good_pkt_pointers(struct bpf_verifier_state *state, + struct bpf_reg_state *dst_reg) { - struct reg_state *regs = state->regs, *reg; + struct bpf_reg_state *regs = state->regs, *reg; int i; /* LLVM can generate two kind of checks: @@ -1701,11 +1639,11 @@ static void find_good_pkt_pointers(struct verifier_state *state, } } -static int check_cond_jmp_op(struct verifier_env *env, +static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct verifier_state *other_branch, *this_branch = &env->cur_state; - struct reg_state *regs = this_branch->regs, *dst_reg; + struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; + struct bpf_reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1767,7 +1705,7 @@ static int check_cond_jmp_op(struct verifier_env *env, if (!other_branch) return -EFAULT; - /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ + /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { @@ -1809,9 +1747,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) } /* verify BPF_LD_IMM64 instruction */ -static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -1866,11 +1804,11 @@ static bool may_access_skb(enum bpf_prog_type type) * Output: * R0 - 8/16/32-bit skb data converted to cpu endianness */ -static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct reg_state *reg; + struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -1956,7 +1894,7 @@ enum { BRANCH = 2, }; -#define STATE_LIST_MARK ((struct verifier_state_list *) -1L) +#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) static int *insn_stack; /* stack of insns to process */ static int cur_stack; /* current stack index */ @@ -1967,7 +1905,7 @@ static int *insn_state; * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct verifier_env *env) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) return 0; @@ -2008,7 +1946,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env) /* non-recursive 
depth-first-search to detect loops in BPF program * loop == back-edge in directed graph */ -static int check_cfg(struct verifier_env *env) +static int check_cfg(struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2117,7 +2055,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) +static bool compare_ptrs_to_packet(struct bpf_reg_state *old, + struct bpf_reg_state *cur) { if (old->id != cur->id) return false; @@ -2192,9 +2131,10 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool states_equal(struct verifier_state *old, struct verifier_state *cur) +static bool states_equal(struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) { - struct reg_state *rold, *rcur; + struct bpf_reg_state *rold, *rcur; int i; for (i = 0; i < MAX_BPF_REG; i++) { @@ -2234,9 +2174,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) * the same, check that stored pointers types * are the same as well. * Ex: explored safe path could have stored - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8} * but current path has stored: - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16} * such verifier states are not equivalent. * return false to continue verification of this path */ @@ -2247,10 +2187,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) return true; } -static int is_state_visited(struct verifier_env *env, int insn_idx) +static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { - struct verifier_state_list *new_sl; - struct verifier_state_list *sl; + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; sl = env->explored_states[insn_idx]; if (!sl) @@ -2274,7 +2214,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) * it will be rejected. 
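Condensed, the pruning walk being described looks like the sketch below (the real loop in is_state_visited() also has to skip the STATE_LIST_MARK sentinel left behind by the CFG check):

	for (sl = env->explored_states[insn_idx];
	     sl && sl != STATE_LIST_MARK; sl = sl->next)
		if (states_equal(&sl->state, &env->cur_state))
			return 1;	/* equivalent state already explored: prune */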
Since there are no loops, we won't be * seeing this 'insn_idx' instruction again on the way to bpf_exit */ - new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER); + new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); if (!new_sl) return -ENOMEM; @@ -2285,11 +2225,11 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) return 0; } -static int do_check(struct verifier_env *env) +static int do_check(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; struct bpf_insn *insns = env->prog->insnsi; - struct reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs; int insn_cnt = env->prog->len; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; @@ -2572,7 +2512,7 @@ static int check_map_prog_compatibility(struct bpf_map *map, /* look for pseudo eBPF instructions that access map FDs and * replace them with actual map pointers */ -static int replace_map_fd_with_map_ptr(struct verifier_env *env) +static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2669,7 +2609,7 @@ next_insn: } /* drop refcnt of maps used by the rejected program */ -static void release_maps(struct verifier_env *env) +static void release_maps(struct bpf_verifier_env *env) { int i; @@ -2678,7 +2618,7 @@ static void release_maps(struct verifier_env *env) } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ -static void convert_pseudo_ld_imm64(struct verifier_env *env) +static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2692,7 +2632,7 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ -static int convert_ctx_accesses(struct verifier_env *env) +static int convert_ctx_accesses(struct bpf_verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; const int insn_cnt = env->prog->len; @@ -2757,9 +2697,9 @@ static int convert_ctx_accesses(struct verifier_env *env) return 0; } -static void free_states(struct verifier_env *env) +static void free_states(struct bpf_verifier_env *env) { - struct verifier_state_list *sl, *sln; + struct bpf_verifier_state_list *sl, *sln; int i; if (!env->explored_states) @@ -2782,16 +2722,16 @@ static void free_states(struct verifier_env *env) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { char __user *log_ubuf = NULL; - struct verifier_env *env; + struct bpf_verifier_env *env; int ret = -EINVAL; if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) return -E2BIG; - /* 'struct verifier_env' can be global, but since it's not small, + /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL); + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; @@ -2833,7 +2773,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) goto skip_full_check; env->explored_states = kcalloc(env->prog->len, - sizeof(struct verifier_state_list *), + sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; if (!env->explored_states) From 13a27dfc669724564aafa2699976ee756029fed2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 
21 Sep 2016 11:43:58 +0100 Subject: [PATCH 0580/1050] bpf: enable non-core use of the verifier Advanced JIT compilers and translators may want to use the eBPF verifier as a base for parsers or to perform custom checks and validations. Add the ability for external users to invoke the verifier and provide callbacks to be invoked for every instruction checked. For now only the most basic callback, for per-instruction pre-interpretation checks, is added. More advanced users may also want a per-instruction post callback and a state comparison callback. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 11 ++++++ kernel/bpf/verifier.c | 68 ++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9457a22fc6e0..c5cb661712c9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -59,6 +59,12 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +struct bpf_verifier_env; +struct bpf_ext_analyzer_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -68,6 +74,8 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + void *analyzer_priv; /* pointer to external analyzer's private data */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -76,4 +84,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dca2b9b1d02e..ee86a77dc40b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -632,6 +632,10 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { + /* for analyzer ctx accesses are already validated and converted */ + if (env->analyzer_ops) + return 0; + if (env->prog->aux->ops->is_valid_access && env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ @@ -2225,6 +2229,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } +static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) +{ + if (!env->analyzer_ops || !env->analyzer_ops->insn_hook) + return 0; + + return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx); +} + static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state = &env->cur_state; @@ -2283,6 +2296,10 @@ static int do_check(struct bpf_verifier_env *env) print_bpf_insn(insn); } + err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); + if (err) + return err; + if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -2845,3
+2862,54 @@ err_free_env: kfree(env); return ret; } + +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv) +{ + struct bpf_verifier_env *env; + int ret; + + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + if (!env) + return -ENOMEM; + + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + prog->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; + env->prog = prog; + env->analyzer_ops = ops; + env->analyzer_priv = priv; + + /* grab the mutex to protect few globals used by verifier */ + mutex_lock(&bpf_verifier_lock); + + log_level = 0; + + env->explored_states = kcalloc(env->prog->len, + sizeof(struct bpf_verifier_state_list *), + GFP_KERNEL); + ret = -ENOMEM; + if (!env->explored_states) + goto skip_full_check; + + ret = check_cfg(env); + if (ret < 0) + goto skip_full_check; + + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); + +skip_full_check: + while (pop_stack(env, NULL) >= 0); + free_states(env); + + mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); + return ret; +} +EXPORT_SYMBOL_GPL(bpf_analyzer); From 6b17387307bafc71624b9890b9239b6a438e2e89 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:59 +0100 Subject: [PATCH 0581/1050] bpf: recognize 64bit immediate loads as consts When running as a parser, interpret BPF_LD | BPF_IMM | BPF_DW instructions as loading CONST_IMM with the value stored in imm. The verifier itself will continue not to recognize those, due to concerns about search space and program complexity increases. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ee86a77dc40b..8c3f794c7028 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1769,9 +1769,19 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; - if (insn->src_reg == 0) - /* generic move 64-bit immediate into a register */ + if (insn->src_reg == 0) { + /* generic move 64-bit immediate into a register, + * only analyzer needs to collect the ld_imm value. + */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + + if (!env->analyzer_ops) + return 0; + + regs[insn->dst_reg].type = CONST_IMM; + regs[insn->dst_reg].imm = imm; return 0; + } /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); From cd7df56ed3e60d046ddb3acd987778c00aa9ee33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:00 +0100 Subject: [PATCH 0582/1050] nfp: add BPF to NFP code translator Add a translator for JITing eBPF to operations which can be executed on NFP's programmable engines. Signed-off-by: Jakub Kicinski Signed-off-by: David S.
Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 6 + drivers/net/ethernet/netronome/nfp/nfp_asm.h | 233 +++ drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 208 ++ .../net/ethernet/netronome/nfp/nfp_bpf_jit.c | 1724 +++++++++++++++++ .../ethernet/netronome/nfp/nfp_bpf_verifier.c | 162 ++ 5 files changed, 2333 insertions(+) create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_asm.h create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf.h create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 68178819ff12..5f12689bf523 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -5,4 +5,10 @@ nfp_netvf-objs := \ nfp_net_ethtool.o \ nfp_netvf_main.o +ifeq ($(CONFIG_BPF_SYSCALL),y) +nfp_netvf-objs += \ + nfp_bpf_verifier.o \ + nfp_bpf_jit.o +endif + nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h new file mode 100644 index 000000000000..22484b6fd3e8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
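The new nfp_bpf_verifier.c (created by this patch but not reproduced here) is the first consumer of the bpf_analyzer() interface added two patches earlier. Its hookup is roughly the sketch below; the hook name and body are illustrative, only nfp_prog_verify() and the shape of the ops struct come from the patches themselves:

static int nfp_verify_insn(struct bpf_verifier_env *env,
			   int insn_idx, int prev_insn_idx)
{
	/* per-instruction check: reject anything the NFP engines
	 * cannot execute (illustrative stub)
	 */
	return 0;
}

static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
	.insn_hook = nfp_verify_insn,
};

int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
{
	return bpf_analyzer(prog, &nfp_bpf_analyzer_ops, nfp_prog);
}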
+ */ + +#ifndef __NFP_ASM_H__ +#define __NFP_ASM_H__ 1 + +#include "nfp_bpf.h" + +#define REG_NONE 0 + +#define RE_REG_NO_DST 0x020 +#define RE_REG_IMM 0x020 +#define RE_REG_IMM_encode(x) \ + (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) +#define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_XFR 0x080 + +#define UR_REG_XFR 0x180 +#define UR_REG_NN 0x280 +#define UR_REG_NO_DST 0x300 +#define UR_REG_IMM UR_REG_NO_DST +#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) +#define UR_REG_IMM_MAX 0x0ffULL + +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL + +#define nfp_is_br(_insn) \ + (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) + +enum br_mask { + BR_BEQ = 0x00, + BR_BNE = 0x01, + BR_BHS = 0x04, + BR_BLO = 0x05, + BR_BGE = 0x08, + BR_UNC = 0x18, +}; + +enum br_ev_pip { + BR_EV_PIP_UNCOND = 0, + BR_EV_PIP_COND = 1, +}; + +enum br_ctx_signal_state { + BR_CSS_NONE = 2, +}; + +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL + +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL + +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL + +enum immed_width { + IMMED_WIDTH_ALL = 0, + IMMED_WIDTH_BYTE = 1, + IMMED_WIDTH_WORD = 2, +}; + +enum immed_shift { + IMMED_SHIFT_0B = 0, + IMMED_SHIFT_1B = 1, + IMMED_SHIFT_2B = 2, +}; + +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL + +enum shf_op { + SHF_OP_NONE = 0, + SHF_OP_AND = 2, + SHF_OP_OR = 5, +}; + +enum shf_sc { + SHF_SC_R_ROT = 0, + SHF_SC_R_SHF = 1, + SHF_SC_L_SHF = 2, + SHF_SC_R_DSHF = 3, +}; + +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL + +enum alu_op { + ALU_OP_NONE = 0x00, + ALU_OP_ADD = 0x01, + ALU_OP_NEG = 0x04, + ALU_OP_AND = 0x08, + ALU_OP_SUB_C = 0x0d, + ALU_OP_ADD_C = 0x11, + ALU_OP_OR = 0x14, + ALU_OP_SUB = 0x15, + ALU_OP_XOR = 0x18, +}; + +enum alu_dst_ab { + ALU_DST_A = 0, + ALU_DST_B = 1, +}; + +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL 
+#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL + +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL + +struct cmd_tgt_act { + u8 token; + u8 tgt_cmd; +}; + +enum cmd_tgt_map { + CMD_TGT_READ8, + CMD_TGT_WRITE8, + CMD_TGT_READ_LE, + CMD_TGT_READ_SWAP_LE, + __CMD_TGT_MAP_SIZE, +}; + +enum cmd_mode { + CMD_MODE_40b_AB = 0, + CMD_MODE_40b_BA = 1, + CMD_MODE_32b = 4, +}; + +enum cmd_ctx_swap { + CMD_CTX_SWAP = 0, + CMD_CTX_NO_SWAP = 3, +}; + +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL + +enum lcsr_wr_src { + LCSR_WR_AREG, + LCSR_WR_BREG, + LCSR_WR_IMM, +}; + +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h new file mode 100644 index 000000000000..3726421e353f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_BPF_H__ +#define __NFP_BPF_H__ 1 + +#include +#include +#include +#include + +#define FIELD_FIT(mask, val) (!((((u64)val) << __bf_shf(mask)) & ~(mask))) + +/* For branch fixup logic use up-most byte of branch instruction as scratch + * area. Remember to clear this before sending instructions to HW! 
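In practice the scratch byte works in two steps, roughly as sketched here (assuming a branch-fixup pass like the one wrp_br_special() in nfp_bpf_jit.c feeds, using the OP_BR_SPECIAL mask defined just below; not literal driver code):

static u64 nfp_br_untag(u64 insn, enum br_special *reason)
{
	/* fixup pass: recover why the branch was emitted, then clear
	 * the scratch byte so the instruction is valid for the HW
	 */
	*reason = FIELD_GET(OP_BR_SPECIAL, insn);
	return insn & ~OP_BR_SPECIAL;
}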
+ */ +#define OP_BR_SPECIAL 0xff00000000000000ULL + +enum br_special { + OP_BR_NORMAL = 0, + OP_BR_GO_OUT, + OP_BR_GO_ABORT, +}; + +enum static_regs { + STATIC_REG_PKT = 1, +#define REG_PKT_BANK ALU_DST_A + STATIC_REG_IMM = 2, /* Bank AB */ +}; + +enum nfp_bpf_action_type { + NN_ACT_TC_DROP, +}; + +/* Software register representation, hardware encoding in asm.h */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), +}; + +#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B) + +#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH)) +#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A)) +#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B)) +#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR)) +#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER)) +#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM)) +#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE)) + +#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT) +#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) + +#define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_PKT reg_nnr(2) +#define NFP_BPF_ABI_LEN reg_nnr(3) + +struct nfp_prog; +struct nfp_insn_meta; +typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); + +#define nfp_prog_first_meta(nfp_prog) \ + list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_prog_last_meta(nfp_prog) \ + list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_meta_next(meta) list_next_entry(meta, l) +#define nfp_meta_prev(meta) list_prev_entry(meta, l) + +/** + * struct nfp_insn_meta - BPF instruction wrapper + * @insn: BPF instruction + * @off: index of first generated machine instruction (in nfp_prog.prog) + * @n: eBPF instruction number + * @skip: skip this instruction (optimized out) + * @double_cb: callback for second part of the instruction + * @l: link on nfp_prog->insns list + */ +struct nfp_insn_meta { + struct bpf_insn insn; + unsigned int off; + unsigned short n; + bool skip; + instr_cb_t double_cb; + + struct list_head l; +}; + +#define BPF_SIZE_MASK 0x18 + +static inline u8 mbpf_class(const struct nfp_insn_meta *meta) +{ + return BPF_CLASS(meta->insn.code); +} + +static inline u8 mbpf_src(const struct nfp_insn_meta *meta) +{ + return BPF_SRC(meta->insn.code); +} + +static inline u8 mbpf_op(const struct nfp_insn_meta *meta) +{ + return BPF_OP(meta->insn.code); +} + +static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) +{ + return BPF_MODE(meta->insn.code); +} + +/** + * struct nfp_prog - nfp BPF program + * @prog: machine code + * @prog_len: number of valid instructions in @prog array + * @__prog_alloc_len: alloc size of @prog array + * @act: BPF program/action type (TC DA, TC with action, XDP etc.) + * @num_regs: number of registers used by this program + * @regs_per_thread: number of basic registers allocated per thread + * @start_off: address of the first instruction in the memory + * @tgt_out: jump target for normal exit + * @tgt_abort: jump target for abort (e.g. 
access outside of packet buffer) + * @tgt_done: jump target to get the next packet + * @n_translated: number of successfully translated instructions (for errors) + * @error: error code if something went wrong + * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) + */ +struct nfp_prog { + u64 *prog; + unsigned int prog_len; + unsigned int __prog_alloc_len; + + enum nfp_bpf_action_type act; + + unsigned int num_regs; + unsigned int regs_per_thread; + + unsigned int start_off; + unsigned int tgt_out; + unsigned int tgt_abort; + unsigned int tgt_done; + + unsigned int n_translated; + int error; + + struct list_head insns; +}; + +struct nfp_bpf_result { + unsigned int n_instr; + bool dense_mode; +}; + +#ifdef CONFIG_BPF_SYSCALL +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res); +#else +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + return -ENOTSUPP; +} +#endif + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c new file mode 100644 index 000000000000..cfbf53607fc9 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -0,0 +1,1724 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include +#include +#include +#include +#include + +#include "nfp_asm.h" +#include "nfp_bpf.h" + +/* --- NFP prog --- */ +/* Foreach "multiple" entries macros provide pos and next pointers. + * It's safe to modify the next pointers (but not pos). 
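Usage follows the pattern sketched below (a hypothetical peephole pass, not from this patch), with the walk macros defined next supplying the pos/next pair:

static void nfp_peephole_sketch(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *pos, *next;

	nfp_for_each_insn_walk2(nfp_prog, pos, next) {
		if (pos->skip)
			continue;
		/* inspect the (pos, next) pair; marking next->skip or
		 * relinking next is safe, modifying pos is not
		 */
	}
}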
+ */ +#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos)) + +#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l), \ + next2 = list_next_entry(next, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l && \ + &(nfp_prog)->insns != &next2->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos), \ + next2 = nfp_meta_next(next)) + +static bool +nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.next != &nfp_prog->insns; +} + +static bool +nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.prev != &nfp_prog->insns; +} + +static void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + +static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) +{ + if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { + nfp_prog->error = -ENOSPC; + return; + } + + nfp_prog->prog[nfp_prog->prog_len] = insn; + nfp_prog->prog_len++; +} + +static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) +{ + return nfp_prog->start_off + nfp_prog->prog_len; +} + +static unsigned int +nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) +{ + return offset - nfp_prog->start_off; +} + +/* --- SW reg --- */ +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; +}; + +static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? 
UR_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding %08x\n", swreg); + return 0; + } +} + +static int +swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + return 0; +} + +static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? RE_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding\n"); + return 0; + } +} + +static int +swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg, + bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8); + } + + return 0; +} + +/* --- Emitters --- */ +static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8] = { 0x00, 0x42 }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, +}; + +static void +__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) +{ + enum cmd_ctx_swap ctx; + u64 insn; + + if (sync) + ctx = CMD_CTX_SWAP; + else + ctx = CMD_CTX_NO_SWAP; + + insn = FIELD_PREP(OP_CMD_A_SRC, areg) | + FIELD_PREP(OP_CMD_CTX, ctx) | + FIELD_PREP(OP_CMD_B_SRC, breg) | + FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | + FIELD_PREP(OP_CMD_XFER, xfer) | + FIELD_PREP(OP_CMD_CNT, size) | + FIELD_PREP(OP_CMD_SIG, sync) | + FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_MODE, mode); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false); + if (err) { + nfp_prog->error = err; + return; + } + if (reg.swap) { + pr_err("cmd can't swap arguments\n"); + nfp_prog->error = -EFAULT; + return; + } + + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); +} + +static void +__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, + enum br_ctx_signal_state css, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BASE | + FIELD_PREP(OP_BR_MASK, mask) | + FIELD_PREP(OP_BR_EV_PIP, ev_pip) | + FIELD_PREP(OP_BR_CSS, css) | + FIELD_PREP(OP_BR_DEFBR, defer) | + FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) +{ + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); +} + +static void +__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, + u8 byte, bool equal, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BBYTE_BASE | + FIELD_PREP(OP_BB_A_SRC, areg) | + FIELD_PREP(OP_BB_BYTE, byte) | + FIELD_PREP(OP_BB_B_SRC, breg) | + FIELD_PREP(OP_BB_I8, imm8) | + FIELD_PREP(OP_BB_EQ, equal) | + FIELD_PREP(OP_BB_DEFBR, defer) | + FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BB_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_byte_neq(struct nfp_prog *nfp_prog, + u32 dst, u8 imm, u8 byte, u16 addr, u8 defer) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), &reg, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, + defer); +} + +static void +__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + enum immed_width width, bool invert, + enum immed_shift shift, bool wr_both) +{ + u64 insn; + + insn = OP_IMMED_BASE | + FIELD_PREP(OP_IMMED_A_SRC, areg) | + FIELD_PREP(OP_IMMED_B_SRC, breg) | + FIELD_PREP(OP_IMMED_IMM, imm_hi) | + FIELD_PREP(OP_IMMED_WIDTH, width) | + FIELD_PREP(OP_IMMED_INV, invert) | + FIELD_PREP(OP_IMMED_SHIFT, shift) | + FIELD_PREP(OP_IMMED_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, + enum immed_width width, bool invert, enum immed_shift shift) +{ + struct nfp_insn_ur_regs reg; + int err; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) { + nfp_prog->error = -EFAULT; + return; + } + + err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width, + invert, shift, reg.wr_both); +} + +static void +__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + enum shf_sc sc, u8 shift, + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both) +{ + u64 insn; + + if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { + nfp_prog->error = -EFAULT; + return; + } + + if (sc == SHF_SC_L_SHF) + shift = 32 - shift; + + insn = OP_SHF_BASE | + FIELD_PREP(OP_SHF_A_SRC, areg) | + FIELD_PREP(OP_SHF_SC, sc) | + FIELD_PREP(OP_SHF_B_SRC, breg) | + FIELD_PREP(OP_SHF_I8, i8) | + FIELD_PREP(OP_SHF_SW, sw) | + FIELD_PREP(OP_SHF_DST, dst) | + FIELD_PREP(OP_SHF_SHIFT, shift) | + FIELD_PREP(OP_SHF_OP, op) | + FIELD_PREP(OP_SHF_DST_AB, dst_ab) | + FIELD_PREP(OP_SHF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, + enum shf_sc sc, u8 shift) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(dst, lreg, rreg, &reg, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both); +} + +static void +__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_ALU_BASE | + FIELD_PREP(OP_ALU_A_SRC, areg) | + FIELD_PREP(OP_ALU_B_SRC, breg) | + FIELD_PREP(OP_ALU_DST, dst) | + FIELD_PREP(OP_ALU_SW, swap) | + FIELD_PREP(OP_ALU_OP, op) | + FIELD_PREP(OP_ALU_DST_AB, dst_ab) | + FIELD_PREP(OP_ALU_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(dst, lreg, rreg, &reg); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_alu(nfp_prog, reg.dst, reg.dst_ab, + reg.areg, op, reg.breg, reg.swap, reg.wr_both); +} + +static void +__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, + u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, + bool zero, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_LDF_BASE | + FIELD_PREP(OP_LDF_A_SRC, areg) | + FIELD_PREP(OP_LDF_SC, sc) | + FIELD_PREP(OP_LDF_B_SRC, breg) | + FIELD_PREP(OP_LDF_I8, imm8) | + FIELD_PREP(OP_LDF_SW, swap) | + FIELD_PREP(OP_LDF_ZF, zero) | + FIELD_PREP(OP_LDF_BMASK, bmask) | + FIELD_PREP(OP_LDF_SHF, shift) | + FIELD_PREP(OP_LDF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, + u32 dst, u8 bmask, u32 src, bool zero) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, src, &reg, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, + reg.i8, zero, reg.swap, reg.wr_both); +} + +static void +emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src, + enum shf_sc sc, u8 shift) +{ + emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); +} + +/* --- Wrappers --- */ +static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) +{ + if (!(imm & 0xffff0000)) { + *val = imm; + *shift = IMMED_SHIFT_0B; + } else if (!(imm & 0xff0000ff)) { + *val = imm >> 8; + *shift = IMMED_SHIFT_1B; + } else if (!(imm & 0x0000ffff)) { + *val = imm >> 16; + *shift = IMMED_SHIFT_2B; + } else { + return false; + } + + return true; +} + +static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) +{ + enum immed_shift shift; + u16 val; + + if (pack_immed(imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); + } else if (pack_immed(~imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); + } else { + emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, + false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, + false, IMMED_SHIFT_2B); + } +} + +/*
ur_load_imm_any() - encode immediate or use tmp register (unrestricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(UR_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +/* re_load_imm_any() - encode immediate or use tmp register (restricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(RE_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +static void +wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, + enum br_special special) +{ + emit_br(nfp_prog, mask, 0, 0); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_BR_SPECIAL, special); +} + +static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) +{ + emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src)); +} + +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, + u16 src, bool src_valid, u8 size) +{ + unsigned int i; + u16 shift, sz; + u32 tmp_reg; + + /* We load the value from the address indicated in @offset and then + * shift out the data we don't need. Note: this is big endian! + */ + sz = size < 4 ? 4 : size; + shift = size < 4 ? 4 - size : 0; + + if (src_valid) { + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), + reg_a(src), ALU_OP_ADD, tmp_reg); + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog)); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); + } else { + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, + imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), tmp_reg, sz - 1, true); + } + + i = 0; + if (shift) + emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE, + reg_xfer(0), SHF_SC_R_SHF, shift * 8); + else + for (; i * 4 < size; i++) + emit_alu(nfp_prog, reg_both(i), + reg_none(), ALU_OP_NONE, reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(1), 0); + + return 0; +} + +static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +{ + return construct_data_ind_ld(nfp_prog, offset, 0, false, size); +} + +static void +wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) +{ + u32 tmp_reg; + + if (alu_op == ALU_OP_AND) { + if (!imm) + wrp_immed(nfp_prog, reg_both(dst), 0); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_OR) { + if (!~imm) + wrp_immed(nfp_prog, reg_both(dst), ~0U); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_XOR) { + if (!~imm) + emit_alu(nfp_prog, reg_both(dst), reg_none(), + ALU_OP_NEG, 
reg_b(dst)); + if (!imm || !~imm) + return; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); +} + +static int +wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); + + return 0; +} + +static int +wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + emit_alu(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), alu_op, reg_b(src + 1)); + + return 0; +} + +static int +wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int +wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static void +wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, + enum br_mask br_mask, u16 off) +{ + emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); + emit_br(nfp_prog, br_mask, off, 0); +} + +static int +wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, enum br_mask br_mask) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, + insn->src_reg * 2, br_mask, insn->off); + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); + + return 0; +} + +static int +wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u8 reg = insn->dst_reg * 2; + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); + + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +static int +wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (swap) { + areg ^= breg; + breg ^= areg; + areg ^= breg; + } + + emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, 
reg_b(breg)); + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +/* --- Callbacks --- */ +static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + + return 0; +} + +static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u64 imm = meta->insn.imm; /* sign extend */ + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); + + return 0; +} + +static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); +} + +static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_ADD, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); + + return 0; +} + +static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_SUB, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); + + return 0; +} + +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0); + + return 0; +} + +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* 
TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); +} + +static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); +} + +static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); +} + +static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); +} + +static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); +} + +static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (!insn->imm) + return 1; /* TODO: zero shift means indirect */ + + emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), + reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), + SHF_SC_L_SHF, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), + meta->insn.imm); + + return 0; +} + +static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + meta->double_cb = imm_ld8_part2; + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + + return 0; +} + +static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 1); +} + +static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 2); +} + +static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 4); +} + +static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, 
true, 1); +} + +static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 2); +} + +static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 4); +} + +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, len)) + emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); + else + return -ENOTSUPP; + + return 0; +} + +static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off < 0) /* TODO */ + return -ENOTSUPP; + emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); + + return 0; +} + +static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1); + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_a(nfp_prog), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + or1 = imm_a(nfp_prog); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + or2 = imm_b(nfp_prog); + } + + emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); +} + +static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); +} + +static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + meta->skip = true; + return 0; + } + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + return 0; +} + +static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jeq_reg(struct 
nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), + ALU_OP_XOR, reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), + ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), + imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); +} + +static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); +} + +static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); +} + +static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); +} + +static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + + return 0; +} + +static const instr_cb_t instr_cb[256] = { + [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, + [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, + [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, + [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, + [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, + [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, + [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, + [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, + [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, + [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, + [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, + [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, + [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, + [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, + [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, + [BPF_ALU | BPF_AND | BPF_X] = and_reg, + [BPF_ALU | BPF_AND | BPF_K] = and_imm, + [BPF_ALU | BPF_OR | BPF_X] = or_reg, + [BPF_ALU | BPF_OR | BPF_K] = or_imm, + [BPF_ALU | BPF_ADD | BPF_X] = add_reg, + [BPF_ALU | BPF_ADD | BPF_K] = add_imm, + [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, + [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, + [BPF_LD | BPF_ABS | BPF_B] = data_ld1, + [BPF_LD | BPF_ABS | BPF_H] = data_ld2, + [BPF_LD | BPF_ABS | BPF_W] = data_ld4, + [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, + [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, + [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_JMP | BPF_JA | BPF_K] = jump, + [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, + [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, + [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, + [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, + [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, + [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, + [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, + [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP | BPF_EXIT] = goto_out, +}; + +/* --- Misc code --- */ +static void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= 
FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +/* --- Assembler logic --- */ +static int nfp_fixup_branches(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *next; + u32 off, br_idx; + u32 idx; + + nfp_for_each_insn_walk2(nfp_prog, meta, next) { + if (meta->skip) + continue; + if (BPF_CLASS(meta->insn.code) != BPF_JMP) + continue; + + br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1; + if (!nfp_is_br(nfp_prog->prog[br_idx])) { + pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", + br_idx, meta->insn.code, nfp_prog->prog[br_idx]); + return -ELOOP; + } + /* Leave special branches for later */ + if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + continue; + + /* Find the target offset in assembler realm */ + off = meta->insn.off; + if (!off) { + pr_err("Fixup found zero offset!!\n"); + return -ELOOP; + } + + while (off && nfp_meta_has_next(nfp_prog, next)) { + next = nfp_meta_next(next); + off--; + } + if (off) { + pr_err("Fixup found too large jump!! %d\n", off); + return -ELOOP; + } + + if (next->skip) { + pr_err("Branch landing on removed instruction!!\n"); + return -ELOOP; + } + + for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); + idx <= br_idx; idx++) { + if (!nfp_is_br(nfp_prog->prog[idx])) + continue; + br_set_offset(&nfp_prog->prog[idx], next->off); + } + } + + /* Fixup 'goto out's separately, they can be scattered around */ + for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { + enum br_special special; + + if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) + continue; + + special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); + switch (special) { + case OP_BR_NORMAL: + break; + case OP_BR_GO_OUT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_out); + break; + case OP_BR_GO_ABORT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_abort); + break; + } + + nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; + } + + return 0; +} + +static void nfp_intro(struct nfp_prog *nfp_prog) +{ + emit_alu(nfp_prog, pkt_reg(nfp_prog), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); +} + +static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) +{ + const u8 act2code[] = { + [NN_ACT_TC_DROP] = 0x22, + }; + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + wrp_immed(nfp_prog, reg_both(0), 0); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + /* Legacy TC mode: + * 0 0x11 -> pass, count as stat0 + * -1 drop 0x22 -> drop, count as stat1 + * redir 0x24 -> redir, count as stat1 + * ife mark 0x21 -> pass, count as stat1 + * ife + tx 0x24 -> redir, count as stat1 + */ + emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), + SHF_SC_L_SHF, 16); +} + +static void nfp_outro(struct nfp_prog *nfp_prog) +{ + switch (nfp_prog->act) { + case NN_ACT_TC_DROP: + nfp_outro_tc_legacy(nfp_prog); + break; + } +} + +static int nfp_translate(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int err; + + nfp_intro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + instr_cb_t cb = instr_cb[meta->insn.code]; + + meta->off = nfp_prog_current_offset(nfp_prog); + + if (meta->skip) { + nfp_prog->n_translated++; + 
continue; + } + + if (nfp_meta_has_prev(nfp_prog, meta) && + nfp_meta_prev(meta)->double_cb) + cb = nfp_meta_prev(meta)->double_cb; + if (!cb) + return -ENOENT; + err = cb(nfp_prog, meta); + if (err) + return err; + + nfp_prog->n_translated++; + } + + nfp_outro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + return nfp_fixup_branches(nfp_prog); +} + +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + unsigned int i; + + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->insn = prog[i]; + meta->n = i; + + list_add_tail(&meta->l, &nfp_prog->insns); + } + + return 0; +} + +/* --- Optimizations --- */ +static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + /* Programs converted from cBPF start with register xoring */ + if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && + insn.src_reg == insn.dst_reg) + continue; + + /* Programs start with R6 = R1 but we ignore the skb pointer */ + if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn.src_reg == 1 && insn.dst_reg == 6) + meta->skip = true; + + /* Return as soon as something doesn't match */ + if (!meta->skip) + return; + } +} + +/* Try to rename registers so that program uses only low ones */ +static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog) +{ + bool reg_used[MAX_BPF_REG] = {}; + u8 tgt_reg[MAX_BPF_REG] = {}; + struct nfp_insn_meta *meta; + unsigned int i, j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + + reg_used[meta->insn.src_reg] = true; + reg_used[meta->insn.dst_reg] = true; + } + + for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) { + if (!reg_used[i]) + continue; + + tgt_reg[i] = j++; + } + nfp_prog->num_regs = j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + meta->insn.src_reg = tgt_reg[meta->insn.src_reg]; + meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg]; + } + + return 0; +} + +/* Remove masking after load since our load guarantees this is not needed */ +static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + const s32 exp_mask[] = { + [BPF_B] = 0x000000ffU, + [BPF_H] = 0x0000ffffU, + [BPF_W] = 0xffffffffU, + }; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn insn, next; + + insn = meta1->insn; + next = meta2->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + + if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) + continue; + + if (!exp_mask[BPF_SIZE(insn.code)]) + continue; + if (exp_mask[BPF_SIZE(insn.code)] != next.imm) + continue; + + if (next.src_reg || next.dst_reg) + continue; + + meta2->skip = true; + } +} + +static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2, *meta3; + + nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { + struct bpf_insn insn, next1, next2; + + insn = meta1->insn; + next1 = meta2->insn; + next2 = meta3->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + if (BPF_SIZE(insn.code) != BPF_W) + continue; + + if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && + !(next1.code == (BPF_RSH | BPF_K | 
BPF_ALU64) && + next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) + continue; + + if (next1.src_reg || next1.dst_reg || + next2.src_reg || next2.dst_reg) + continue; + + if (next1.imm != 0x20 || next2.imm != 0x20) + continue; + + meta2->skip = true; + meta3->skip = true; + } +} + +static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) +{ + int ret; + + nfp_bpf_opt_reg_init(nfp_prog); + + ret = nfp_bpf_opt_reg_rename(nfp_prog); + if (ret) + return ret; + + nfp_bpf_opt_ld_mask(nfp_prog); + nfp_bpf_opt_ld_shift(nfp_prog); + + return 0; +} + +/** + * nfp_bpf_jit() - translate BPF code into NFP assembly + * @filter: kernel BPF filter struct + * @prog_mem: memory to store assembler instructions + * @act: action attached to this eBPF program + * @prog_start: offset of the first instruction when loaded + * @prog_done: where to jump on exit + * @prog_sz: size of @prog_mem in instructions + * @res: achieved parameters of translation results + */ +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, + enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + struct nfp_prog *nfp_prog; + int ret; + + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->act = act; + nfp_prog->start_off = prog_start; + nfp_prog->tgt_done = prog_done; + + ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); + if (ret) + goto out; + + ret = nfp_prog_verify(nfp_prog, filter); + if (ret) + goto out; + + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + goto out; + + if (nfp_prog->num_regs <= 7) + nfp_prog->regs_per_thread = 16; + else + nfp_prog->regs_per_thread = 32; + + nfp_prog->prog = prog_mem; + nfp_prog->__prog_alloc_len = prog_sz; + + ret = nfp_translate(nfp_prog); + if (ret) { + pr_err("Translation failed with error %d (translated: %u)\n", + ret, nfp_prog->n_translated); + ret = -EINVAL; + } + + res->n_instr = nfp_prog->prog_len; + res->dense_mode = nfp_prog->num_regs <= 7; +out: + nfp_prog_free(nfp_prog); + + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c new file mode 100644 index 000000000000..ef6775b54168 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include <linux/bpf.h> +#include <linux/bpf_verifier.h> +#include <linux/kernel.h> +#include <linux/pkt_cls.h> + +#include "nfp_bpf.h" +
+/* Analyzer/verifier definitions */ +struct nfp_bpf_analyzer_priv { + struct nfp_prog *prog; + struct nfp_insn_meta *meta; +}; + +static struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns) +{ + unsigned int forward, backward, i; + + backward = meta->n - insn_idx; + forward = insn_idx - meta->n; + + if (min(forward, backward) > n_insns - insn_idx - 1) { + backward = n_insns - insn_idx - 1; + meta = nfp_prog_last_meta(nfp_prog); + } + if (min(forward, backward) > insn_idx && backward > insn_idx) { + forward = insn_idx; + meta = nfp_prog_first_meta(nfp_prog); + } + + if (forward < backward) + for (i = 0; i < forward; i++) + meta = nfp_meta_next(meta); + else + for (i = 0; i < backward; i++) + meta = nfp_meta_prev(meta); + + return meta; +} +
+static int +nfp_bpf_check_exit(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + + if (reg0->type != CONST_IMM) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + return 0; +} + +static int +nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env, u8 reg) +{ + if (env->cur_state.regs[reg].type != PTR_TO_CTX) + return -EINVAL; + + return 0; +} +
+static int +nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +{ + struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; + struct nfp_insn_meta *meta = priv->meta; + + meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); + priv->meta = meta; + + if (meta->insn.src_reg == BPF_REG_10 || + meta->insn.dst_reg == BPF_REG_10) { + pr_err("stack not yet supported\n"); + return -EINVAL; + } + if (meta->insn.src_reg >= MAX_BPF_REG || + meta->insn.dst_reg >= MAX_BPF_REG) { + pr_err("program uses extended registers - jit hardening?\n"); + return -EINVAL; + } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + return nfp_bpf_check_exit(priv->prog, env); + + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.src_reg); + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.dst_reg); + + return 0; +} +
+static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { + .insn_hook = nfp_verify_insn, +}; + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) +{ + struct nfp_bpf_analyzer_priv *priv; + int ret; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->prog = nfp_prog; + priv->meta = nfp_prog_first_meta(nfp_prog); + + ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); + + kfree(priv); + + return ret; +}
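
[Editor's note] nfp_bpf_check_exit() above only admits programs whose return value is a verifier-known constant equal to 0 or to all-ones in the low 32 bits, i.e. the two outcomes the legacy TC outro can encode. A minimal sketch of a cls_bpf classifier that this check would accept, compiled with clang -target bpf (the section name and the length threshold are illustrative only):

#include <linux/bpf.h>

/* The skb->len load is the LDX|MEM|W case handled by mem_ldx4() in the
 * JIT; the constant returns keep nfp_bpf_check_exit() happy: -1 means
 * "matched", so the attached single gact action (e.g. drop) runs,
 * 0 means pass.
 */
__attribute__((section("classifier"), used))
int drop_runts(struct __sk_buff *skb)
{
	if (skb->len < 60)
		return -1;
	return 0;
}

From 7533fdc0f77f207fcc370b10965f4bcee82dfedf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:01 +0100 Subject: [PATCH 0583/1050] nfp: bpf: add hardware bpf 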
offload Add hardware bpf offload on our smart NICs. Detect if capable firmware is loaded and use it to load the code JITed with the just-added translator onto the programmable engines. This commit only supports offloading cls_bpf in legacy mode (non-direct action). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 1 + drivers/net/ethernet/netronome/nfp/nfp_net.h | 26 ++- .../ethernet/netronome/nfp/nfp_net_common.c | 40 +++- .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 44 +++- .../ethernet/netronome/nfp/nfp_net_offload.c | 220 ++++++++++++++++++ 5 files changed, 324 insertions(+), 7 deletions(-) create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 5f12689bf523..0efb2ba9a558 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o nfp_netvf-objs := \ nfp_net_common.o \ nfp_net_ethtool.o \ + nfp_net_offload.o \ nfp_netvf_main.o ifeq ($(CONFIG_BPF_SYSCALL),y)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 690635660195..ea6f5e667f27 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -220,7 +220,7 @@ struct nfp_net_tx_ring { #define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) #define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) #define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) -#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8)) #define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) #define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) #define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) @@ -413,6 +413,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @is_vf: Is the driver attached to a VF? * @is_nfp3200: Is the driver for a NFP-3200 card? * @fw_loaded: Is the firmware loaded? + * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @ctrl: Local copy of the control register/word. * @fl_bufsz: Currently configured size of the freelist buffers * @rx_offset: Offset in the RX buffers where packet data starts @@ -473,6 +474,7 @@ struct nfp_net { unsigned is_vf:1; unsigned is_nfp3200:1; unsigned fw_loaded:1; + unsigned bpf_offload_skip_sw:1; u32 ctrl; u32 fl_bufsz; @@ -561,12 +563,28 @@ struct nfp_net { /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. 
*/ +static inline u16 nn_readb(struct nfp_net *nn, int off) +{ + return readb(nn->ctrl_bar + off); +} + static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) { writeb(val, nn->ctrl_bar + off); } -/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ +/* NFP-3200 can't handle 16-bit accesses too well */ +static inline u16 nn_readw(struct nfp_net *nn, int off) +{ + WARN_ON_ONCE(nn->is_nfp3200); + return readw(nn->ctrl_bar + off); +} + +static inline void nn_writew(struct nfp_net *nn, int off, u16 val) +{ + WARN_ON_ONCE(nn->is_nfp3200); + writew(val, nn->ctrl_bar + off); +} static inline u32 nn_readl(struct nfp_net *nn, int off) { @@ -757,4 +775,8 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf); + #endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e4924de0f..51978dfe883b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -60,6 +60,7 @@ #include <linux/ktime.h> +#include <net/pkt_cls.h> #include <net/vxlan.h> #include "nfp_net_ctrl.h" @@ -2382,6 +2383,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, return stats; } +static bool nfp_net_ebpf_capable(struct nfp_net *nn) +{ + if (nn->cap & NFP_NET_CFG_CTRL_BPF && + nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) + return true; + return false; +} + +static int +nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -ENOTSUPP; + if (proto != htons(ETH_P_ALL)) + return -ENOTSUPP; + + if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) + return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf); + + return -EINVAL; +} + static int nfp_net_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2436,6 +2462,11 @@ static int nfp_net_set_features(struct net_device *netdev, new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; } + if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) { + nn_err(nn, "Cannot disable HW TC offload while in use\n"); + return -EBUSY; + } + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", netdev->features, features, changed); @@ -2585,6 +2616,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { .ndo_stop = nfp_net_netdev_close, .ndo_start_xmit = nfp_net_tx, .ndo_get_stats64 = nfp_net_stat64, + .ndo_setup_tc = nfp_net_setup_tc, .ndo_tx_timeout = nfp_net_tx_timeout, .ndo_set_rx_mode = nfp_net_set_rx_mode, .ndo_change_mtu = nfp_net_change_mtu, @@ -2610,7 +2642,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -2627,7 +2659,8 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", - nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", + nfp_net_ebpf_capable(nn) ? 
"BPF " : ""); } /** @@ -2795,6 +2828,9 @@ int nfp_net_netdev_init(struct net_device *netdev) netdev->features = netdev->hw_features; + if (nfp_net_ebpf_capable(nn)) + netdev->hw_features |= NETIF_F_HW_TC; + /* Advertise but disable TSO by default. */ netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ad6c4e31cedd..7aa11f3c5ef0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -123,6 +123,7 @@ #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ #define NFP_NET_CFG_UPDATE 0x0004 #define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ #define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ @@ -134,6 +135,7 @@ #define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ #define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -196,10 +198,37 @@ #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** - * 64B reserved for future use (0x0080 - 0x00c0) + * NFP6000 - BPF section + * @NFP_NET_CFG_BPF_ABI: BPF ABI version + * @NFP_NET_CFG_BPF_CAP: BPF capabilities + * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ -#define NFP_NET_CFG_RESERVED 0x0080 -#define NFP_NET_CFG_RESERVED_SZ 0x0040 +#define NFP_NET_CFG_BPF_ABI 0x0080 +#define NFP_NET_BPF_ABI 1 +#define NFP_NET_CFG_BPF_CAP 0x0081 +#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ +#define NFP_NET_CFG_BPF_MAX_LEN 0x0082 +#define NFP_NET_CFG_BPF_START 0x0084 +#define NFP_NET_CFG_BPF_DONE 0x0086 +#define NFP_NET_CFG_BPF_STACK_SZ 0x0088 +#define NFP_NET_CFG_BPF_INL_MTU 0x0089 +#define NFP_NET_CFG_BPF_SIZE 0x008e +#define NFP_NET_CFG_BPF_ADDR 0x0090 +#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */ +#define NFP_NET_CFG_BPF_CFG_MASK 7ULL +#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) + +/** + * 40B reserved for future use (0x0098 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 /** * RSS configuration (0x0100 - 0x01ac): @@ -303,6 +332,15 @@ #define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) #define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) +#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) +#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) +#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0) +#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8) +#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0) +#define 
NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8) +#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) +#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) + /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c new file mode 100644 index 000000000000..313988cd3a43 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * nfp_net_offload.c + * Netronome network device driver: TC offload functions for PF and VF + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <linux/list.h> + +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + +#include "nfp_bpf.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" +
+static int +nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + /* TC direct action */ + if (cls_bpf->exts_integrated) + return -ENOTSUPP; + + /* TC legacy mode */ + if (!tc_single_action(cls_bpf->exts)) + return -ENOTSUPP; + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) + return NN_ACT_TC_DROP; + } + + return -ENOTSUPP; +} +
+static int +nfp_net_bpf_offload_prepare(struct nfp_net *nn, + struct tc_cls_bpf_offload *cls_bpf, + struct nfp_bpf_result *res, + void **code, dma_addr_t *dma_addr, u16 max_instr) +{ + unsigned int code_sz = max_instr * sizeof(u64); + enum nfp_bpf_action_type act; + u16 start_off, done_off; + unsigned int max_mtu; + int ret; + + ret = nfp_net_bpf_get_act(nn, cls_bpf); + if (ret < 0) + return ret; + act = ret; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (max_mtu < nn->netdev->mtu) { + nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); + return -ENOTSUPP; + } + + start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + *code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr, + GFP_KERNEL); + if (!*code) + return -ENOMEM; + + ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, + max_instr, res); + if (ret) + goto out; + + return 0; + +out: + dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr); + return ret; +} +
+static void +nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, + void *code, dma_addr_t dma_addr, + unsigned int code_sz, unsigned int n_instr, + bool dense_mode) +{ + u64 bpf_addr = dma_addr; + int err; + + nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); + + if (dense_mode) + bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; + + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); + + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + nn_err(nn, "FW command error while loading BPF: %d\n", err); + + /* Enable passing packets through BPF function */ + nn->ctrl |= NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + nn_err(nn, "FW command error while enabling BPF: %d\n", err); + + dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); +} +
+static int nfp_net_bpf_stop(struct nfp_net *nn) +{ + if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) + return 0; + + nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + + nn->bpf_offload_skip_sw = 0; + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); +} +
+int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf) +{ + struct nfp_bpf_result res; + dma_addr_t dma_addr; + u16 max_instr; + void *code; + int err; + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + /* There is nothing stopping us from implementing seamless + * replace but the simple method of loading I adopted in + * the firmware does not handle atomic replace (i.e. 
we have to + * stop the BPF offload and re-enable it). Leaking-in a few + * frames which didn't have BPF applied in the hardware should + * be fine if software fallback is available, though. + */ + if (nn->bpf_offload_skip_sw) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_stop(nn); + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_ADD: + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_stop(nn); + + default: + return -ENOTSUPP; + } +} From 68d640630d4ef2a4bf3f68b5073dec5e4c4f878b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:02 +0100 Subject: [PATCH 0584/1050] net: cls_bpf: allow offloaded filters to update stats Call into offloaded filters to update stats. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + net/sched/cls_bpf.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 57af9f3032ff..5ccaa4be7d96 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -490,6 +490,7 @@ enum tc_clsbpf_command { TC_CLSBPF_ADD, TC_CLSBPF_REPLACE, TC_CLSBPF_DESTROY, + TC_CLSBPF_STATS, }; struct tc_cls_bpf_offload { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1becc2fe1bc5..bb1d5a487081 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -221,6 +221,15 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp, prog->offloaded = false; } +static void cls_bpf_offload_update_stats(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + if (!prog->offloaded) + return; + + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -577,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, tm->tcm_handle = prog->handle; + cls_bpf_offload_update_stats(tp, prog); + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; From 66860beb7ed5df11433528cb535d5e9f7dad2302 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:03 +0100 Subject: [PATCH 0585/1050] nfp: bpf: allow offloaded filters to update stats Periodically poll stats and call into offloaded actions to update them. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 19 ++++++ .../ethernet/netronome/nfp/nfp_net_common.c | 3 + .../ethernet/netronome/nfp/nfp_net_ethtool.c | 12 ++++ .../ethernet/netronome/nfp/nfp_net_offload.c | 63 +++++++++++++++++++ 4 files changed, 97 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index ea6f5e667f27..13c6a9001b4d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -62,6 +62,9 @@ /* Max time to wait for NFP to respond on updates (in seconds) */ #define NFP_NET_POLL_TIMEOUT 5 +/* Interval for reading offloaded filter stats */ +#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100) + /* Bar allocation */ #define NFP_NET_CTRL_BAR 0 #define NFP_NET_Q0_BAR 2 @@ -405,6 +408,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, fw_ver->minor == minor; } +struct nfp_stat_pair { + u64 pkts; + u64 bytes; +}; + /** * struct nfp_net - NFP network device structure * @pdev: Backpointer to PCI device @@ -428,6 +436,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @rss_cfg: RSS configuration * @rss_key: RSS secret key * @rss_itbl: RSS indirection table + * @rx_filter: Filter offload statistics - dropped packets/bytes + * @rx_filter_prev: Filter offload statistics - values from previous update + * @rx_filter_change: Jiffies when statistics last changed + * @rx_filter_stats_timer: Timer for polling filter offload statistics + * @rx_filter_lock: Lock protecting timer state changes (teardown) * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware * @num_tx_rings: Currently configured number of TX rings @@ -504,6 +517,11 @@ struct nfp_net { u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + struct nfp_stat_pair rx_filter, rx_filter_prev; + unsigned long rx_filter_change; + struct timer_list rx_filter_stats_timer; + spinlock_t rx_filter_lock; + int max_tx_rings; int max_rx_rings; @@ -775,6 +793,7 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +void nfp_net_filter_stats_timer(unsigned long data); int nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, struct tc_cls_bpf_offload *cls_bpf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 51978dfe883b..f091eb758ca2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2703,10 +2703,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->rx_filter_lock); spin_lock_init(&nn->link_status_lock); setup_timer(&nn->reconfig_timer, nfp_net_reconfig_timer, (unsigned long)nn); + setup_timer(&nn->rx_filter_stats_timer, + nfp_net_filter_stats_timer, (unsigned long)nn); return nn; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 4c9897220969..3418f2277e9d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = { {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, {"dev_tx_mc_pkts", 
NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, + + {"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)}, + {"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)}, + /* see comments in outro functions in nfp_bpf_jit.c to find out + * how different BPF modes use app-specific counters + */ + {"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)}, + {"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)}, + {"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)}, + {"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)}, + {"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)}, + {"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)}, }; #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 313988cd3a43..0537a53e2174 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -51,6 +51,60 @@ #include "nfp_net_ctrl.h" #include "nfp_net.h" +void nfp_net_filter_stats_timer(unsigned long data) +{ + struct nfp_net *nn = (void *)data; + struct nfp_stat_pair latest; + + spin_lock_bh(&nn->rx_filter_lock); + + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + mod_timer(&nn->rx_filter_stats_timer, + jiffies + NFP_NET_STAT_POLL_IVL); + + spin_unlock_bh(&nn->rx_filter_lock); + + latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + + if (latest.pkts != nn->rx_filter.pkts) + nn->rx_filter_change = jiffies; + + nn->rx_filter = latest; +} + +static void nfp_net_bpf_stats_reset(struct nfp_net *nn) +{ + nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + nn->rx_filter_prev = nn->rx_filter; + nn->rx_filter_change = jiffies; +} + +static int +nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + struct tc_action *a; + LIST_HEAD(actions); + u64 bytes, pkts; + + pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts; + bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes; + bytes -= pkts * ETH_HLEN; + + nn->rx_filter_prev = nn->rx_filter; + + preempt_disable(); + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) + tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change); + + preempt_enable(); + + return 0; +} + static int nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) {
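
[Editor's note] The byte-counter adjustment in nfp_net_bpf_stats_update() above subtracts ETH_HLEN once per packet, presumably because the FW's APP1 counters include the Ethernet header while TC byte statistics are reported without it. A worked example of the delta computation, with invented counter values:

/* deltas since the previous 100ms poll (values illustrative only) */
u64 pkts  = 1010   - 1000;    /* rx_filter.pkts  - rx_filter_prev.pkts  */
u64 bytes = 151500 - 136360;  /* rx_filter.bytes - rx_filter_prev.bytes */

bytes -= pkts * 14 /* ETH_HLEN */;  /* 15140 - 140 = 15000 bytes given to TC */

@@ -147,6 +201,9 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, nn_err(nn, "FW command error while enabling BPF: %d\n", err); dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); + + nfp_net_bpf_stats_reset(nn); + mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL); } static int nfp_net_bpf_stop(struct nfp_net *nn) @@ -154,9 +211,12 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) return 0; + spin_lock_bh(&nn->rx_filter_lock); nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + spin_unlock_bh(&nn->rx_filter_lock); nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + del_timer_sync(&nn->rx_filter_stats_timer); nn->bpf_offload_skip_sw = 0; return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); @@ -214,6 +274,9 @@ nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, case TC_CLSBPF_DESTROY: return 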
nfp_net_bpf_stop(nn); + case TC_CLSBPF_STATS: + return nfp_net_bpf_stats_update(nn, cls_bpf); + default: return -ENOTSUPP; } From 19d0f54edab6e77b6b73277ac33717be1f858fa8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:04 +0100 Subject: [PATCH 0586/1050] nfp: bpf: add packet marking support Add missing ABI defines and eBPF instructions to allow mark to be passed on and extend prepend parsing on the RX path to pick it up from packet metadata. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 2 + .../net/ethernet/netronome/nfp/nfp_bpf_jit.c | 19 ++++ drivers/net/ethernet/netronome/nfp/nfp_net.h | 2 + .../ethernet/netronome/nfp/nfp_net_common.c | 95 ++++++++++++++----- .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 7 ++ .../ethernet/netronome/nfp/nfp_netvf_main.c | 2 +- 6 files changed, 103 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 3726421e353f..2adb1d80c7b7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -91,6 +91,8 @@ enum nfp_bpf_reg_type { #define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) #define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_FLAG_MARK 1 +#define NFP_BPF_ABI_MARK reg_nnr(1) #define NFP_BPF_ABI_PKT reg_nnr(2) #define NFP_BPF_ABI_LEN reg_nnr(3) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index cfbf53607fc9..368381f0357f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -674,6 +674,16 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) return construct_data_ind_ld(nfp_prog, offset, 0, false, size); } +static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) +{ + emit_alu(nfp_prog, NFP_BPF_ABI_MARK, + reg_none(), ALU_OP_NONE, reg_b(src)); + emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS, + NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK)); + + return 0; +} + static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { @@ -1117,6 +1127,14 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, mark)) + return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); + + return -ENOTSUPP; +} + static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off < 0) /* TODO */ @@ -1306,6 +1324,7 @@ static const instr_cb_t instr_cb[256] = { [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, [BPF_JMP | BPF_JA | BPF_K] = jump, [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 13c6a9001b4d..ed824e11a1e3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -269,6 +269,8 @@ struct nfp_net_rx_desc { }; }; +#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0) + struct nfp_net_rx_hash { __be32 hash_type; __be32 hash; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index f091eb758ca2..415691edcaa5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1293,36 +1293,70 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, } } -/** - * nfp_net_set_hash() - Set SKB hash data - * @netdev: adapter's net_device structure - * @skb: SKB to set the hash data on - * @rxd: RX descriptor - * - * The RSS hash and hash-type are pre-pended to the packet data. - * Extract and decode it and set the skb fields. - */ static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, - struct nfp_net_rx_desc *rxd) + unsigned int type, __be32 *hash) +{ + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (type) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3); + break; + default: + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4); + break; + } +} + +static void +nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) { struct nfp_net_rx_hash *rx_hash; - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || - !(netdev->features & NETIF_F_RXHASH)) + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) return; rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); - switch (be32_to_cpu(rx_hash->hash_type)) { - case NFP_NET_RSS_IPV4: - case NFP_NET_RSS_IPV6: - case NFP_NET_RSS_IPV6_EX: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); - break; - default: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); - break; + nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type), + &rx_hash->hash); +} + +static void * +nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, + int meta_len) +{ + u8 *data = skb->data - meta_len; + u32 meta_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_net_set_hash(netdev, skb, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + skb->mark = get_unaligned_be32(data); + data += 4; + break; + default: + return NULL; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; } + + return data; } /** @@ -1439,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) skb_reserve(skb, nn->rx_offset); skb_put(skb, data_len - meta_len); - nfp_net_set_hash(nn->netdev, skb, rxd); - /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; r_vec->rx_bytes += skb->len; u64_stats_update_end(&r_vec->rx_sync); + if (nn->fw_ver.major <= 3) { + nfp_net_set_hash_desc(nn->netdev, skb, rxd); + } else if (meta_len) { + void *end; + + end = nfp_net_parse_meta(nn->netdev, skb, meta_len); + if (unlikely(end != skb->data)) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "invalid RX packet metadata\n"); + continue; + } + } + skb_record_rx_queue(skb, rx_ring->idx); skb->protocol = eth_type_trans(skb, nn->netdev); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 7aa11f3c5ef0..93b10b441acb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ 
b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -65,6 +65,13 @@ */ #define NFP_NET_LSO_MAX_HDR_SZ 255 +/** + * Prepend field types + */ +#define NFP_NET_META_FIELD_SIZE 4 +#define NFP_NET_META_HASH 1 /* next field carries hash type */ +#define NFP_NET_META_MARK 2 + /** * Hash type pre-pended when a RSS hash was computed */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index f7062cb648e1..2800bbf65a89 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); } else { switch (fw_ver.major) { - case 1 ... 3: + case 1 ... 4: if (is_nfp3200) { stride = 2; tx_bar_no = NFP_NET_Q0_BAR; From 9798e6fe4f9b6a2847a40e24b75e68afdc7a01b3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:05 +0100 Subject: [PATCH 0587/1050] net: act_mirred: allow statistic updates from offloaded actions Implement .stats_update() callback. The implementation is generic and can be reused by other simple actions if needed. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1c76387c5d9c..667dc382df82 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,6 +204,13 @@ out: return retval; } +static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse) +{ + tcf_lastuse_update(&a->tcfa_tm); + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); +} + static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -281,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = { .type = TCA_ACT_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred, + .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, From 2d18421debc29a338e6783c06fb75ab7b16fc9ba Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:06 +0100 Subject: [PATCH 0588/1050] nfp: bpf: add support for legacy redirect action Data path has redirect support so expressing redirect to the port the frame came from is a trivial matter of setting the right result code. Signed-off-by: Jakub Kicinski Signed-off-by: David S.
Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 1 + drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 2 ++ drivers/net/ethernet/netronome/nfp/nfp_net_offload.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 2adb1d80c7b7..adbe0235d98e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -60,6 +60,7 @@ enum static_regs { enum nfp_bpf_action_type { NN_ACT_TC_DROP, + NN_ACT_TC_REDIR, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 368381f0357f..434bef975c58 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -1440,6 +1440,7 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) { const u8 act2code[] = { [NN_ACT_TC_DROP] = 0x22, + [NN_ACT_TC_REDIR] = 0x24 }; /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); @@ -1468,6 +1469,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { case NN_ACT_TC_DROP: + case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); break; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 0537a53e2174..1ec8e5b74651 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -123,6 +123,10 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) list_for_each_entry(a, &actions, list) { if (is_tcf_gact_shot(a)) return NN_ACT_TC_DROP; + + if (is_tcf_mirred_redirect(a) && + tcf_mirred_ifindex(a) == nn->netdev->ifindex) + return NN_ACT_TC_REDIR; } return -ENOTSUPP; From e3b8baf0ca2a69f88846b5446234e5647ecd17eb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:07 +0100 Subject: [PATCH 0589/1050] nfp: bpf: add offload of TC direct action mode Add offload of TC in direct action mode. We just need to provide appropriate checks in the verifier and a new outro block to translate the exit codes to what data path expects Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 1 + .../net/ethernet/netronome/nfp/nfp_bpf_jit.c | 66 +++++++++++++++++++ .../ethernet/netronome/nfp/nfp_bpf_verifier.c | 11 +++- .../ethernet/netronome/nfp/nfp_net_offload.c | 6 +- 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index adbe0235d98e..fc220cd04115 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -61,6 +61,7 @@ enum static_regs { enum nfp_bpf_action_type { NN_ACT_TC_DROP, NN_ACT_TC_REDIR, + NN_ACT_DIRECT, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 434bef975c58..3de819acd68c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -321,6 +321,16 @@ __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, nfp_prog_push(nfp_prog, insn); } +static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +{ + if (defer > 2) { + pr_err("BUG: branch defer out of bounds %d\n", defer); + nfp_prog->error = -EFAULT; + return; + } + __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); +} + static void emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) { @@ -1465,9 +1475,65 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) SHF_SC_L_SHF, 16); } +static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) +{ + /* TC direct-action mode: + * 0,1 ok NOT SUPPORTED[1] + * 2 drop 0x22 -> drop, count as stat1 + * 4,5 nuke 0x02 -> drop + * 7 redir 0x44 -> redir, count as stat2 + * * unspec 0x11 -> pass, count as stat0 + * + * [1] We can't support OK and RECLASSIFY because we can't tell TC + * the exact decision made. We are forced to support UNSPEC + * to handle aborts so that's the only one we handle for passing + * packets up the stack. 
+ */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 7 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + + wrp_immed(nfp_prog, reg_b(2), 0x41221211); + wrp_immed(nfp_prog, reg_b(3), 0x41001211); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_a(2), + reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_shf(nfp_prog, reg_b(2), + reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { + case NN_ACT_DIRECT: + nfp_outro_tc_da(nfp_prog); + break; case NN_ACT_TC_DROP: case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c index ef6775b54168..144cae87f63a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -86,7 +86,16 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, return -EINVAL; } - if (reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + if (nfp_prog->act != NN_ACT_DIRECT && + reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT && + reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN && + reg0->imm != TC_ACT_QUEUED) { pr_info("unsupported exit state: %d, imm: %llx\n", reg0->type, reg0->imm); return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 1ec8e5b74651..43f42f842eda 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -112,8 +112,12 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) LIST_HEAD(actions); /* TC direct action */ - if (cls_bpf->exts_integrated) + if (cls_bpf->exts_integrated) { + if (tc_no_actions(cls_bpf->exts)) + return NN_ACT_DIRECT; + return -ENOTSUPP; + } /* TC legacy mode */ if (!tc_single_action(cls_bpf->exts)) From 1e5ec2e709bd8c5588fdbdda909945e4e2be8d23 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 15 Sep 2016 14:57:48 -0400 Subject: [PATCH 0590/1050] Btrfs: handle quota reserve failure properly btrfs/022 was spitting a warning for the case that we exceed the quota. If we fail to make our quota reservation we need to clean up our data space reservation. 
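As a hedged illustration of the reserve/unwind pairing this fix establishes (a sketch mirroring the hunk below, not a drop-in replacement):

	/* If the qgroup reservation fails after the data space reservation
	 * already succeeded, the data space must be released before the
	 * error is propagated, otherwise the reservation leaks.
	 */
	ret = btrfs_qgroup_reserve_data(inode, start, len);
	if (ret)
		btrfs_free_reserved_data_space_noquota(inode, start, len);
	return ret;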
Thanks, Signed-off-by: Josef Bacik Tested-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d09cf7aa083b..db76cc18c562 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4271,13 +4271,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) if (ret < 0) return ret; - /* - * Use new btrfs_qgroup_reserve_data to reserve precious data space - * - * TODO: Find a good method to avoid reserve data space for NOCOW - * range, but don't impact performance on quota disable case. - */ + /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ ret = btrfs_qgroup_reserve_data(inode, start, len); + if (ret) + btrfs_free_reserved_data_space_noquota(inode, start, len); return ret; } From 325c50e3cebb9208009083e841550f98a863bfa0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 21 Sep 2016 08:31:29 -0400 Subject: [PATCH 0591/1050] btrfs: ensure that file descriptor used with subvol ioctls is a dir If the subvol/snapshot create/destroy ioctls are passed a regular file with execute permissions set, we'll eventually Oops while trying to do inode->i_op->lookup via lookup_one_len. This patch ensures that the file descriptor refers to a directory. Fixes: cb8e70901d (Btrfs: Fix subvolume creation locking rules) Fixes: 76dda93c6a (Btrfs: add snapshot/subvolume destroy ioctl) Cc: #v2.6.29+ Signed-off-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b2a2da5893af..7fd939bfbd99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1634,6 +1634,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, int namelen; int ret = 0; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + ret = mnt_want_write_file(file); if (ret) goto out; @@ -1691,6 +1694,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, struct btrfs_ioctl_vol_args *vol_args; int ret; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -1714,6 +1720,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, bool readonly = false; struct btrfs_qgroup_inherit *inherit = NULL; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -2357,6 +2366,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, int ret; int err = 0; + if (!S_ISDIR(dir->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); From 5a924b8951f835b5ff8a3d9f434f3b230fc9905f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 0592/1050] rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs Don't store the rxrpc protocol header in sk_buffs on the transmit queue, but rather generate it on the fly and pass it to kernel_sendmsg() as a separate iov. This reduces the amount of storage required. Note that the security header is still stored in the sk_buff as it may get encrypted along with the data (and doesn't change with each transmission). 
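A minimal sketch of the two-element kvec transmit described above (names follow the patch below; msghdr setup and error handling elided):

	struct rxrpc_wire_header whdr;	/* rebuilt for every transmission */
	struct kvec iov[2];
	size_t len;

	iov[0].iov_base = &whdr;	/* on-the-fly protocol header */
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = skb->head;	/* payload still held in the sk_buff */
	iov[1].iov_len = skb->len;
	len = iov[0].iov_len + iov[1].iov_len;

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);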
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++-- net/rxrpc/call_event.c | 11 ++---- net/rxrpc/conn_object.c | 1 - net/rxrpc/output.c | 79 ++++++++++++++++++++++++++++------------- net/rxrpc/rxkad.c | 8 ++--- net/rxrpc/sendmsg.c | 51 +++++--------------------- 6 files changed, 70 insertions(+), 87 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 034f525f2235..f021df4a6a22 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -385,10 +385,9 @@ struct rxrpc_connection { int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ - u8 size_align; /* data size alignment (for security) */ - u8 header_size; /* rxrpc + security header size */ - u8 security_size; /* security header size */ u32 security_nonce; /* response re-use preventer */ + u8 size_align; /* data size alignment (for security) */ + u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; @@ -946,7 +945,7 @@ extern const s8 rxrpc_ack_priority[]; * output.c */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); -int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *); void rxrpc_reject_packets(struct rxrpc_local *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7d1b99824ed9..6247ce25eb21 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -139,7 +139,6 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, */ static void rxrpc_resend(struct rxrpc_call *call) { - struct rxrpc_wire_header *whdr; struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; @@ -201,15 +200,8 @@ static void rxrpc_resend(struct rxrpc_call *call) skb = call->rxtx_buffer[ix]; rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); - sp = rxrpc_skb(skb); - /* Each Tx packet needs a new serial number */ - sp->hdr.serial = atomic_inc_return(&call->conn->serial); - - whdr = (struct rxrpc_wire_header *)skb->head; - whdr->serial = htonl(sp->hdr.serial); - - if (rxrpc_send_data_packet(call->conn, skb) < 0) { + if (rxrpc_send_data_packet(call, skb) < 0) { call->resend_at = now + 2; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; @@ -217,6 +209,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); + sp = rxrpc_skb(skb); sp->resend_at = now + rxrpc_resend_timeout; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3b55aee0c436..e1e83af47866 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -53,7 +53,6 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); conn->size_align = 4; - conn->header_size = sizeof(struct rxrpc_wire_header); conn->idle_timestamp = jiffies; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 16e18a94ffa6..817fb0e82d6a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -208,19 +208,42 @@ out: /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) { - struct kvec iov[1]; + struct rxrpc_connection *conn = call->conn; + struct 
rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len; int ret, opt; _enter(",{%d}", skb->len); - iov[0].iov_base = skb->head; - iov[0].iov_len = skb->len; + /* Each transmission of a Tx packet needs a new serial number */ + serial = atomic_inc_return(&conn->serial); - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(call->cid); + whdr.callNumber = htonl(call->call_id); + whdr.seq = htonl(sp->hdr.seq); + whdr.serial = htonl(serial); + whdr.type = RXRPC_PACKET_TYPE_DATA; + whdr.flags = sp->hdr.flags; + whdr.userStatus = 0; + whdr.securityIndex = call->security_ix; + whdr._rsvd = htons(sp->hdr._rsvd); + whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = skb->head; + iov[1].iov_len = skb->len; + len = iov[0].iov_len + iov[1].iov_len; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -234,26 +257,33 @@ int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) } } + _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); + /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { - down_read(&conn->params.local->defrag_sem); - /* send the packet by UDP - * - returns -EMSGSIZE if UDP would have to fragment the packet - * to go out of the interface - * - in which case, we'll have processed the ICMP error - * message and update the peer record - */ - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + if (iov[1].iov_len >= call->peer->maxdata) + goto send_fragmentable; - up_read(&conn->params.local->defrag_sem); - if (ret == -EMSGSIZE) - goto send_fragmentable; + down_read(&conn->params.local->defrag_sem); + /* send the packet by UDP + * - returns -EMSGSIZE if UDP would have to fragment the packet + * to go out of the interface + * - in which case, we'll have processed the ICMP error + * message and update the peer record + */ + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - _leave(" = %d [%u]", ret, conn->params.peer->maxdata); - return ret; + up_read(&conn->params.local->defrag_sem); + if (ret == -EMSGSIZE) + goto send_fragmentable; + +done: + if (ret == 0) { + sp->resend_at = jiffies + rxrpc_resend_timeout; + sp->hdr.serial = serial; } + _leave(" = %d [%u]", ret, call->peer->maxdata); + return ret; send_fragmentable: /* attempt to send this message with fragmentation enabled */ @@ -268,8 +298,8 @@ send_fragmentable: SOL_IP, IP_MTU_DISCOVER, (char *)&opt, sizeof(opt)); if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); opt = IP_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, SOL_IP, @@ -298,8 +328,7 @@ send_fragmentable: } up_write(&conn->params.local->defrag_sem); - _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); - return ret; + goto done; } /* diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ae392558829d..88d080a1a3de 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -80,12 +80,10 @@ static int rxkad_init_connection_security(struct rxrpc_connection 
*conn) case RXRPC_SECURITY_AUTH: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level1_hdr); - conn->header_size += sizeof(struct rxkad_level1_hdr); break; case RXRPC_SECURITY_ENCRYPT: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level2_hdr); - conn->header_size += sizeof(struct rxkad_level2_hdr); break; default: ret = -EKEYREJECTED; @@ -161,7 +159,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; data_size |= (u32)check << 16; hdr.data_size = htonl(data_size); @@ -205,7 +203,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; rxkhdr.data_size = htonl(data_size | (u32)check << 16); rxkhdr.checksum = 0; @@ -277,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* calculate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; - call->crypto_buf[0] = htonl(sp->hdr.callNumber); + call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 6a39ee97a0b7..814b17f23971 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -134,13 +134,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, write_unlock_bh(&call->state_lock); } - _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); sp->resend_at = jiffies + rxrpc_resend_timeout; - ret = rxrpc_send_data_packet(call->conn, skb); + ret = rxrpc_send_data_packet(call, skb); if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); @@ -150,29 +148,6 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _leave(""); } -/* - * Convert a host-endian header into a network-endian header. 
- */ -static void rxrpc_insert_header(struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = htonl(sp->hdr.serial); - whdr.type = sp->hdr.type; - whdr.flags = sp->hdr.flags; - whdr.userStatus = sp->hdr.userStatus; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = htons(sp->hdr._rsvd); - whdr.serviceId = htons(sp->hdr.serviceId); - - memcpy(skb->head, &whdr, sizeof(whdr)); -} - /* * send data through a socket * - must be called in process context @@ -232,7 +207,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, space = chunk + call->conn->size_align; space &= ~(call->conn->size_align - 1UL); - size = space + call->conn->header_size; + size = space + call->conn->security_size; _debug("SIZE: %zu/%zu/%zu", chunk, space, size); @@ -248,9 +223,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ASSERTCMP(skb->mark, ==, 0); - _debug("HS: %u", call->conn->header_size); - skb_reserve(skb, call->conn->header_size); - skb->len += call->conn->header_size; + _debug("HS: %u", call->conn->security_size); + skb_reserve(skb, call->conn->security_size); + skb->len += call->conn->security_size; sp = rxrpc_skb(skb); sp->remain = chunk; @@ -312,33 +287,23 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, seq = call->tx_top + 1; - sp->hdr.epoch = conn->proto.epoch; - sp->hdr.cid = call->cid; - sp->hdr.callNumber = call->call_id; sp->hdr.seq = seq; - sp->hdr.serial = atomic_inc_return(&conn->serial); - sp->hdr.type = RXRPC_PACKET_TYPE_DATA; - sp->hdr.userStatus = 0; - sp->hdr.securityIndex = call->security_ix; sp->hdr._rsvd = 0; - sp->hdr.serviceId = call->service_id; + sp->hdr.flags = conn->out_clientflag; - sp->hdr.flags = conn->out_clientflag; if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; else if (call->tx_top - call->tx_hard_ack < call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (more && seq & 1) + if (seq & 1) sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = conn->security->secure_packet( - call, skb, skb->mark, - skb->head + sizeof(struct rxrpc_wire_header)); + call, skb, skb->mark, skb->head); if (ret < 0) goto out; - rxrpc_insert_header(skb); rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); skb = NULL; } From f07373ead455a396e15a431bc08d8ce1dac6f1cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: [PATCH 0593/1050] rxrpc: Add re-sent Tx annotation Add a Tx-phase annotation for packet buffers to indicate that a buffer has already been retransmitted. This will be used by future congestion management. Re-retransmissions of a packet don't affect the congestion window management in the same way as initial retransmissions.
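A hedged sketch of the annotation layout this patch introduces (the low two bits carry the ACK state, bit 2 is the sticky re-sent flag; the two parts update independently):

	u8 annotation = call->rxtx_annotations[ix];
	u8 anno_type = annotation & RXRPC_TX_ANNO_MASK;	/* ACK/UNACK/NAK/RETRANS */

	annotation &= ~RXRPC_TX_ANNO_MASK;	/* keep the flag bits */
	annotation |= RXRPC_TX_ANNO_UNACK;	/* install the new state */
	annotation |= RXRPC_TX_ANNO_RESENT;	/* remember the retransmission */
	call->rxtx_annotations[ix] = annotation;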
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_event.c | 28 +++++++++++++++++++--------- net/rxrpc/input.c | 14 +++++++++++--- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f021df4a6a22..dcf54e3fb478 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -505,6 +505,8 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_UNACK 1 #define RXRPC_TX_ANNO_NAK 2 #define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_TX_ANNO_MASK 0x03 +#define RXRPC_TX_ANNO_RESENT 0x04 #define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ #define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6247ce25eb21..34ad967f2d81 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -144,7 +144,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_seq_t cursor, seq, top; unsigned long resend_at, now; int ix; - u8 annotation; + u8 annotation, anno_type; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); @@ -165,14 +165,16 @@ static void rxrpc_resend(struct rxrpc_call *call) for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; - if (annotation == RXRPC_TX_ANNO_ACK) + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_ACK) continue; skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb, rxrpc_skb_tx_seen); sp = rxrpc_skb(skb); - if (annotation == RXRPC_TX_ANNO_UNACK) { + if (anno_type == RXRPC_TX_ANNO_UNACK) { if (time_after(sp->resend_at, now)) { if (time_before(sp->resend_at, resend_at)) resend_at = sp->resend_at; @@ -181,7 +183,7 @@ static void rxrpc_resend(struct rxrpc_call *call) } /* Okay, we need to retransmit a packet. */ - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } call->resend_at = resend_at; @@ -194,7 +196,8 @@ static void rxrpc_resend(struct rxrpc_call *call) for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; - if (annotation != RXRPC_TX_ANNO_RETRANS) + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type != RXRPC_TX_ANNO_RETRANS) continue; skb = call->rxtx_buffer[ix]; @@ -220,10 +223,17 @@ static void rxrpc_resend(struct rxrpc_call *call) * received and the packet might have been hard-ACK'd (in which * case it will no longer be in the buffer). 
*/ - if (after(seq, call->tx_hard_ack) && - (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS || - call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)) - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + if (after(seq, call->tx_hard_ack)) { + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_RETRANS || + anno_type == RXRPC_TX_ANNO_NAK) { + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK; + } + annotation |= RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + } if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7ac1edf3aac7..aa261df9fc9e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -388,17 +388,25 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, { bool resend = false; int ix; + u8 annotation, anno_type; for (; nr_acks > 0; nr_acks--, seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; switch (*acks++) { case RXRPC_ACK_TYPE_ACK: - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_ACK | annotation; break; case RXRPC_ACK_TYPE_NACK: - if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK) + if (anno_type == RXRPC_TX_ANNO_NAK) continue; - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_NAK | annotation; resend = true; break; default: From cf1a6474f80735ff4a5d99f3dd68a94dbec8455f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:41:53 +0100 Subject: [PATCH 0594/1050] rxrpc: Add per-peer RTT tracker Add a function to track the average RTT for a peer. Sources of RTT data will be added in subsequent patches. The RTT data will be useful in the future for determining resend timeouts and for handling the slow-start part of the Rx protocol. Also add a pair of tracepoints, one to log transmissions to elicit a response for RTT purposes and one to log responses that contribute RTT data. 
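A minimal sketch of the constant-time running-average scheme described above (the cache size must remain a power of two for the cursor mask; do_div() is used because this is 64-bit arithmetic that may run on 32-bit hosts):

	sum -= peer->rtt_cache[cursor];	/* retire the oldest sample */
	sum += rtt;			/* admit the new one */
	peer->rtt_cache[cursor] = rtt;
	peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
	if (usage < RXRPC_RTT_CACHE_SIZE)
		peer->rtt_usage = ++usage;

	avg = sum;
	do_div(avg, usage);		/* running sum / sample count */
	peer->rtt = avg;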
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 61 ++++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 25 ++++++++++++--- net/rxrpc/misc.c | 8 +++++ net/rxrpc/peer_event.c | 41 ++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 75a5d8bf50e1..e8f2afbbe0bf 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -353,6 +353,67 @@ TRACE_EVENT(rxrpc_recvmsg, __entry->ret) ); +TRACE_EVENT(rxrpc_rtt_tx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_tx_trace why, + rxrpc_serial_t send_serial), + + TP_ARGS(call, why, send_serial), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_tx_trace, why ) + __field(rxrpc_serial_t, send_serial ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + ), + + TP_printk("c=%p %s sr=%08x", + __entry->call, + rxrpc_rtt_tx_traces[__entry->why], + __entry->send_serial) + ); + +TRACE_EVENT(rxrpc_rtt_rx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + s64 rtt, u8 nr, s64 avg), + + TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_rx_trace, why ) + __field(u8, nr ) + __field(rxrpc_serial_t, send_serial ) + __field(rxrpc_serial_t, resp_serial ) + __field(s64, rtt ) + __field(u64, avg ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + __entry->resp_serial = resp_serial; + __entry->rtt = rtt; + __entry->nr = nr; + __entry->avg = avg; + ), + + TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld", + __entry->call, + rxrpc_rtt_rx_traces[__entry->why], + __entry->send_serial, + __entry->resp_serial, + __entry->rtt, + __entry->nr, + __entry->avg) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dcf54e3fb478..79c671e552c3 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -258,10 +258,11 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 - suseconds_t rtt; /* current RTT estimate (in uS) */ - unsigned int rtt_point; /* next entry at which to insert */ - unsigned int rtt_usage; /* amount of cache actually used */ - suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */ + u64 rtt; /* Current RTT estimate (in nS) */ + u64 rtt_sum; /* Sum of cache contents */ + u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ + u8 rtt_cursor; /* next entry at which to insert */ + u8 rtt_usage; /* amount of cache actually used */ }; /* @@ -657,6 +658,20 @@ enum rxrpc_recvmsg_trace { extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; +enum rxrpc_rtt_tx_trace { + rxrpc_rtt_tx_ping, + rxrpc_rtt_tx__nr_trace +}; + +extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; + +enum rxrpc_rtt_rx_trace { + rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx__nr_trace +}; + +extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); @@ -955,6 +970,8 @@ void rxrpc_reject_packets(struct rxrpc_local *); */ void rxrpc_error_report(struct sock *); void rxrpc_peer_error_distributor(struct work_struct *); +void rxrpc_peer_add_rtt(struct rxrpc_call *, 
enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); /* * peer_object.c diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 026e1f2e83ff..6321c23f9a6e 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -182,3 +182,11 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { [rxrpc_recvmsg_to_be_accepted] = "TBAC", [rxrpc_recvmsg_return] = "RETN", }; + +const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { + [rxrpc_rtt_tx_ping] = "PING", +}; + +const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { + [rxrpc_rtt_rx_ping_response] = "PONG", +}; diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 18276e7cb9e0..bf13b8470c9a 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -305,3 +305,44 @@ void rxrpc_peer_error_distributor(struct work_struct *work) rxrpc_put_peer(peer); _leave(""); } + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt; + u64 sum = peer->rtt_sum, avg; + u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; + + rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); + if (rtt < 0) + return; + + /* Replace the oldest datum in the RTT buffer */ + sum -= peer->rtt_cache[cursor]; + sum += rtt; + peer->rtt_cache[cursor] = rtt; + peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); + peer->rtt_sum = sum; + if (usage < RXRPC_RTT_CACHE_SIZE) { + usage++; + peer->rtt_usage = usage; + } + + /* Now recalculate the average */ + if (usage == RXRPC_RTT_CACHE_SIZE) { + avg = sum / RXRPC_RTT_CACHE_SIZE; + } else { + avg = sum; + do_div(avg, usage); + } + + peer->rtt = avg; + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, + usage, avg); +} From de3d6fa81e684af5817dc379ffc394235a9666cc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 20 Sep 2016 14:39:39 +0300 Subject: [PATCH 0595/1050] net/mlx4_en: Add branch prediction hints in RX data-path Add likely/unlikely hints to improve branch predictions in the RX data-path. Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c46355bce613..6e474af3fb1f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -72,7 +72,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, frag_info->dma_dir); - if (dma_mapping_error(priv->ddev, dma)) { + if (unlikely(dma_mapping_error(priv->ddev, dma))) { put_page(page); return -ENOMEM; } @@ -108,7 +108,8 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, ring_alloc[i].page_size) continue; - if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) + if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], + frag_info, gfp))) goto out; } @@ -585,7 +586,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; - if (!frags[nr].page) + if (unlikely(!frags[nr].page)) goto fail; dma = be64_to_cpu(rx_desc->data[nr].addr); @@ -625,7 +626,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, dma_addr_t dma; skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (!skb) { + if (unlikely(!skb)) { en_dbg(RX_ERR, priv, "Failed allocating skb\n"); return NULL; } @@ -736,7 +737,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, { __wsum csum_pseudo_hdr = 0; - if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || + ipv6h->nexthdr == IPPROTO_HOPOPTS)) return -1; hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); @@ -769,7 +771,7 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) return -1; #endif return 0; @@ -796,10 +798,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud u64 timestamp; bool l2_tunnel; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return 0; - if (budget <= 0) + if (unlikely(budget <= 0)) return polled; /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ @@ -902,9 +904,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (!mlx4_en_xmit_frame(frags, dev, + if (likely(!mlx4_en_xmit_frame(frags, dev, length, tx_index, - &doorbell_pending)) + &doorbell_pending))) goto consumed; goto xdp_drop; /* Drop on xmit failure */ default: @@ -912,7 +914,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_ABORTED: case XDP_DROP: xdp_drop: - if (mlx4_en_rx_recycle(ring, frags)) + if (likely(mlx4_en_rx_recycle(ring, frags))) goto consumed; goto next; } @@ -1016,7 +1018,7 @@ xdp_drop: /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); - if (!skb) { + if (unlikely(!skb)) { ring->dropped++; goto next; } From 57c970c2e8d8772237294bb8a6a25a205448fd96 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 20 Sep 2016 14:39:40 +0300 Subject: [PATCH 0596/1050] net/mlx4_en: Fix wrong indentation Use tabs instead of 
spaces before if statement, no functional change. Fixes: e7c1c2c46201 ("mlx4_en: Added self diagnostics test implementation") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6e474af3fb1f..f2e8beddcf44 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1023,7 +1023,7 @@ xdp_drop: goto next; } - if (unlikely(priv->validate_loopback)) { + if (unlikely(priv->validate_loopback)) { validate_loopback(priv, skb); goto next; } From 30353bfc43a1602c020f31d95cf27182ffd23824 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Sep 2016 14:39:41 +0300 Subject: [PATCH 0597/1050] net/mlx4_core: Use RCU to perform radix tree lookup for SRQ Radix tree lookup can be performed without locking. Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Leon Romanovsky Suggested-by: Sagi Grimberg Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/srq.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 67146624eb58..f44d089e2ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -45,15 +45,12 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - spin_lock(&srq_table->lock); - + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + rcu_read_unlock(); if (srq) atomic_inc(&srq->refcount); - - spin_unlock(&srq_table->lock); - - if (!srq) { + else { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -301,12 +298,11 @@ struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - unsigned long flags; - spin_lock_irqsave(&srq_table->lock, flags); + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - spin_unlock_irqrestore(&srq_table->lock, flags); + rcu_read_unlock(); return srq; } From a7e1f04905e5b2b90251974dddde781301b6be37 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 20 Sep 2016 14:39:42 +0300 Subject: [PATCH 0598/1050] net/mlx4_core: Fix deadlock when switching between polling and event fw commands When switching from polling-based fw commands to event-based fw commands, there is a race condition which could cause a fw command in another task to hang: that task will keep waiting for the polling semaphore, but may never be able to acquire it. This is due to mlx4_cmd_use_events, which "down"s the semaphore back to 0. During driver initialization, this is not a problem, since no other tasks which invoke FW commands are active. However, there is a problem if the driver switches to polling mode and then back to event mode during normal operation. The "test_interrupts" feature does exactly that. Running "ethtool -t offline" causes the PF driver to temporarily switch to polling mode, and then back to event mode. (Note that for VF drivers, such switching is not performed).
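The fix, described next, amounts to the read/write-semaphore pattern sketched here (a paraphrase of the hunk below: command issuers take the semaphore for read, mode switches take it for write):

	down_read(&mlx4_priv(dev)->cmd.switch_sem);
	if (mlx4_priv(dev)->cmd.use_events)
		ret = mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
				    in_modifier, op_modifier, op, timeout);
	else
		ret = mlx4_cmd_poll(dev, in_param, out_param, out_is_imm,
				    in_modifier, op_modifier, op, timeout);
	up_read(&mlx4_priv(dev)->cmd.switch_sem);

	/* mlx4_cmd_use_events()/mlx4_cmd_use_polling() bracket the mode
	 * switch with down_write()/up_write(), so no command is mid-flight
	 * while the completion mechanism changes.
	 */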
Fix this by adding a read-write semaphore for protection when switching between modes. Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 23 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index f04a423ff79d..a58d96cf1ed1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -785,17 +785,23 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + int ret; + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) return mlx4_internal_err_ret_value(dev, op, op_modifier); + down_read(&mlx4_priv(dev)->cmd.switch_sem); if (mlx4_priv(dev)->cmd.use_events) - return mlx4_cmd_wait(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_wait(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); else - return mlx4_cmd_poll(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_poll(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); + + up_read(&mlx4_priv(dev)->cmd.switch_sem); + return ret; } return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); @@ -2454,6 +2460,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { + init_rwsem(&priv->cmd.switch_sem); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2583,6 +2590,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; @@ -2606,6 +2614,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; + up_write(&priv->cmd.switch_sem); return err; } @@ -2618,6 +2627,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; for (i = 0; i < priv->cmd.max_cmds; ++i) @@ -2626,6 +2636,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) kfree(priv->cmd.context); up(&priv->cmd.poll_sem); + up_write(&priv->cmd.switch_sem); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c9d7fc5159f2..c128ba3ef014 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -627,6 +628,7 @@ struct mlx4_cmd { struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; + struct rw_semaphore switch_sem; int max_cmds; spinlock_t context_lock; int free_head; From bfca4c520f7ea78138ddccea2de18dc062b0fefd Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 19 Sep 2016 19:11:09 +0300 Subject: [PATCH 0599/1050] net: skbuff: Export __skb_vlan_pop This exports the 
functionality of extracting the tag from the payload, without moving next vlan tag into hw accel tag. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6dab3f7457c..9bf60b556bd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3085,6 +3085,7 @@ bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7bf82a28e10a..6c22351bd519 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4522,8 +4522,10 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len) } EXPORT_SYMBOL(skb_ensure_writable); -/* remove VLAN header from packet and update csum accordingly. */ -static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) +/* remove VLAN header from packet and update csum accordingly. + * expects a non skb_vlan_tag_present skb with a vlan tag payload + */ +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; unsigned int offset = skb->data - skb_mac_header(skb); @@ -4554,6 +4556,7 @@ pull: return err; } +EXPORT_SYMBOL(__skb_vlan_pop); int skb_vlan_pop(struct sk_buff *skb) { From 45a497f2d149a4a8061c61518a79d59f1f3034b2 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 19 Sep 2016 19:11:10 +0300 Subject: [PATCH 0600/1050] net/sched: act_vlan: Introduce TCA_VLAN_ACT_MODIFY vlan action TCA_VLAN_ACT_MODIFY allows one to change an existing tag. It accepts same attributes as TCA_VLAN_ACT_PUSH (protocol, id, priority). If packet is vlan tagged, then the tag gets overwritten according to user specified attributes. For example, this allows user to replace a tag's vid while preserving its priority bits (as opposed to "action vlan pop pipe action vlan push"). Signed-off-by: Shmulik Ladkani Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/tc_act/tc_vlan.h | 1 + net/sched/act_vlan.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index be72b6e3843b..bddb272b843f 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -16,6 +16,7 @@ #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 +#define TCA_VLAN_ACT_MODIFY 3 struct tc_vlan { tc_gen; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 59a8d3150ae2..a95c00b119da 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -30,6 +30,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_vlan *v = to_vlan(a); int action; int err; + u16 tci; spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); @@ -48,6 +49,30 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (err) goto drop; break; + case TCA_VLAN_ACT_MODIFY: + /* No-op if no vlan tag (either hw-accel or in-payload) */ + if (!skb_vlan_tagged(skb)) + goto unlock; + /* extract existing tag (and guarantee no hw-accel tag) */ + if (skb_vlan_tag_present(skb)) { + tci = skb_vlan_tag_get(skb); + skb->vlan_tci = 0; + } else { + /* in-payload vlan tag, pop it */ + err = __skb_vlan_pop(skb, &tci); + if (err) + goto drop; + } + /* replace the vid */ + tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + /* replace prio bits, if tcfv_push_prio specified */ + if (v->tcfv_push_prio) { + tci &= ~VLAN_PRIO_MASK; + tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + } + /* put updated tci as hwaccel tag */ + __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + break; default: BUG(); } @@ -102,6 +127,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) tcf_hash_release(*a, bind); @@ -185,7 +211,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (v->tcfv_action == TCA_VLAN_ACT_PUSH && + if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || + v->tcfv_action == TCA_VLAN_ACT_MODIFY) && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto) || From 636c2628086e40c86dac7ddc84a1c4b4fcccc6e3 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Tue, 20 Sep 2016 12:48:36 +0300 Subject: [PATCH 0601/1050] net: skbuff: Remove erroneous length validation in skb_vlan_pop() In 93515d53b1 "net: move vlan pop/push functions into common code" skb_vlan_pop was moved from its private location in openvswitch to skbuff common code. In case skb has non hw-accel vlan tag, the original 'pop_vlan()' assured that skb->len is sufficient (if skb->len < VLAN_ETH_HLEN then pop was considered a no-op). This validation was moved as is into the new common 'skb_vlan_pop'. Alas, in its original location (openvswitch), there was a guarantee that 'data' points to the mac_header, therefore the 'skb->len < VLAN_ETH_HLEN' condition made sense. However there's no such guarantee in the generic 'skb_vlan_pop'. For short packets received in rx path going through 'skb_vlan_pop', this causes 'skb_vlan_pop' to fail popping a valid vlan hdr (in the non hw-accel case) or to fail moving next tag into hw-accel tag.
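As the diff below shows, the fix reduces the guard to protocol inspection alone; a hedged sketch of the resulting non-accel branch (__skb_vlan_pop() performs its own VLAN_ETH_HLEN writable-bytes check at the mac header):

	if (unlikely(skb->protocol != htons(ETH_P_8021Q) &&
		     skb->protocol != htons(ETH_P_8021AD)))
		return 0;	/* not a vlan frame, nothing to pop */

	/* The removed 'skb->len < VLAN_ETH_HLEN' test sat here; it rejected
	 * short-but-valid tagged frames whenever data != mac_header.
	 */
	err = __skb_vlan_pop(skb, &vlan_tci);
	if (err)
		return err;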
Remove the 'skb->len < VLAN_ETH_HLEN' condition entirely: It is superfluous since the inner '__skb_vlan_pop' already verifies there are VLAN_ETH_HLEN writable bytes at the mac_header. Note this presents a slight change to skb_vlan_pop() users: In case the total length is smaller than VLAN_ETH_HLEN, skb_vlan_pop() now returns an error, as opposed to the previous "no-op" behavior. Existing callers (e.g. tc act vlan, ovs) usually drop the packet if 'skb_vlan_pop' fails. Fixes: 93515d53b1 ("net: move vlan pop/push functions into common code") Signed-off-by: Shmulik Ladkani Cc: Pravin Shelar Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c22351bd519..b2a51bf1b0f9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4567,9 +4567,8 @@ int skb_vlan_pop(struct sk_buff *skb) if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { - if (unlikely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (unlikely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); @@ -4577,9 +4576,8 @@ int skb_vlan_pop(struct sk_buff *skb) return err; } /* move next vlan tag to hw accel tag */ - if (likely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) return 0; vlan_proto = skb->protocol; From ecf4ee41d25832a6ec52f8b54dfaa46c08b949d5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Tue, 20 Sep 2016 12:48:37 +0300 Subject: [PATCH 0602/1050] net: skbuff: Coding: Use eth_type_vlan() instead of open coding it Fix 'skb_vlan_pop' to use eth_type_vlan instead of directly comparing skb->protocol to ETH_P_8021Q or ETH_P_8021AD. Signed-off-by: Shmulik Ladkani Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2a51bf1b0f9..d36c7548952f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4567,8 +4567,7 @@ int skb_vlan_pop(struct sk_buff *skb) if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { - if (unlikely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + if (unlikely(!eth_type_vlan(skb->protocol))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); @@ -4576,8 +4575,7 @@ int skb_vlan_pop(struct sk_buff *skb) return err; } /* move next vlan tag to hw accel tag */ - if (likely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + if (likely(!eth_type_vlan(skb->protocol))) return 0; vlan_proto = skb->protocol; From d57fd6cafbad29d0648ed769f6df07b02f10d613 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:06 +0530 Subject: [PATCH 0603/1050] cxgb4: move common filter code to separate file Move the common filter code into separate files. Also fix the following checkpatch checks. CHECK: Comparison to NULL could be written "!f->l2t" + if (f->l2t == NULL) { CHECK: spaces preferred around that '/' (ctx:VxV) + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S.
Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 23 ++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 274 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 47 +++ .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 264 +---------------- 5 files changed, 346 insertions(+), 264 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 246129650967..da889817ec27 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 1f9867db3b78..a844fd25c214 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1025,6 +1025,29 @@ enum { VLAN_REWRITE }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c new file mode 100644 index 000000000000..8a26a54a9c84 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -0,0 +1,274 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "l2t.h" +#include "t4fw_api.h" +#include "cxgb4_filter.h" + +/* Delete the filter at a specified index. */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + unsigned int len, ftid; + struct sk_buff *skb; + + len = sizeof(*fwr); + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. + */ +int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + struct sk_buff *skb; + unsigned int ftid; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, + f->fs.eport, f->fs.dmac); + if (!f->l2t) { + kfree_skb(skb); + return -ENOMEM; + } + } + + ftid = adapter->tids.ftid_base + fidx; + + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... 
+ */ + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); + fwr->tid_to_iq = + htonl(FW_FILTER_WR_TID_V(ftid) | + FW_FILTER_WR_RQTYPE_V(f->fs.type) | + FW_FILTER_WR_NOREPLY_V(0) | + FW_FILTER_WR_IQ_V(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | + FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | + FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | + FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | + FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | + FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | + FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | + FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | + FW_FILTER_WR_TXCHAN_V(f->fs.eport) | + FW_FILTER_WR_PRIO_V(f->fs.prio) | + FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | + FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | + FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | + FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(FW_FILTER_WR_RX_CHAN_V(0) | + FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | + FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | + FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | + FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | + FW_FILTER_WR_PORT_V(f->fs.val.iport) | + FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | + FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | + FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Return an error number if the indicated filter isn't writable ... */ +int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). The checks for all + * the common problems with doing this like the filter being locked, currently + * pending in another operation, etc. 
+ */ +int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. + */ +void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter have loopback rewriteing rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +/* Handle a filter write/deletion reply. */ +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int idx = GET_TID(rpl); + unsigned int nidx = idx - adap->tids.ftid_base; + struct filter_entry *f; + unsigned int ret; + + if (idx >= adap->tids.ftid_base && nidx < + (adap->tids.nftids + adap->tids.nsftids)) { + idx = nidx; + ret = TCB_COOKIE_G(rpl->cookie); + f = &adap->tids.ftid_tab[idx]; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + } + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h new file mode 100644 index 000000000000..f6bd0bf65b23 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -0,0 +1,47 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_FILTER_H +#define __CXGB4_FILTER_H + +#include "t4_msg.h" + +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); +void clear_filter(struct adapter *adap, struct filter_entry *f); + +int set_filter_wr(struct adapter *adapter, int fidx); +int delete_filter(struct adapter *adapter, unsigned int fidx); + +int writable_filter(struct filter_entry *f); +#endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d1ebb84c073e..5cdcfe871c12 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -67,6 +67,7 @@ #include #include "cxgb4.h" +#include "cxgb4_filter.h" #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" @@ -87,30 +88,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME; const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" -/* Host shadow copy of ingress filter entry. This is in host native format - * and doesn't match the ordering or bit order, etc. of the hardware of the - * firmware command. The use of bit-field structure elements is purely to - * remind ourselves of the field size limitations and save memory in the case - * where the filter table is large. - */ -struct filter_entry { - /* Administrative fields for filter. - */ - u32 valid:1; /* filter allocated and valid */ - u32 locked:1; /* filter is administratively locked */ - - u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ - - /* The filter itself. Most of this is a straight copy of information - * provided by the extended ioctl(). Some fields are translated to - * internal forms -- for instance the Ingress Queue ID passed in from - * the ioctl() is translated into the Absolute Ingress Queue ID. - */ - struct ch_filter_specification fs; -}; - #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -527,66 +504,6 @@ static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd) } #endif /* CONFIG_CHELSIO_T4_DCB */ -/* Clear a filter and release any of its resources that we own. This also - * clears the filter's "pending" status. - */ -static void clear_filter(struct adapter *adap, struct filter_entry *f) -{ - /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. - */ - if (f->l2t) - cxgb4_l2t_release(f->l2t); - - /* The zeroing of the filter rule below clears the filter valid, - * pending, locked flags, l2t pointer, etc. so it's all we need for - * this operation. 
- */ - memset(f, 0, sizeof(*f)); -} - -/* Handle a filter write/deletion reply. - */ -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) -{ - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - unsigned int ret; - struct filter_entry *f; - - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); - f = &adap->tids.ftid_tab[idx]; - - if (ret == FW_FILTER_WR_FLT_DELETED) { - /* Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. - */ - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - /* Something went wrong. Issue a warning about the - * problem and clear everything out. - */ - dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", - idx, ret); - clear_filter(adap, f); - } - } -} - /* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, @@ -1026,151 +943,6 @@ void t4_free_mem(void *addr) kvfree(addr); } -/* Send a Work Request to write the filter at a specified index. We construct - * a Firmware Filter Work Request to have the work done and put the indicated - * filter into "pending" mode which will prevent any further actions against - * it till we get a reply from the firmware on the completion status of the - * request. - */ -static int set_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int ftid; - - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* If the new filter requires loopback Destination MAC and/or VLAN - * rewriting then we need to allocate a Layer 2 Table (L2T) entry for - * the filter. - */ - if (f->fs.newdmac || f->fs.newvlan) { - /* allocate L2T entry for new filter */ - f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, - f->fs.eport, f->fs.dmac); - if (f->l2t == NULL) { - kfree_skb(skb); - return -ENOMEM; - } - } - - ftid = adapter->tids.ftid_base + fidx; - - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); - memset(fwr, 0, sizeof(*fwr)); - - /* It would be nice to put most of the following in t4_hw.c but most - * of the work is translating the cxgbtool ch_filter_specification - * into the Work Request and the definition of that structure is - * currently in cxgbtool.h which isn't appropriate to pull into the - * common code. We may eventually try to come up with a more neutral - * filter specification structure but for now it's easiest to simply - * put this fairly direct code in line ... 
- */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); - fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_RQTYPE_V(f->fs.type) | - FW_FILTER_WR_NOREPLY_V(0) | - FW_FILTER_WR_IQ_V(f->fs.iq)); - fwr->del_filter_to_l2tix = - htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | - FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | - FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | - FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | - FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | - FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | - FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | - FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | - FW_FILTER_WR_TXCHAN_V(f->fs.eport) | - FW_FILTER_WR_PRIO_V(f->fs.prio) | - FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); - fwr->ethtype = htons(f->fs.val.ethtype); - fwr->ethtypem = htons(f->fs.mask.ethtype); - fwr->frag_to_ovlan_vldm = - (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | - FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | - FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | - FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; - fwr->rx_chan_rx_rpl_iq = - htons(FW_FILTER_WR_RX_CHAN_V(0) | - FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); - fwr->maci_to_matchtypem = - htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | - FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | - FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | - FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | - FW_FILTER_WR_PORT_V(f->fs.val.iport) | - FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | - FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | - FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); - fwr->ptcl = f->fs.val.proto; - fwr->ptclm = f->fs.mask.proto; - fwr->ttyp = f->fs.val.tos; - fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htons(f->fs.val.ivlan); - fwr->ivlanm = htons(f->fs.mask.ivlan); - fwr->ovlan = htons(f->fs.val.ovlan); - fwr->ovlanm = htons(f->fs.mask.ovlan); - memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); - memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); - memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); - memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); - fwr->lp = htons(f->fs.val.lport); - fwr->lpm = htons(f->fs.mask.lport); - fwr->fp = htons(f->fs.val.fport); - fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adapter, skb); - return 0; -} - -/* Delete the filter at a specified index. - */ -static int del_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int len, ftid; - - len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); - - /* Mark the filter as "pending" and ship off the Filter Work Request. 
- * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - t4_mgmt_tx(adapter, skb); - return 0; -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -2514,40 +2286,6 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); } -/* Return an error number if the indicated filter isn't writable ... - */ -static int writable_filter(struct filter_entry *f) -{ - if (f->locked) - return -EPERM; - if (f->pending) - return -EBUSY; - - return 0; -} - -/* Delete the filter at the specified index (if valid). The checks for all - * the common problems with doing this like the filter being locked, currently - * pending in another operation, etc. - */ -static int delete_filter(struct adapter *adapter, unsigned int fidx) -{ - struct filter_entry *f; - int ret; - - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) - return -EINVAL; - - f = &adapter->tids.ftid_tab[fidx]; - ret = writable_filter(f); - if (ret) - return ret; - if (f->valid) - return del_filter_wr(adapter, fidx); - - return 0; -} - int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, __be32 sip, __be16 sport, __be16 vlan, unsigned int queue, unsigned char port, unsigned char mask) From 578b46b9383c3619cc0a6002ff867e732b08b67a Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:07 +0530 Subject: [PATCH 0604/1050] cxgb4: add common api support for configuring filters Enable filters for non-offload configuration and add common api support for setting and deleting filters in the LE-TCAM region of the hardware. IPv4 filters occupy one slot. IPv6 filters occupy 4 slots and must be on a 4-slot boundary. IPv4 filters cannot occupy a slot belonging to an IPv6 filter, and vice versa. Filters are set and deleted asynchronously. Use a completion to wait for the reply from firmware, to allow for synchronization where needed. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 477 +++++++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 39 +- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 26 +- 5 files changed, 512 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a844fd25c214..51edb126da65 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1038,7 +1038,10 @@ struct filter_entry { u32 pending:1; /* filter action is pending firmware reply */ u32 smtidx:8; /* Source MAC Table index for smac */ + struct filter_ctx *ctx; /* Caller's completion hook */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct net_device *dev; /* Associated net device */ + u32 tid; /* This will store the actual tid */ /* The filter itself. Most of this is a straight copy of information * provided by the extended ioctl(). 
Some fields are translated to diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 8a26a54a9c84..2a616171cb68 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -33,27 +33,165 @@ */ #include "cxgb4.h" +#include "t4_regs.h" #include "l2t.h" #include "t4fw_api.h" #include "cxgb4_filter.h" +static inline bool is_field_set(u32 val, u32 mask) +{ + return val || mask; +} + +static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask) +{ + return !(conf & conf_mask) && is_field_set(val, mask); +} + +/* Validate filter spec against configuration done on the card. */ +static int validate_filter(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + u32 fconf, iconf; + + /* Check for unconfigured fields being used. */ + fconf = adapter->params.tp.vlan_pri_map; + iconf = adapter->params.tp.ingress_config; + + if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) || + unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) || + unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) || + unsupported(fconf, ETHERTYPE_F, fs->val.ethtype, + fs->mask.ethtype) || + unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) || + unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype, + fs->mask.matchtype) || + unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) || + unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) || + unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld, + fs->mask.pfvf_vld) || + unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld, + fs->mask.ovlan_vld) || + unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld)) + return -EOPNOTSUPP; + + /* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer + * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set + * in TP_INGRESS_CONFIG. Hense the somewhat crazy checks + * below. Additionally, since the T4 firmware interface also + * carries that overlap, we need to translate any PF/VF + * specification into that internal format below. + */ + if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) && + is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld)) + return -EOPNOTSUPP; + if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) || + (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) && + (iconf & VNIC_F))) + return -EOPNOTSUPP; + if (fs->val.pf > 0x7 || fs->val.vf > 0x7f) + return -ERANGE; + fs->mask.pf &= 0x7; + fs->mask.vf &= 0x7f; + + /* If the user is requesting that the filter action loop + * matching packets back out one of our ports, make sure that + * the egress port is in range. + */ + if (fs->action == FILTER_SWITCH && + fs->eport >= adapter->params.nports) + return -ERANGE; + + /* Don't allow various trivially obvious bogus out-of-range values... */ + if (fs->val.iport >= adapter->params.nports) + return -ERANGE; + + /* T4 doesn't support removing VLAN Tags for loop back filters. 
*/ + if (is_t4(adapter->params.chip) && + fs->action == FILTER_SWITCH && + (fs->newvlan == VLAN_REMOVE || + fs->newvlan == VLAN_REWRITE)) + return -EOPNOTSUPP; + + return 0; +} + +static unsigned int get_filter_steerq(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int iq; + + /* If the user has requested steering matching Ingress Packets + * to a specific Queue Set, we need to make sure it's in range + * for the port and map that into the Absolute Queue ID of the + * Queue Set's Response Queue. + */ + if (!fs->dirsteer) { + if (fs->iq) + return -EINVAL; + iq = 0; + } else { + struct port_info *pi = netdev_priv(dev); + + /* If the iq id is greater than the number of qsets, + * then assume it is an absolute qid. + */ + if (fs->iq < pi->nqsets) + iq = adapter->sge.ethrxq[pi->first_qset + + fs->iq].rspq.abs_id; + else + iq = fs->iq; + } + + return iq; +} + +static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->ftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->ftid_bmap); + else + bitmap_allocate_region(t->ftid_bmap, fidx, 2); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + +static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) + __clear_bit(fidx, t->ftid_bmap); + else + bitmap_release_region(t->ftid_bmap, fidx, 2); + spin_unlock_bh(&t->ftid_lock); +} + /* Delete the filter at a specified index. */ static int del_filter_wr(struct adapter *adapter, int fidx) { struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; - unsigned int len, ftid; struct sk_buff *skb; + unsigned int len; len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -ENOMEM; fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. 
@@ -74,7 +212,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; struct sk_buff *skb; - unsigned int ftid; skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); if (!skb) @@ -94,8 +231,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) } } - ftid = adapter->tids.ftid_base + fidx; - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); memset(fwr, 0, sizeof(*fwr)); @@ -110,7 +245,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | + htonl(FW_FILTER_WR_TID_V(f->tid) | FW_FILTER_WR_RQTYPE_V(f->fs.type) | FW_FILTER_WR_NOREPLY_V(0) | FW_FILTER_WR_IQ_V(f->fs.iq)); @@ -235,33 +370,341 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) memset(f, 0, sizeof(*f)); } +void clear_all_filters(struct adapter *adapter) +{ + unsigned int i; + + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + unsigned int max_ftid = adapter->tids.nftids + + adapter->tids.nsftids; + + for (i = 0; i < max_ftid; i++, f++) + if (f->valid || f->pending) + clear_filter(adapter, f); + } +} + +/* Fill up default masks for set match fields. */ +static void fill_default_mask(struct ch_filter_specification *fs) +{ + unsigned int lip = 0, lip_mask = 0; + unsigned int fip = 0, fip_mask = 0; + unsigned int i; + + if (fs->val.iport && !fs->mask.iport) + fs->mask.iport |= ~0; + if (fs->val.fcoe && !fs->mask.fcoe) + fs->mask.fcoe |= ~0; + if (fs->val.matchtype && !fs->mask.matchtype) + fs->mask.matchtype |= ~0; + if (fs->val.macidx && !fs->mask.macidx) + fs->mask.macidx |= ~0; + if (fs->val.ethtype && !fs->mask.ethtype) + fs->mask.ethtype |= ~0; + if (fs->val.ivlan && !fs->mask.ivlan) + fs->mask.ivlan |= ~0; + if (fs->val.ovlan && !fs->mask.ovlan) + fs->mask.ovlan |= ~0; + if (fs->val.frag && !fs->mask.frag) + fs->mask.frag |= ~0; + if (fs->val.tos && !fs->mask.tos) + fs->mask.tos |= ~0; + if (fs->val.proto && !fs->mask.proto) + fs->mask.proto |= ~0; + + for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) { + lip |= fs->val.lip[i]; + lip_mask |= fs->mask.lip[i]; + fip |= fs->val.fip[i]; + fip_mask |= fs->mask.fip[i]; + } + + if (lip && !lip_mask) + memset(fs->mask.lip, ~0, sizeof(fs->mask.lip)); + + if (fip && !fip_mask) + memset(fs->mask.fip, ~0, sizeof(fs->mask.lip)); + + if (fs->val.lport && !fs->mask.lport) + fs->mask.lport = ~0; + if (fs->val.fport && !fs->mask.fport) + fs->mask.fport = ~0; +} + +/* Check a Chelsio Filter Request for validity, convert it into our internal + * format and send it to the hardware. Return 0 on success, an error number + * otherwise. We attach any provided filter operation context to the internal + * filter specification in order to facilitate signaling completion of the + * operation. 
+ */ +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int max_fidx, fidx, iq; + struct filter_entry *f; + u32 iconf; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + fill_default_mask(fs); + + ret = validate_filter(dev, fs); + if (ret) + return ret; + + iq = get_filter_steerq(dev, fs); + if (iq < 0) + return iq; + + /* IPv6 filters occupy four slots and must be aligned on + * four-slot boundaries. IPv4 filters only occupy a single + * slot and have no alignment requirements but writing a new + * IPv4 filter into the middle of an existing IPv6 filter + * requires clearing the old IPv6 filter and hence we prevent + * insertion. + */ + if (fs->type == 0) { /* IPv4 */ + /* If our IPv4 filter isn't being written to a + * multiple of four filter index and there's an IPv6 + * filter at the multiple of 4 base slot, then we + * prevent insertion. + */ + fidx = filter_id & ~0x3; + if (fidx != filter_id && + adapter->tids.ftid_tab[fidx].fs.type) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", + fidx, fidx + 3); + return -EINVAL; + } + } + } else { /* IPv6 */ + /* Ensure that the IPv6 filter is aligned on a + * multiple of 4 boundary. + */ + if (filter_id & 0x3) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 must be aligned on a 4-slot boundary\n"); + return -EINVAL; + } + + /* Check all except the base overlapping IPv4 filter slots. */ + for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", + fidx); + return -EINVAL; + } + } + } + + /* Check to make sure that provided filter index is not + * already in use by someone else + */ + f = &adapter->tids.ftid_tab[filter_id]; + if (f->valid) + return -EBUSY; + + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + if (ret) + return ret; + + /* Check to make sure the filter requested is writable ... */ + ret = writable_filter(f); + if (ret) { + /* Clear the bits we have set above */ + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + return ret; + } + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adapter, f); + + /* Convert the filter specification into our internal format. + * We copy the PF/VF specification into the Outer VLAN field + * here so the rest of the code -- including the interface to + * the firmware -- doesn't have to constantly do these checks. + */ + f->fs = *fs; + f->fs.iq = iq; + f->dev = dev; + + iconf = adapter->params.tp.ingress_config; + if (iconf & VNIC_F) { + f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf; + f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf; + f->fs.val.ovlan_vld = fs->val.pfvf_vld; + f->fs.mask.ovlan_vld = fs->mask.pfvf_vld; + } + + /* Attempt to set the filter. If we don't succeed, we clear + * it and return the failure. 
+ */ + f->ctx = ctx; + f->tid = fidx; /* Save the actual tid */ + ret = set_filter_wr(adapter, filter_id); + if (ret) { + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + clear_filter(adapter, f); + } + + return ret; +} + +/* Check a delete filter request for validity and send it to the hardware. + * Return 0 on success, an error number otherwise. We attach any provided + * filter operation context to the internal filter specification in order to + * facilitate signaling completion of the operation. + */ +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct filter_entry *f; + unsigned int max_fidx; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + f = &adapter->tids.ftid_tab[filter_id]; + ret = writable_filter(f); + if (ret) + return ret; + + if (f->valid) { + f->ctx = ctx; + cxgb4_clear_ftid(&adapter->tids, filter_id, + f->fs.type ? PF_INET6 : PF_INET); + return del_filter_wr(adapter, filter_id); + } + + /* If the caller has passed in a Completion Context then we need to + * mark it as a successful completion so they don't stall waiting + * for it. + */ + if (ctx) { + ctx->result = 0; + complete(&ctx->completion); + } + return ret; +} + +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + +int cxgb4_del_filter(struct net_device *dev, int filter_id) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_del_filter(dev, filter_id, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + /* Handle a filter write/deletion reply. */ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) { - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - struct filter_entry *f; - unsigned int ret; + unsigned int tid = GET_TID(rpl); + struct filter_entry *f = NULL; + unsigned int max_fidx; + int idx; - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); + max_fidx = adap->tids.nftids + adap->tids.nsftids; + /* Get the corresponding filter entry for this tid */ + if (adap->tids.ftid_tab) { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; f = &adap->tids.ftid_tab[idx]; + if (f->tid != tid) + return; + } + + /* We found the filter entry for this tid */ + if (f) { + unsigned int ret = TCB_COOKIE_G(rpl->cookie); + struct filter_ctx *ctx; + + /* Pull off any filter operation context attached to the + * filter. + */ + ctx = f->ctx; + f->ctx = NULL; if (ret == FW_FILTER_WR_FLT_DELETED) { /* Clear the filter when we get confirmation from the * hardware that the filter has been deleted. 
*/ clear_filter(adap, f); + if (ctx) + ctx->result = 0; } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", idx); clear_filter(adap, f); + if (ctx) + ctx->result = -ENOMEM; } else if (ret == FW_FILTER_WR_FLT_ADDED) { f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; + } } else { /* Something went wrong. Issue a warning about the * problem and clear everything out. @@ -269,6 +712,10 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", idx, ret); clear_filter(adap, f); + if (ctx) + ctx->result = -EINVAL; } + if (ctx) + complete(&ctx->completion); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index f6bd0bf65b23..23742cb1c69f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -44,4 +44,5 @@ int set_filter_wr(struct adapter *adapter, int fidx); int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); +void clear_all_filters(struct adapter *adapter); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5cdcfe871c12..e97daa0510e5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1324,19 +1324,22 @@ EXPORT_SYMBOL(cxgb4_remove_tid); */ static int tid_init(struct tid_info *t) { - size_t size; - unsigned int stid_bmap_size; - unsigned int natids = t->natids; struct adapter *adap = container_of(t, struct adapter, tids); + unsigned int max_ftids = t->nftids + t->nsftids; + unsigned int natids = t->natids; + unsigned int stid_bmap_size; + unsigned int ftid_bmap_size; + size_t size; stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + ftid_bmap_size = BITS_TO_LONGS(t->nftids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + - t->nftids * sizeof(*t->ftid_tab) + - t->nsftids * sizeof(*t->ftid_tab); + max_ftids * sizeof(*t->ftid_tab) + + ftid_bmap_size * sizeof(long); t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) @@ -1346,8 +1349,10 @@ static int tid_init(struct tid_info *t) t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); + spin_lock_init(&t->ftid_lock); t->stids_in_use = 0; t->sftids_in_use = 0; @@ -1362,12 +1367,16 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); - /* Reserve stid 0 for T4/T5 adapters */ - if (!t->stid_base && - (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)) - __set_bit(0, t->stid_bmap); + if (is_offload(adap)) { + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + __set_bit(0, t->stid_bmap); + } + + 
bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -4825,7 +4834,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) i); } - if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; @@ -5012,13 +5021,7 @@ static void remove_one(struct pci_dev *pdev) /* If we allocated filters, free up state associated with any * valid filters ... */ - if (adapter->tids.ftid_tab) { - struct filter_entry *f = &adapter->tids.ftid_tab[0]; - for (i = 0; i < (adapter->tids.nftids + - adapter->tids.nsftids); i++, f++) - if (f->valid) - clear_filter(adapter, f); - } + clear_all_filters(adapter); if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index b3544f6b88ca..47bd14f602db 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -106,6 +106,7 @@ struct tid_info { unsigned int atid_base; struct filter_entry *ftid_tab; + unsigned long *ftid_bmap; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -126,6 +127,8 @@ struct tid_info { atomic_t tids_in_use; /* TIDs in the HASH */ atomic_t hash_tids_in_use; + /* lock for setting/clearing filter bitmap */ + spinlock_t ftid_lock; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -185,6 +188,27 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6); +/* Filter operation context to allow callers of cxgb4_set_filter() and + * cxgb4_del_filter() to wait for an asynchronous completion. + */ +struct filter_ctx { + struct completion completion; /* completion rendezvous */ + void *closure; /* caller's opaque information */ + int result; /* result of operation */ + u32 tid; /* to store tid */ +}; + +struct ch_filter_specification; + +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx); +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx); +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); +int cxgb4_del_filter(struct net_device *dev, int filter_id); + static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); From 2e8aad7bf20323c6ef0beec859a77c94a082c55d Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:08 +0530 Subject: [PATCH 0605/1050] cxgb4: add parser to translate u32 filters to internal spec Parse information sent by u32 into internal filter specification. Add support for parsing several fields in IPv4, IPv6, TCP, and UDP. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- .../chelsio/cxgb4/cxgb4_tc_u32_parse.h | 282 ++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h new file mode 100644 index 000000000000..65c20ca51baa --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -0,0 +1,282 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __CXGB4_TC_U32_PARSE_H +#define __CXGB4_TC_U32_PARSE_H + +struct cxgb4_match_field { + int off; /* Offset from the beginning of the header to match */ + /* Fill the value/mask pair in the spec if matched */ + int (*val)(struct ch_filter_specification *f, u32 val, u32 mask); +}; + +/* IPv4 match fields */ +static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 16) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + u32 mask_val; + u8 frag_val; + + frag_val = (ntohl(val) >> 13) & 0x00000007; + mask_val = ntohl(mask) & 0x0000FFFF; + + if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */ + f->val.frag = 1; + f->mask.frag = 1; + } else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */ + f->val.frag = 0; + f->mask.frag = 1; + } else { + return -EINVAL; + } + + return 0; +} + +static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 16) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv4_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv4_tos }, + { .off = 4, .val = cxgb4_fill_ipv4_frag }, + { .off = 8, .val = cxgb4_fill_ipv4_proto }, + { .off = 12, .val = cxgb4_fill_ipv4_src_ip }, + { .off = 16, .val = cxgb4_fill_ipv4_dst_ip }, + { .val = NULL } +}; + +/* IPv6 match fields */ +static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 20) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 8) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[4], &val, sizeof(u32)); + memcpy(&f->mask.fip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[8], &val, sizeof(u32)); + memcpy(&f->mask.fip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[12], &val, sizeof(u32)); + memcpy(&f->mask.fip[12], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip1(struct 
ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[4], &val, sizeof(u32)); + memcpy(&f->mask.lip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[8], &val, sizeof(u32)); + memcpy(&f->mask.lip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[12], &val, sizeof(u32)); + memcpy(&f->mask.lip[12], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv6_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv6_tos }, + { .off = 4, .val = cxgb4_fill_ipv6_proto }, + { .off = 8, .val = cxgb4_fill_ipv6_src_ip0 }, + { .off = 12, .val = cxgb4_fill_ipv6_src_ip1 }, + { .off = 16, .val = cxgb4_fill_ipv6_src_ip2 }, + { .off = 20, .val = cxgb4_fill_ipv6_src_ip3 }, + { .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 }, + { .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 }, + { .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 }, + { .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 }, + { .val = NULL } +}; + +/* TCP/UDP match */ +static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.fport = ntohl(val) >> 16; + f->mask.fport = ntohl(mask) >> 16; + f->val.lport = ntohl(val) & 0x0000FFFF; + f->mask.lport = ntohl(mask) & 0x0000FFFF; + + return 0; +}; + +static const struct cxgb4_match_field cxgb4_tcp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +static const struct cxgb4_match_field cxgb4_udp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +struct cxgb4_next_header { + unsigned int offset; /* Offset to next header */ + /* offset, shift, and mask added to offset above + * to get to next header. Useful when using a header + * field's value to jump to next header such as IHL field + * in IPv4 header. + */ + unsigned int offoff; + u32 shift; + u32 mask; + /* match criteria to make this jump */ + unsigned int match_off; + u32 match_val; + u32 match_mask; + /* location of jump to make */ + const struct cxgb4_match_field *jump; +}; + +/* Accept a rule with a jump to transport layer header based on IHL field in + * IPv4 header. + */ +static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = { + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00, + .jump = cxgb4_tcp_fields }, + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; + +/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header + * to get to transport layer header. + */ +static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000, + .jump = cxgb4_tcp_fields }, + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; +#endif /* __CXGB4_TC_U32_PARSE_H */ From d8931847488d250e27d8f18ca6b7373e9f981d7a Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:09 +0530 Subject: [PATCH 0606/1050] cxgb4: add support for offloading u32 filters Add support for offloading u32 filter onto hardware. 
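(Editorial aside, not part of the patch: the cxgb4_next_header tables above encode the standard tc u32 variable-offset rule — the 16-bit word at offoff is masked, shifted, and added to the base offset to locate the next header. Below is a minimal standalone C sketch of how an IHL-based jump resolves; the 0x0f00/6 mask-shift pair is the form a tc user would supply on the command line and is illustrative, not lifted from the driver tables.)

#include <stdint.h>
#include <stdio.h>

/* Resolve a u32 variable offset: mask and shift the 16-bit word
 * found at 'offoff' and use the result as the jump in bytes. */
static unsigned int next_header_offset(uint16_t halfword, uint16_t offmask,
				       unsigned int offshift)
{
	return (halfword & offmask) >> offshift;
}

int main(void)
{
	/* IPv4 first halfword 0x4500: version 4, IHL 5, TOS 0.
	 * Mask 0x0f00 isolates IHL (counted in 32-bit words); shifting
	 * right by 6 instead of 8 multiplies by 4, yielding bytes. */
	printf("no options: L4 header at byte %u\n",
	       next_header_offset(0x4500, 0x0f00, 6)); /* prints 20 */
	printf("one option word: L4 header at byte %u\n",
	       next_header_offset(0x4600, 0x0f00, 6)); /* prints 24 */
	return 0;
}

The IPv6 jump entries need no such arithmetic, since the fixed IPv6 header is always 40 (0x28) bytes.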
Links are stored in a jump table to perform necessary jumps to match TCP/UDP header. When inserting rules in the linked bucket, the TCP/UDP match fields in the corresponding entry of the jump table are appended to the filter rule before insertion. If a link is deleted, then all corresponding filters associated with the link are also deleted. Also enable hardware tc offload as a supported feature. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 41 +- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 412 ++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h | 57 +++ .../chelsio/cxgb4/cxgb4_tc_u32_parse.h | 12 + 6 files changed, 525 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index da889817ec27..c6b71f656992 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 51edb126da65..ea0d1f188802 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -851,6 +851,9 @@ struct adapter { spinlock_t stats_lock; spinlock_t win0_lock ____cacheline_aligned_in_smp; + + /* TC u32 offload */ + struct cxgb4_tc_u32_table *tc_u32; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e97daa0510e5..1be4d235a576 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -78,6 +78,7 @@ #include "clip_tbl.h" #include "l2t.h" #include "sched.h" +#include "cxgb4_tc_u32.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -2711,6 +2712,35 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to setup tc on port %d. 
Link Down?\n", + pi->port_id); + return -EINVAL; + } + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return cxgb4_config_knode(dev, proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return cxgb4_delete_knode(dev, proto, tc->cls_u32); + default: + return -EOPNOTSUPP; + } + } + + return -EOPNOTSUPP; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -2734,6 +2764,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_busy_poll = cxgb_busy_poll, #endif .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, + .ndo_setup_tc = cxgb_setup_tc, }; #ifdef CONFIG_PCI_IOV @@ -4406,6 +4437,7 @@ static void free_some_resources(struct adapter *adapter) t4_free_mem(adapter->l2t); t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); + cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); kfree(adapter->sge.starving_fl); @@ -4750,7 +4782,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_TC; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@ -4838,6 +4871,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; + } else { + adapter->tc_u32 = cxgb4_init_tc_u32(adapter, + CXGB4_MAX_LINK_HANDLE); + if (!adapter->tc_u32) + dev_warn(&pdev->dev, + "could not offload tc u32, continuing\n"); } if (is_offload(adapter)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c new file mode 100644 index 000000000000..d63b8955ea05 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -0,0 +1,412 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "cxgb4_tc_u32_parse.h" +#include "cxgb4_tc_u32.h" + +/* Fill ch_filter_specification with parsed match value/mask pair. */ +static int fill_match_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls, + const struct cxgb4_match_field *entry, + bool next_header) +{ + unsigned int i, j; + u32 val, mask; + int off, err; + bool found; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + off = cls->knode.sel->keys[i].off; + val = cls->knode.sel->keys[i].val; + mask = cls->knode.sel->keys[i].mask; + + if (next_header) { + /* For next headers, parse only keys with offmask */ + if (!cls->knode.sel->keys[i].offmask) + continue; + } else { + /* For the remaining, parse only keys without offmask */ + if (cls->knode.sel->keys[i].offmask) + continue; + } + + found = false; + + for (j = 0; entry[j].val; j++) { + if (off == entry[j].off) { + found = true; + err = entry[j].val(fs, val, mask); + if (err) + return err; + break; + } + } + + if (!found) + return -EINVAL; + } + + return 0; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + const struct cxgb4_match_field *start, *link_start = NULL; + struct adapter *adapter = netdev2adap(dev); + struct ch_filter_specification fs; + struct cxgb4_tc_u32_table *t; + struct cxgb4_link *link; + unsigned int filter_id; + u32 uhtid, link_uhtid; + bool is_ipv6 = false; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) + return -EOPNOTSUPP; + + /* Fetch the location to insert the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for insertion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + uhtid = TC_U32_USERHTID(cls->knode.handle); + link_uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Ensure link handle uhtid is sane, if specified. */ + if (link_uhtid >= t->size) + return -EINVAL; + + memset(&fs, 0, sizeof(fs)); + + if (protocol == htons(ETH_P_IPV6)) { + start = cxgb4_ipv6_fields; + is_ipv6 = true; + } else { + start = cxgb4_ipv4_fields; + is_ipv6 = false; + } + + if (uhtid != 0x800) { + /* Link must exist from root node before insertion. */ + if (!t->table[uhtid - 1].link_handle) + return -EINVAL; + + /* Link must have a valid supported next header. */ + link_start = t->table[uhtid - 1].match_field; + if (!link_start) + return -EINVAL; + } + + /* Parse links and record them for subsequent jumps to valid + * next headers. + */ + if (link_uhtid) { + const struct cxgb4_next_header *next; + bool found = false; + unsigned int i, j; + u32 val, mask; + int off; + + if (t->table[link_uhtid - 1].link_handle) { + dev_err(adapter->pdev_dev, + "Link handle exists for: 0x%x\n", + link_uhtid); + return -EINVAL; + } + + next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps; + + /* Try to find matches that allow jumps to next header. 
*/ + for (i = 0; next[i].jump; i++) { + if (next[i].offoff != cls->knode.sel->offoff || + next[i].shift != cls->knode.sel->offshift || + next[i].mask != cls->knode.sel->offmask || + next[i].offset != cls->knode.sel->off) + continue; + + /* Found a possible candidate. Find a key that + * matches the corresponding offset, value, and + * mask to jump to next header. + */ + for (j = 0; j < cls->knode.sel->nkeys; j++) { + off = cls->knode.sel->keys[j].off; + val = cls->knode.sel->keys[j].val; + mask = cls->knode.sel->keys[j].mask; + + if (next[i].match_off == off && + next[i].match_val == val && + next[i].match_mask == mask) { + found = true; + break; + } + } + + if (!found) + continue; /* Try next candidate. */ + + /* Candidate to jump to next header found. + * Translate all keys to internal specification + * and store them in jump table. This spec is copied + * later to set the actual filters. + */ + ret = fill_match_fields(adapter, &fs, cls, + start, false); + if (ret) + goto out; + + link = &t->table[link_uhtid - 1]; + link->match_field = next[i].jump; + link->link_handle = cls->knode.handle; + memcpy(&link->fs, &fs, sizeof(fs)); + break; + } + + /* No candidate found to jump to next header. */ + if (!found) + return -EINVAL; + + return 0; + } + + /* Fill ch_filter_specification match fields to be shipped to hardware. + * Copy the linked spec (if any) first. And then update the spec as + * needed. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) { + /* Copy linked ch_filter_specification */ + memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs)); + ret = fill_match_fields(adapter, &fs, cls, + link_start, true); + if (ret) + goto out; + } + + ret = fill_match_fields(adapter, &fs, cls, start, false); + if (ret) + goto out; + + /* The filter spec has been completely built from the info + * provided from u32. We now set some default fields in the + * spec for sanity. + */ + + /* Match only packets coming from the ingress port where this + * filter will be created. + */ + fs.val.iport = netdev2pinfo(dev)->port_id; + fs.mask.iport = ~0; + + /* Enable filter hit counts. */ + fs.hitcnts = 1; + + /* Set type of filter - IPv6 or IPv4 */ + fs.type = is_ipv6 ? 1 : 0; + + /* Set the filter */ + ret = cxgb4_set_filter(dev, filter_id, &fs); + if (ret) + goto out; + + /* If this is a linked bucket, then set the corresponding + * entry in the bitmap to mark it as belonging to this linked + * bucket. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) + set_bit(filter_id, t->table[uhtid - 1].tid_map); + +out: + return ret; +} + +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int filter_id, max_tids, i, j; + struct cxgb4_link *link = NULL; + struct cxgb4_tc_u32_table *t; + u32 handle, uhtid; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + /* Fetch the location to delete the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for deletion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + handle = cls->knode.handle; + uhtid = TC_U32_USERHTID(cls->knode.handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. 
+ */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Delete the specified filter */ + if (uhtid != 0x800) { + link = &t->table[uhtid - 1]; + if (!link->link_handle) + return -EINVAL; + + if (!test_bit(filter_id, link->tid_map)) + return -EINVAL; + } + + ret = cxgb4_del_filter(dev, filter_id); + if (ret) + goto out; + + if (link) + clear_bit(filter_id, link->tid_map); + + /* If a link is being deleted, then delete all filters + * associated with the link. + */ + max_tids = adapter->tids.nftids; + for (i = 0; i < t->size; i++) { + link = &t->table[i]; + + if (link->link_handle == handle) { + for (j = 0; j < max_tids; j++) { + if (!test_bit(j, link->tid_map)) + continue; + + ret = __cxgb4_del_filter(dev, j, NULL); + if (ret) + goto out; + + clear_bit(j, link->tid_map); + } + + /* Clear the link state */ + link->match_field = NULL; + link->link_handle = 0; + memset(&link->fs, 0, sizeof(link->fs)); + break; + } + } + +out: + return ret; +} + +void cxgb4_cleanup_tc_u32(struct adapter *adap) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!adap->tc_u32) + return; + + /* Free up all allocated memory. */ + t = adap->tc_u32; + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + t4_free_mem(link->tid_map); + } + t4_free_mem(adap->tc_u32); +} + +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!size) + return NULL; + + t = t4_alloc_mem(sizeof(*t) + + (size * sizeof(struct cxgb4_link))); + if (!t) + return NULL; + + t->size = size; + + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + unsigned int bmap_size; + unsigned int max_tids; + + max_tids = adap->tids.nftids; + bmap_size = BITS_TO_LONGS(max_tids); + link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size); + if (!link->tid_map) + goto out_no_mem; + bitmap_zero(link->tid_map, max_tids); + } + + return t; + +out_no_mem: + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + if (link->tid_map) + t4_free_mem(link->tid_map); + } + + if (t) + t4_free_mem(t); + + return NULL; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h new file mode 100644 index 000000000000..6bdc885eff22 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h @@ -0,0 +1,57 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_H +#define __CXGB4_TC_U32_H + +#include <net/pkt_cls.h> + +#define CXGB4_MAX_LINK_HANDLE 32 + +static inline bool can_tc_u32_offload(struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + + return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); + +void cxgb4_cleanup_tc_u32(struct adapter *adapter); +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size); +#endif /* __CXGB4_TC_U32_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h index 65c20ca51baa..a4b99edcc339 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -279,4 +279,16 @@ static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { .jump = cxgb4_udp_fields }, { .jump = NULL } }; + +struct cxgb4_link { + const struct cxgb4_match_field *match_field; /* Next header */ + struct ch_filter_specification fs; /* Match spec associated with link */ + u32 link_handle; /* Knode handle associated with the link */ + unsigned long *tid_map; /* Bitmap for filter tids */ +}; + +struct cxgb4_tc_u32_table { + unsigned int size; /* number of entries in table */ + struct cxgb4_link table[0]; /* Jump table */ +}; #endif /* __CXGB4_TC_U32_PARSE_H */ From b20ff726fa8360a0508d2d79ecdee5a45d854e99 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:10 +0530 Subject: [PATCH 0607/1050] cxgb4: add support for drop and redirect actions Add support for dropping matched packets in hardware. Also add support for re-directing matched packets to a specified port in hardware. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index d63b8955ea05..49d2debb334e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -32,6 +32,9 @@ * SOFTWARE. */ +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + #include "cxgb4.h" #include "cxgb4_tc_u32_parse.h" #include "cxgb4_tc_u32.h" @@ -82,6 +85,67 @@ static int fill_match_fields(struct adapter *adap, return 0; } +/* Fill ch_filter_specification with parsed action. 
*/ +static int fill_action_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls) +{ + unsigned int num_actions = 0; + const struct tc_action *a; + struct tcf_exts *exts; + LIST_HEAD(actions); + + exts = cls->knode.exts; + if (tc_no_actions(exts)) + return -EINVAL; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + /* Don't allow more than one action per rule. */ + if (num_actions) + return -EINVAL; + + /* Drop in hardware. */ + if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + num_actions++; + continue; + } + + /* Re-direct to specified port in hardware. */ + if (is_tcf_mirred_redirect(a)) { + struct net_device *n_dev; + unsigned int i, index; + bool found = false; + + index = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (index == n_dev->ifindex) { + fs->action = FILTER_SWITCH; + fs->eport = i; + found = true; + break; + } + } + + /* Interface doesn't belong to any port of + * the underlying hardware. + */ + if (!found) + return -EINVAL; + + num_actions++; + continue; + } + + /* Un-supported action. */ + return -EINVAL; + } + + return 0; +} + int cxgb4_config_knode(struct net_device *dev, __be16 protocol, struct tc_cls_u32_offload *cls) { @@ -234,6 +298,13 @@ int cxgb4_config_knode(struct net_device *dev, __be16 protocol, if (ret) goto out; + /* Fill ch_filter_specification action fields to be shipped to + * hardware. + */ + ret = fill_action_fields(adapter, &fs, cls); + if (ret) + goto out; + /* The filter spec has been completely built from the info * provided from u32. We now set some default fields in the * spec for sanity. From fba1296624bf95fc07057da1e26beee8a733180c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 20 Sep 2016 14:55:31 +0300 Subject: [PATCH 0608/1050] net/mlx4_core: Fix to clean devlink resources This patch cleans devlink resources by calling devlink_port_unregister() to avoid the following issues: - Kernel panic when triggering reset flow. - Memory leak due to unfreed resources in mlx4_init_port_info(). Fixes: 09d4d087cd48 ("mlx4: Implement devlink interface") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 75dd2e3d3059..7183ac4135d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2970,6 +2970,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + devlink_port_unregister(&info->devlink_port); info->port = -1; } @@ -2984,6 +2985,8 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); + devlink_port_unregister(&info->devlink_port); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(info->rmap); info->rmap = NULL; From e82f71489ffb325a9b7bca367b06a39315452dfe Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 20 Sep 2016 23:53:24 +0800 Subject: [PATCH 0609/1050] net: ethernet: mediatek: fix missing changes merged for conflicts overlapping commits Add back the missing changes from the following commits, which were dropped while resolving conflicts with overlapping commits during the merge in commit b20b378d49926b82c0a131492fa8842156e0e8a9 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net"): 1) Commit d3bd1ce4db8e843dce421e2f8f123e5251a9c7d3 ("remove redundant free_irq for devm_request_irq allocated irq") 2) Commit 7c6b0d76fa02213393815e3b6d5e4a415bf3f0e2 ("fix logic unbalance between probe and remove") Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ca6b5013e275..2909372c4da0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1894,11 +1894,8 @@ static void mtk_uninit(struct net_device *dev) struct mtk_eth *eth = mac->hw; phy_disconnect(mac->phy_dev); - mtk_mdio_cleanup(eth); mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); - free_irq(eth->irq[1], dev); - free_irq(eth->irq[2], dev); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -2454,6 +2451,7 @@ static int mtk_remove(struct platform_device *pdev) netif_napi_del(&eth->tx_napi); netif_napi_del(&eth->rx_napi); mtk_cleanup(eth); + mtk_mdio_cleanup(eth); return 0; } From 262b38cdb3e47d402f4fdf76fcf3e8c4c8380a52 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 20 Sep 2016 22:30:11 +0200 Subject: [PATCH 0610/1050] net: ethernet: hisilicon: hns: use phydev from struct net_device The private structure contains a pointer to the phydev, but struct net_device already contains such a pointer. So we can remove the phydev pointer from the private structure and update the driver to use the one in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 23 +++++-------- drivers/net/ethernet/hisilicon/hns/hns_enet.h | 1 - .../net/ethernet/hisilicon/hns/hns_ethtool.c | 33 +++++++++---------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index d7e1f8c7ae92..059aaeda46b1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -994,10 +994,10 @@ static void hns_nic_adjust_link(struct net_device *ndev) struct hnae_handle *h = priv->ae_handle; int state = 1; - if (priv->phy) { + if (ndev->phydev) { h->dev->ops->adjust_link(h, ndev->phydev->speed, ndev->phydev->duplex); - state = priv->phy->link; + state = ndev->phydev->link; } state = state && h->dev->ops->get_status(h); @@ -1022,7 +1022,6 @@ static void hns_nic_adjust_link(struct net_device *ndev) */ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) { - struct hns_nic_priv *priv = netdev_priv(ndev); struct phy_device *phy_dev = h->phy_dev; int ret; @@ -1046,8 +1045,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; - priv->phy = phy_dev; - return 0; } @@ -1224,8 +1221,8 @@ static int hns_nic_net_up(struct net_device *ndev) if (ret) goto out_start_err; - if (priv->phy) - phy_start(priv->phy); + if (ndev->phydev) + phy_start(ndev->phydev); clear_bit(NIC_STATE_DOWN, &priv->state); (void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ); @@ -1259,8 +1256,8 @@ static void hns_nic_net_down(struct net_device *ndev) netif_tx_disable(ndev); priv->link = 0; - if (priv->phy) - phy_stop(priv->phy); + if (ndev->phydev) + phy_stop(ndev->phydev); ops = priv->ae_handle->dev->ops; @@ -1359,8 +1356,7 @@ static void hns_nic_net_timeout(struct net_device *ndev) static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; if (!netif_running(netdev)) return -EINVAL; @@ -2017,9 +2013,8 @@ static int hns_nic_dev_remove(struct platform_device *pdev) hns_nic_uninit_ring_data(priv); priv->ring_data = NULL; - if (priv->phy) - phy_disconnect(priv->phy); - priv->phy = NULL; + if (ndev->phydev) + phy_disconnect(ndev->phydev); if (!IS_ERR_OR_NULL(priv->ae_handle)) hnae_put_handle(priv->ae_handle); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index 44bb3015eed3..5b412de350aa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -59,7 +59,6 @@ struct hns_nic_priv { u32 port_id; int phy_mode; int phy_led_val; - struct phy_device *phy; struct net_device *netdev; struct device *dev; struct hnae_handle *ae_handle; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 5eb3245bdf86..0e2c17423a34 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -48,9 +48,9 @@ static u32 hns_nic_get_link(struct net_device *net_dev) h = priv->ae_handle; - if (priv->phy) { - if (!genphy_read_status(priv->phy)) - link_stat = priv->phy->link; + if (net_dev->phydev) { + if (!genphy_read_status(net_dev->phydev)) + link_stat = net_dev->phydev->link; else link_stat = 0; } @@ -67,8 +67,7 @@ static void 
hns_get_mdix_mode(struct net_device *net_dev, struct ethtool_cmd *cmd) { int mdix_ctrl, mdix, retval, is_resolved; - struct hns_nic_priv *priv = netdev_priv(net_dev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; @@ -144,8 +143,8 @@ static int hns_nic_get_settings(struct net_device *net_dev, ethtool_cmd_speed_set(cmd, speed); cmd->duplex = duplex; - if (priv->phy) - (void)phy_ethtool_gset(priv->phy, cmd); + if (net_dev->phydev) + (void)phy_ethtool_gset(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { @@ -215,13 +214,13 @@ static int hns_nic_set_settings(struct net_device *net_dev, cmd->duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->autoneg == AUTONEG_ENABLE) return -EINVAL; if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) return -EINVAL; - if (priv->phy) - return phy_ethtool_sset(priv->phy, cmd); + if (net_dev->phydev) + return phy_ethtool_sset(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && @@ -305,7 +304,7 @@ static int __lb_setup(struct net_device *ndev, { int ret = 0; struct hns_nic_priv *priv = netdev_priv(ndev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = ndev->phydev; struct hnae_handle *h = priv->ae_handle; switch (loop) { @@ -910,7 +909,7 @@ void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES], ETH_GSTRING_LEN); buff += ETH_GSTRING_LEN; - if ((priv->phy) && (!priv->phy->is_c45)) + if ((netdev->phydev) && (!netdev->phydev->is_c45)) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY], ETH_GSTRING_LEN); @@ -996,7 +995,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII) cnt--; - if ((!priv->phy) || (priv->phy->is_c45)) + if ((!netdev->phydev) || (netdev->phydev->is_c45)) cnt--; return cnt; @@ -1015,8 +1014,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) int hns_phy_led_set(struct net_device *netdev, int value) { int retval; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED); retval |= phy_write(phy_dev, HNS_LED_FC_REG, value); @@ -1039,7 +1037,7 @@ int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_handle *h = priv->ae_handle; - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; int ret; if (phy_dev) @@ -1159,8 +1157,7 @@ static int hns_get_regs_len(struct net_device *net_dev) static int hns_nic_nway_reset(struct net_device *netdev) { int ret = 0; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy = priv->phy; + struct phy_device *phy = netdev->phydev; if (netif_running(netdev)) { if (phy) From d270f76c2d6ee3d96cfb1affb78a3d536e0b8fd6 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 20 Sep 2016 22:30:12 +0200 Subject: [PATCH 0611/1050] net: ethernet: hisilicon: hns: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. 
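(Editorial aside, not part of the patch: a minimal sketch of the destination API, for orientation. The foo_ driver and its fixed link modes are invented; the cmd->base fields and the ethtool_convert_* helper are the real interfaces the diff below switches to.)

#include <linux/ethtool.h>
#include <linux/netdevice.h>

/* Hypothetical callback, wired up as .get_link_ksettings in a
 * driver's struct ethtool_ops. */
static int foo_get_link_ksettings(struct net_device *dev,
				  struct ethtool_link_ksettings *cmd)
{
	u32 supported = SUPPORTED_1000baseT_Full | SUPPORTED_TP;
	u32 advertising = ADVERTISED_1000baseT_Full;

	/* Speed, duplex and autoneg now live under cmd->base, replacing
	 * ethtool_cmd_speed_set() and the flat fields of the old
	 * struct ethtool_cmd. */
	cmd->base.speed = SPEED_1000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.autoneg = AUTONEG_DISABLE;
	cmd->base.port = PORT_TP;

	/* Link modes are arbitrary-length bitmaps; legacy u32 masks go
	 * through the convert helpers instead of direct assignment. */
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
						advertising);
	return 0;
}

Reading a legacy mask back out goes through ethtool_convert_link_mode_to_legacy_u32(), as the converted hns_nic_get_link_ksettings() below does for h->if_support.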
Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns/hns_ethtool.c | 105 ++++++++++-------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 0e2c17423a34..47e59bbfd061 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -64,14 +64,14 @@ static u32 hns_nic_get_link(struct net_device *net_dev) } static void hns_get_mdix_mode(struct net_device *net_dev, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { int mdix_ctrl, mdix, retval, is_resolved; struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; return; } @@ -88,35 +88,35 @@ static void hns_get_mdix_mode(struct net_device *net_dev, switch (mdix_ctrl) { case 0x0: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI; break; case 0x1: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X; break; case 0x3: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; break; default: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; break; } if (!is_resolved) - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; else if (mdix) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } /** - *hns_nic_get_settings - implement ethtool get settings + *hns_nic_get_link_ksettings - implement ethtool get link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_get_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_get_link_ksettings(struct net_device *net_dev, + struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -124,6 +124,7 @@ static int hns_nic_get_settings(struct net_device *net_dev, int ret; u8 duplex; u16 speed; + u32 supported, advertising; if (!priv || !priv->ae_handle) return -ESRCH; @@ -138,38 +139,43 @@ static int hns_nic_get_settings(struct net_device *net_dev, return -EINVAL; } + ethtool_convert_link_mode_to_legacy_u32(&supported, + cmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + /* When there is no phy, autoneg is off. 
*/ - cmd->autoneg = false; - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.autoneg = false; + cmd->base.cmd = speed; + cmd->base.duplex = duplex; if (net_dev->phydev) - (void)phy_ethtool_gset(net_dev->phydev, cmd); + (void)phy_ethtool_ksettings_get(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { - ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = (u32)SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - if (cmd->autoneg) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg) + advertising |= ADVERTISED_Autoneg; - cmd->supported |= h->if_support; + supported |= h->if_support; if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - cmd->supported |= SUPPORTED_TP; - cmd->advertising |= ADVERTISED_1000baseT_Full; + supported |= SUPPORTED_TP; + advertising |= ADVERTISED_1000baseT_Full; } else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_10000baseKR_Full; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_10000baseKR_Full; } switch (h->media_type) { case HNAE_MEDIA_TYPE_FIBER: - cmd->port = PORT_FIBRE; + cmd->base.port = PORT_FIBRE; break; case HNAE_MEDIA_TYPE_COPPER: - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; break; case HNAE_MEDIA_TYPE_UNKNOWN: default: @@ -177,23 +183,27 @@ static int hns_nic_get_settings(struct net_device *net_dev, } if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG)) - cmd->supported |= SUPPORTED_Pause; + supported |= SUPPORTED_Pause; - cmd->transceiver = XCVR_EXTERNAL; - cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + + cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22; hns_get_mdix_mode(net_dev, cmd); return 0; } /** - *hns_nic_set_settings - implement ethtool set settings + *hns_nic_set_link_settings - implement ethtool set link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_set_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_set_link_ksettings(struct net_device *net_dev, + const struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -207,24 +217,25 @@ static int hns_nic_set_settings(struct net_device *net_dev, return -ENODEV; h = priv->ae_handle; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 || - cmd->duplex != DUPLEX_FULL) + if (cmd->base.autoneg == AUTONEG_ENABLE || + speed != SPEED_10000 || + cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!net_dev->phydev && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE) return -EINVAL; - if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) + if (speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF) return -EINVAL; if (net_dev->phydev) - return phy_ethtool_sset(net_dev->phydev, cmd); + return phy_ethtool_ksettings_set(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && - speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) + speed != 
SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) return -EINVAL; } else { netdev_err(net_dev, "Not supported!"); @@ -232,7 +243,7 @@ static int hns_nic_set_settings(struct net_device *net_dev, } if (h->dev->ops->adjust_link) { - h->dev->ops->adjust_link(h, (int)speed, cmd->duplex); + h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex); return 0; } @@ -1264,8 +1275,6 @@ static int hns_get_rxnfc(struct net_device *netdev, static const struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, - .get_settings = hns_nic_get_settings, - .set_settings = hns_nic_set_settings, .get_ringparam = hns_get_ringparam, .get_pauseparam = hns_get_pauseparam, .set_pauseparam = hns_set_pauseparam, @@ -1285,6 +1294,8 @@ static const struct ethtool_ops hns_ethtool_ops = { .get_rxfh = hns_get_rss, .set_rxfh = hns_set_rss, .get_rxnfc = hns_get_rxnfc, + .get_link_ksettings = hns_nic_get_link_ksettings, + .set_link_ksettings = hns_nic_set_link_ksettings, }; void hns_ethtool_set_ops(struct net_device *ndev) From efee95f42b5dddedcaff0a0eaa44e170fc7522e8 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 Sep 2016 19:25:58 -0400 Subject: [PATCH 0612/1050] ptp_clock: future-proofing drivers against PTP subsystem becoming optional Drivers must be ready to accept NULL from ptp_clock_register() if the PTP clock subsystem is configured out. This patch documents that and ensures that all drivers cope well with a NULL return. Signed-off-by: Nicolas Pitre Reviewed-by: Eugenia Emantayev Acked-by: Richard Cochran Acked-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/ptp.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 2 +- drivers/net/ethernet/intel/igb/igb_ptp.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 2 +- drivers/net/ethernet/sfc/ptp.c | 14 +++++++------- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- include/linux/ptp_clock_kernel.h | 5 +++++ 9 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 2e1b17ad52a3..ad03763e009a 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -334,7 +334,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_err("ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { e_info("registered PHC clock\n"); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ed39cbad24bd..f1feceab758a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -669,7 +669,7 @@ void i40e_ptp_init(struct i40e_pf *pf) pf->ptp_clock = NULL; dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n", __func__); - } else { + } else if (pf->ptp_clock) { struct timespec64 ts; u32 regval; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 66dfa2085cc7..1dd14e166dc8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1159,7 +1159,7 @@ void igb_ptp_init(struct igb_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { + } else 
if (adapter->ptp_clock) { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags |= IGB_PTP_ENABLED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5431bfe3339..a92277683a64 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1254,7 +1254,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); return err; - } else + } else if (adapter->ptp_clock) e_dev_info("registered PHC device on %s\n", netdev->name); /* set default timestamp mode to disabled here. We do this in diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 1494997c4f7e..08fc5fc56d43 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -298,7 +298,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (IS_ERR(mdev->ptp_clock)) { mdev->ptp_clock = NULL; mlx4_err(mdev, "ptp_clock_register failed\n"); - } else { + } else if (mdev->ptp_clock) { mlx4_info(mdev, "registered PHC clock\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 847a8f3ac2b2..13dc388667b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -273,7 +273,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); - if (IS_ERR_OR_NULL(tstamp->ptp)) { + if (IS_ERR(tstamp->ptp)) { mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", PTR_ERR(tstamp->ptp)); tstamp->ptp = NULL; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index dd204d9704c6..77a5364f7a10 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1269,13 +1269,13 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) if (IS_ERR(ptp->phc_clock)) { rc = PTR_ERR(ptp->phc_clock); goto fail3; - } - - INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); - ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); - if (!ptp->pps_workwq) { - rc = -ENOMEM; - goto fail4; + } else if (ptp->phc_clock) { + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } } } ptp->nic_ts_enabled = false; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 170a18b61281..6e3b82972ce8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -187,7 +187,7 @@ int stmmac_ptp_register(struct stmmac_priv *priv) if (IS_ERR(priv->ptp_clock)) { priv->ptp_clock = NULL; pr_err("ptp_clock_register() failed on %s\n", priv->dev->name); - } else + } else if (priv->ptp_clock) pr_debug("Added PTP HW clock successfully on %s\n", priv->dev->name); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6b15e168148a..5ad54fc66cf0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -127,6 +127,11 @@ struct ptp_clock; * * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. 
+ * + * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * support is missing at the configuration level, this function + * returns NULL, and drivers are expected to gracefully handle that + * case separately. */ extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, From 0e7b99257be4b596d9fdd435698c0bfdb0b38d91 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 21 Sep 2016 01:40:31 +0200 Subject: [PATCH 0613/1050] net: dsa: mv88e6xxx: Add helper for accessing port registers There is a device coming soon which places its port registers somewhere different to all other Marvell switches supported so far. Add helper functions for reading/writing port registers, making it easier to handle this new device. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 370 +++++++++++++------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 - 2 files changed, 182 insertions(+), 189 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 1d71802396fb..25bd3fa744d9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,6 +216,22 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_write(chip, addr, reg, val); +} + static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, int reg, u16 *val) { @@ -585,23 +601,23 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct mv88e6xxx_chip *chip = ds->priv; - u32 reg; - int ret; + u16 reg; + int err; if (!phy_is_pseudo_fixed_link(phydev)) return; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg); + if (err) goto out; - reg = ret & ~(PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX | - PORT_PCS_CTRL_UNFORCED); + reg &= ~(PORT_PCS_CTRL_LINK_UP | + PORT_PCS_CTRL_FORCE_LINK | + PORT_PCS_CTRL_DUPLEX_FULL | + PORT_PCS_CTRL_FORCE_DUPLEX | + PORT_PCS_CTRL_UNFORCED); reg |= PORT_PCS_CTRL_FORCE_LINK; if (phydev->link) @@ -639,7 +655,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | PORT_PCS_CTRL_RGMII_DELAY_TXCLK); } - _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg); + mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); out: mutex_unlock(&chip->reg_lock); @@ -799,22 +815,22 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, { u32 low; u32 high = 0; - int ret; + int err; + u16 reg; u64 value; switch (s->type) { case PORT: - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg, &reg); + if (err) return UINT64_MAX; - low = ret; + low = reg; if (s->sizeof_stat == 4) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), - s->reg + 1); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg); + if (err) return UINT64_MAX; - high = ret; + high = reg; } break; case BANK0: @@ -893,6 +909,8 @@ static void 
mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { struct mv88e6xxx_chip *chip = ds->priv; + int err; + u16 reg; u16 *p = _p; int i; @@ -903,11 +921,10 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); for (i = 0; i < 32; i++) { - int ret; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i); - if (ret >= 0) - p[i] = ret; + err = mv88e6xxx_port_read(chip, port, i, &reg); + if (!err) + p[i] = reg; } mutex_unlock(&chip->reg_lock); @@ -938,7 +955,7 @@ static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, &reg); + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg); if (err) goto out; @@ -1106,12 +1123,13 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, u8 state) { struct dsa_switch *ds = chip->ds; - int reg, ret = 0; + u16 reg; + int err; u8 oldstate; - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, &reg); + if (err) + return err; oldstate = reg & PORT_CONTROL_STATE_MASK; @@ -1124,23 +1142,22 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, oldstate == PORT_CONTROL_STATE_FORWARDING) && (state == PORT_CONTROL_STATE_DISABLED || state == PORT_CONTROL_STATE_BLOCKING)) { - ret = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (ret) - return ret; + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + if (err) + return err; } reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", mv88e6xxx_port_state_names[state], mv88e6xxx_port_state_names[oldstate]); } - return ret; + return err; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) @@ -1149,7 +1166,8 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) const u16 mask = (1 << chip->info->num_ports) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; - int reg; + u16 reg; + int err; int i; /* allow CPU port or DSA link(s) to send frames to every port */ @@ -1170,14 +1188,14 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) /* prevent frames from going back out of the port they came in on */ output_ports &= ~BIT(port); - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg); + if (err) + return err; reg &= ~mask; reg |= output_ports & mask; - return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg); + return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); } static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, @@ -1218,23 +1236,22 @@ static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { struct dsa_switch *ds = chip->ds; - u16 pvid; - int ret; + u16 pvid, reg; + int err; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &reg); + if (err) + return err; - pvid = ret & PORT_DEFAULT_VLAN_MASK; + pvid = reg & PORT_DEFAULT_VLAN_MASK; if (new) { - ret &= ~PORT_DEFAULT_VLAN_MASK; 
- ret |= *new & PORT_DEFAULT_VLAN_MASK; + reg &= ~PORT_DEFAULT_VLAN_MASK; + reg |= *new & PORT_DEFAULT_VLAN_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_DEFAULT_VLAN, ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "DefaultVID %d (was %d)\n", *new, pvid); @@ -1613,7 +1630,8 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, struct dsa_switch *ds = chip->ds; u16 upper_mask; u16 fid; - int ret; + u16 reg; + int err; if (mv88e6xxx_num_databases(chip) == 4096) upper_mask = 0xff; @@ -1623,37 +1641,35 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, return -EOPNOTSUPP; /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg); + if (err) + return err; - fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; + fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; if (new) { - ret &= ~PORT_BASE_VLAN_FID_3_0_MASK; - ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; + reg &= ~PORT_BASE_VLAN_FID_3_0_MASK; + reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; } /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &reg); + if (err) + return err; - fid |= (ret & upper_mask) << 4; + fid |= (reg & upper_mask) << 4; if (new) { - ret &= ~upper_mask; - ret |= (*new >> 4) & upper_mask; + reg &= ~upper_mask; + reg |= (*new >> 4) & upper_mask; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "FID %d (was %d)\n", *new, fid); @@ -1865,26 +1881,26 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds->priv; u16 old, new = vlan_filtering ? 
PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; - int ret; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, &reg); + if (err) goto unlock; - old = ret & PORT_CONTROL_2_8021Q_MASK; + old = reg & PORT_CONTROL_2_8021Q_MASK; if (new != old) { - ret &= ~PORT_CONTROL_2_8021Q_MASK; - ret |= new & PORT_CONTROL_2_8021Q_MASK; + reg &= ~PORT_CONTROL_2_8021Q_MASK; + reg |= new & PORT_CONTROL_2_8021Q_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2, - ret); - if (ret < 0) + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) goto unlock; netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n", @@ -1892,11 +1908,11 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, mv88e6xxx_port_8021q_mode_names[old]); } - ret = 0; + err = 0; unlock: mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int @@ -2406,19 +2422,20 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) u16 is_reset = (ppu_active ? 0x8800 : 0xc800); struct gpio_desc *gpiod = chip->reset; unsigned long timeout; - int ret; + int err, ret; + u16 reg; int i; /* Set all ports to the disabled state. */ for (i = 0; i < chip->info->num_ports; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, &reg); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL, - ret & 0xfffc); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, i, PORT_CONTROL, + reg & 0xfffc); + if (err) + return err; } /* Wait for transmit queues to drain. */ @@ -2437,11 +2454,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) * through global registers 0x18 and 0x19. */ if (ppu_active) - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); else - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); - if (ret) - return ret; + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); + if (err) + return err; /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; @@ -2455,11 +2472,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) usleep_range(1000, 2000); } if (time_after(jiffies, timeout)) - ret = -ETIMEDOUT; + err = -ETIMEDOUT; else - ret = 0; + err = 0; - return ret; + return err; } static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) @@ -2480,21 +2497,10 @@ static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) return err; } -static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, - int reg, u16 *val) -{ - int addr = chip->info->port_base_addr + port; - - if (port >= chip->info->num_ports) - return -EINVAL; - - return mv88e6xxx_read(chip, addr, reg, val); -} - static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) { struct dsa_switch *ds = chip->ds; - int ret; + int err; u16 reg; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || @@ -2507,7 +2513,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * and all DSA ports to their maximum bandwidth and * full duplex.
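The switch reset conversion above also shows the driver's bounded-poll idiom: arm a jiffies deadline, sleep between polls rather than busy-wait, and translate deadline expiry into -ETIMEDOUT. Reduced to its general shape (reset_done() is a hypothetical stand-in for the status read elided here):

	unsigned long timeout = jiffies + 1 * HZ;	/* one second budget */

	while (!reset_done(chip)) {			/* hypothetical predicate */
		if (time_after(jiffies, timeout))
			return -ETIMEDOUT;
		usleep_range(1000, 2000);		/* sleep, don't spin */
	}
	return 0;

time_after() is used instead of a plain comparison so the test stays correct across jiffies wraparound.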
*/ - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg); if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | @@ -2522,10 +2528,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_PCS_CTRL_UNFORCED; } - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PCS_CTRL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; } /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, @@ -2576,26 +2581,25 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; } /* If this port is connected to a SerDes, make sure the SerDes is not * powered down. */ if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); - if (ret < 0) - return ret; - ret &= PORT_STATUS_CMODE_MASK; - if ((ret == PORT_STATUS_CMODE_100BASE_X) || - (ret == PORT_STATUS_CMODE_1000BASE_X) || - (ret == PORT_STATUS_CMODE_SGMII)) { - ret = mv88e6xxx_serdes_power_on(chip); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg); + if (err) + return err; + reg &= PORT_STATUS_CMODE_MASK; + if ((reg == PORT_STATUS_CMODE_100BASE_X) || + (reg == PORT_STATUS_CMODE_1000BASE_X) || + (reg == PORT_STATUS_CMODE_SGMII)) { + err = mv88e6xxx_serdes_power_on(chip); + if (err < 0) + return err; } } @@ -2629,10 +2633,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_CONTROL_2_8021Q_DISABLED; if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL_2, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) + return err; } /* Port Association Vector: when learning source addresses @@ -2645,16 +2648,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port)) reg = 0; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg); + if (err) + return err; /* Egress rate control 2: disable egress rate control. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000); + if (err) + return err; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || @@ -2663,96 +2664,89 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * be paused for by the remote end or the period of * time that this port can pause the remote end. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PAUSE_CTRL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000); + if (err) + return err; /* Port ATU control: disable limiting the number of * address database entries that this port is allowed * to use.
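Nearly every hunk in this setup path is the same read-modify-write shape against a port register, now expressed with the err/&reg convention. Abstracted out (REG, FIELD_MASK and value are placeholders, not driver identifiers):

	u16 reg;
	int err;

	err = mv88e6xxx_port_read(chip, port, REG, &reg);
	if (err)
		return err;

	reg &= ~FIELD_MASK;		/* clear the field */
	reg |= value & FIELD_MASK;	/* set the new value */

	return mv88e6xxx_port_write(chip, port, REG, reg);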
*/ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ATU_CONTROL, 0x0000); + err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL, + 0x0000); /* Priority Override: disable DA, SA and VTU priority * override. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PRI_OVERRIDE, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE, + 0x0000); + if (err) + return err; /* Port Ethertype: use the Ethertype DSA Ethertype * value. */ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ETH_TYPE, ETH_P_EDSA); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE, + ETH_P_EDSA); + if (err) + return err; } /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_0123, 0x3210); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123, + 0x3210); + if (err) + return err; /* Tag Remap 2: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_4567, 0x7654); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567, + 0x7654); + if (err) + return err; } /* Rate Control: disable ingress rate limiting. */ if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || mv88e6xxx_6320_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0001); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0001); + if (err) + return err; } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0000); + if (err) + return err; } /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000); + if (err) + return err; /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - ret = _mv88e6xxx_port_fid_set(chip, port, 0); - if (ret) - return ret; + err = _mv88e6xxx_port_fid_set(chip, port, 0); + if (err) + return err; - ret = _mv88e6xxx_port_based_vlan_map(chip, port); - if (ret) - return ret; + err = _mv88e6xxx_port_based_vlan_map(chip, port); + if (err) + return err; /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. 
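The per-family and capability checks sprinkled through this function (mv88e6xxx_has(chip, MV88E6XXX_FLAG_...)) are plain bitmask tests, which is what motivates the flag-widening patch that follows: on a 32-bit kernel, BIT(n) expands to 1UL << (n), so once capability indexes reach 32 the shift overflows unsigned long. A short sketch of the failure mode and the fix (macro names here are illustrative):

	#define FLAG_NARROW(cap)	BIT(cap)	/* 1UL << (cap): undefined for cap >= 32 on 32-bit */
	#define FLAG_WIDE(cap)		BIT_ULL(cap)	/* 1ULL << (cap): always 64-bit */

	/* a plausible reduction of the capability test, given a 64-bit mask: */
	static bool has_caps(unsigned long long flags, unsigned long long caps)
	{
		return (flags & caps) == caps;
	}

The matching storage change is the unsigned long -> unsigned long long flags field in struct mv88e6xxx_info, as the next diff shows.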
*/ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN, - 0x0000); - if (ret) - return ret; - - return 0; + return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 52f3f5231f51..e349d0d64645 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -37,7 +37,6 @@ #define ADDR_SERDES 0x0f #define SERDES_PAGE_FIBER 0x01 -#define REG_PORT(p) (0x10 + (p)) #define PORT_STATUS 0x00 #define PORT_STATUS_PAUSE_EN BIT(15) #define PORT_STATUS_MY_PAUSE BIT(14) From d6b1023a83e85943d1f2fa3baafbb4d5cfee7179 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 21 Sep 2016 01:40:32 +0200 Subject: [PATCH 0614/1050] net: dsa: mv88e6xxx: Convert flag bits to unsigned long long We are soon going to run out of flag bits on 32bit systems. Convert to unsigned long long. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index e349d0d64645..827988397fd8 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -452,36 +452,36 @@ enum mv88e6xxx_cap { }; /* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_EDSA BIT(MV88E6XXX_CAP_EDSA) -#define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) +#define MV88E6XXX_FLAG_EDSA BIT_ULL(MV88E6XXX_CAP_EDSA) +#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE) -#define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD) -#define MV88E6XXX_FLAG_SMI_DATA BIT(MV88E6XXX_CAP_SMI_DATA) +#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD) +#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA) -#define MV88E6XXX_FLAG_PHY_PAGE BIT(MV88E6XXX_CAP_PHY_PAGE) +#define MV88E6XXX_FLAG_PHY_PAGE BIT_ULL(MV88E6XXX_CAP_PHY_PAGE) -#define MV88E6XXX_FLAG_SERDES BIT(MV88E6XXX_CAP_SERDES) +#define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES) -#define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) -#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) -#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) -#define MV88E6XXX_FLAG_G2_IRL_CMD BIT(MV88E6XXX_CAP_G2_IRL_CMD) -#define MV88E6XXX_FLAG_G2_IRL_DATA BIT(MV88E6XXX_CAP_G2_IRL_DATA) -#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT(MV88E6XXX_CAP_G2_PVT_ADDR) -#define MV88E6XXX_FLAG_G2_PVT_DATA BIT(MV88E6XXX_CAP_G2_PVT_DATA) -#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT(MV88E6XXX_CAP_G2_SWITCH_MAC) -#define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) -#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD) -#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD) -#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA) +#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) +#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) +#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) +#define MV88E6XXX_FLAG_G2_IRL_CMD BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD) +#define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA) +#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) +#define 
MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) +#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC) +#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) +#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) +#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) +#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD) +#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA) -#define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) -#define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) -#define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU) -#define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) -#define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) -#define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU) +#define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) +#define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) +#define MV88E6XXX_FLAG_STU BIT_ULL(MV88E6XXX_CAP_STU) +#define MV88E6XXX_FLAG_TEMP BIT_ULL(MV88E6XXX_CAP_TEMP) +#define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) +#define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) /* EEPROM Programming via Global2 with 16-bit data */ #define MV88E6XXX_FLAGS_EEPROM16 \ @@ -614,7 +614,7 @@ struct mv88e6xxx_info { unsigned int num_ports; unsigned int port_base_addr; unsigned int age_time_coeff; - unsigned long flags; + unsigned long long flags; }; struct mv88e6xxx_atu_entry { From adb03115f4590baa280ddc440a8eff08a6be0cb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Sep 2016 18:06:17 -0700 Subject: [PATCH 0615/1050] net: get rid of a signed integer overflow in ip_idents_reserve() Jiri Pirko reported an UBSAN warning happening in ip_idents_reserve() [] UBSAN: Undefined behaviour in ./arch/x86/include/asm/atomic.h:156:11 [] signed integer overflow: [] -2117905507 + -695755206 cannot be represented in type 'int' Since we do not have uatomic_add_return() yet, use atomic_cmpxchg() so that the arithmetic can be done using unsigned int. Fixes: 04ca6973f7c1 ("ip: make IP identifiers less predictable") Signed-off-by: Eric Dumazet Reported-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a1f2830d8110..b5b47a26d4ec 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -476,12 +476,18 @@ u32 ip_idents_reserve(u32 hash, int segs) atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; u32 old = ACCESS_ONCE(*p_tstamp); u32 now = (u32)jiffies; - u32 delta = 0; + u32 new, delta = 0; if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - return atomic_add_return(segs + delta, p_id) - segs; + /* Do not use atomic_add_return() as it makes UBSAN unhappy */ + do { + old = (u32)atomic_read(p_id); + new = old + delta + segs; + } while (atomic_cmpxchg(p_id, old, new) != old); + + return new - segs; } EXPORT_SYMBOL(ip_idents_reserve); From 75c9510b8f745f75280029a8a9f96567f55f401e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 20 Sep 2016 23:33:15 -0400 Subject: [PATCH 0616/1050] MAINTAINERS: Update b44 maintainer. Taking over as maintainer since Gary Zambrano is no longer working for Broadcom. Signed-off-by: Michael Chan Acked-by: Florian Fainelli Signed-off-by: David S.
Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 247b418959fa..3df4be3c4e0d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2500,7 +2500,7 @@ S: Supported F: kernel/bpf/ BROADCOM B44 10/100 ETHERNET DRIVER -M: Gary Zambrano +M: Michael Chan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/b44.* From f9616c35a0d786bc64fff4bf819d1e4984873367 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Sep 2016 22:45:58 -0700 Subject: [PATCH 0617/1050] tcp: implement TSQ for retransmits We saw sch_fq drops caused by the per flow limit of 100 packets and TCP when dealing with large cwnd and bursts of retransmits. Even after increasing the limit to 1000, and even after commit 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time"), we can still have these drops. Under certain conditions, TCP can spend a considerable amount of time queuing thousands of skbs in a single tcp_xmit_retransmit_queue() invocation, incurring latency spikes and stalls of other softirq handlers. This patch implements TSQ for retransmits, limiting number of packets and giving more chance for scheduling packets in both ways. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 72 ++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d025a7804b5..478dfc539178 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -734,9 +734,16 @@ static void tcp_tsq_handler(struct sock *sk) { if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | - TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle, + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->lost_out > tp->retrans_out && + tp->snd_cwnd > tcp_packets_in_flight(tp)) + tcp_xmit_retransmit_queue(sk); + + tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); + } } /* * One tasklet per cpu tries to send more skbs. @@ -2039,6 +2046,39 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +/* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * (These limits are doubled for retransmits) + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates + * Alas, some drivers / subsystems require a fair amount + * of queued bytes to ensure line rate. + * One example is wifi aggregation (802.11 AMPDU) + */ +static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, + unsigned int factor) +{ + unsigned int limit; + + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); + limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); + limit <<= factor; + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED, so we must + * test again the condition. + */ + smp_mb__after_atomic(); + if (atomic_read(&sk->sk_wmem_alloc) > limit) + return true; + } + return false; +} + /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. 
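The refactor above pulls the inline TSQ test out into tcp_small_queue_check() with a factor shift so a single helper serves both transmit paths: the regular xmit path passes factor 0 and the new retransmit caller passes factor 1, doubling the allowed queued bytes for retransmits. The limit itself approximates "two packets or ~1 ms of pacing": sk_pacing_rate >> 10 is roughly the byte count sent in one millisecond, since dividing a bytes-per-second rate by 1024 approximates dividing by 1000. In outline:

	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
	limit <<= factor;	/* 0 for normal xmit, 1 for retransmit (2x) */

	if (atomic_read(&sk->sk_wmem_alloc) > limit)
		/* throttle; TX completion will re-test after TSQ_THROTTLED */

The set_bit()/smp_mb__after_atomic()/re-read sequence in the helper closes the race where a TX completion drains sk_wmem_alloc between the first test and setting TSQ_THROTTLED.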
@@ -2125,29 +2165,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; - /* TCP Small Queues : - * Control number of packets in qdisc/devices to two packets / or ~1 ms. - * This allows for : - * - better RTT estimation and ACK scheduling - * - faster recovery - * - high rates - * Alas, some drivers / subsystems require a fair amount - * of queued bytes to ensure line rate. - * One example is wifi aggregation (802.11 AMPDU) - */ - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); - limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); - - if (atomic_read(&sk->sk_wmem_alloc) > limit) { - set_bit(TSQ_THROTTLED, &tp->tsq_flags); - /* It is possible TX completion already happened - * before we set TSQ_THROTTLED, so we must - * test again the condition. - */ - smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) - break; - } + if (tcp_small_queue_check(sk, skb, 0)) + break; if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2847,6 +2866,9 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; + if (tcp_small_queue_check(sk, skb, 1)) + return; + if (tcp_retransmit_skb(sk, skb, segs)) return; From 1bfecfca565c0505d04dbf5fdd3d2fbb951827c0 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:42 +0300 Subject: [PATCH 0618/1050] net/mlx5e: Build RX SKB on demand For non-striding RQ configuration before this patch we had a ring with pre-allocated SKBs and mapped the SKB->data buffers for device. For robustness and better RX data buffers management, we allocate a page per packet and build_skb around it. This patch (which is a prerequisite for XDP) will actually reduce performance for normal stack usage, because we are now hitting a bottleneck in the page allocator. We use the page-cache to restore or even improve performance in comparison to the old RX scheme. Packet rate performance testing was done with pktgen 64B packets on xmit side and TC ingress dropping action on RX side. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz Comparison is done between: 1.Baseline, before 'net/mlx5e: Build RX SKB on demand' 2.Build SKB with RX page cache (This patch) RX Cores Baseline Build SKB+page-cache Improvement ----------------------------------------------------------- 1 4.16Mpps 5.33Mpps 28% 2 7.16Mpps 10.24Mpps 43% 4 13.61Mpps 20.51Mpps 51% 8 25.32Mpps 32.00Mpps 26% All respective cores were 100% utilized. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 31 ++- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 221 +++++++++--------- 3 files changed, 136 insertions(+), 126 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7dd4763e726e..4d06c1b9348b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -65,6 +65,8 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 +#define MLX5_RX_HEADROOM NET_SKB_PAD + #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_WQE_SZ 18 @@ -302,10 +304,14 @@ struct mlx5e_page_cache { struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - u32 wqe_sz; - struct sk_buff **skb; + + struct mlx5e_dma_info *dma_info; struct mlx5e_mpw_info *wqe_info; void *mtt_no_align; + struct { + u8 page_order; + u32 wqe_sz; /* wqe data buffer size */ + } buff; __be32 mkey_be; struct device *pdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8595b507e200..d09588b54c38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -408,6 +408,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; + u32 frag_sz; + int npages; int wq_sz; int err; int i; @@ -442,29 +444,40 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); - rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->wqe_sz; + + rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->buff.wqe_sz; rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); err = mlx5e_rq_alloc_mpwqe_info(rq, c); if (err) goto err_rq_wq_destroy; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { + rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->dma_info) { err = -ENOMEM; goto err_rq_wq_destroy; } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->wqe_sz = (priv->params.lro_en) ? + rq->buff.wqe_sz = (priv->params.lro_en) ? 
priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); - byte_count = rq->wqe_sz; + byte_count = rq->buff.wqe_sz; + + /* calc the required page order */ + frag_sz = MLX5_RX_HEADROOM + + byte_count /* packet data */ + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frag_sz = SKB_DATA_ALIGN(frag_sz); + + npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->buff.page_order = order_base_2(npages); + byte_count |= MLX5_HW_START_PADDING; rq->mkey_be = c->mkey_be; } @@ -499,7 +512,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->skb); + kfree(rq->dma_info); } for (i = rq->page_cache.head; i != rq->page_cache.tail; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index dc8677933f76..d017829b77eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -179,50 +179,99 @@ unlock: mutex_unlock(&priv->state_lock); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) -{ - struct sk_buff *skb; - dma_addr_t dma_addr; +#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) - skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); - if (unlikely(!skb)) +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); + + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } + + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } + + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, + RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + return true; +} + +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct page *page; + + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + + page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!page)) return -ENOMEM; - dma_addr = dma_map_single(rq->pdev, - /* hw start padding */ - skb->data, - /* hw end padding */ - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) - goto err_free_skb; - - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr); - - rq->skb[ix] = skb; + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, + RQ_PAGE_SIZE(rq), DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { + put_page(page); + return -ENOMEM; + } return 0; +} -err_free_skb: - dev_kfree_skb(skb); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; - return -ENOMEM; + dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + put_page(dma_info->page); +} + +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct 
mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_dma_info *di = &rq->dma_info[ix]; + + if (unlikely(mlx5e_page_alloc_mapped(rq, di))) + return -ENOMEM; + + wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + return 0; } void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - struct sk_buff *skb = rq->skb[ix]; + struct mlx5e_dma_info *di = &rq->dma_info[ix]; - if (skb) { - rq->skb[ix] = NULL; - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - dev_kfree_skb(skb); - } + mlx5e_page_release(rq, di, true); } static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) @@ -305,79 +354,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - - if (tail_next == cache->head) { - rq->stats.cache_full++; - return false; - } - - cache->page_cache[cache->tail] = *dma_info; - cache->tail = tail_next; - return true; -} - -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - - if (unlikely(cache->head == cache->tail)) { - rq->stats.cache_empty++; - return false; - } - - if (page_ref_count(cache->page_cache[cache->head].page) != 1) { - rq->stats.cache_busy++; - return false; - } - - *dma_info = cache->page_cache[cache->head]; - cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); - rq->stats.cache_reuse++; - - dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, - DMA_FROM_DEVICE); - return true; -} - -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct page *page; - - if (mlx5e_rx_cache_get(rq, dma_info)) - return 0; - - page = dev_alloc_page(); - if (unlikely(!page)) - return -ENOMEM; - - dma_info->page = page; - dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(page); - return -ENOMEM; - } - - return 0; -} - -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) -{ - if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) - return; - - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE); - put_page(dma_info->page); -} - static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) @@ -448,7 +424,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; @@ -658,31 +634,46 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_dma_info *di; struct mlx5e_rx_wqe *wqe; - struct sk_buff *skb; __be16 wqe_counter_be; + struct sk_buff *skb; u16 wqe_counter; u32 cqe_bcnt; + void *va; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; + di = &rq->dma_info[wqe_counter]; + va = page_address(di->page); - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); + 
dma_sync_single_range_for_cpu(rq->pdev, + di->addr, + MLX5_RX_HEADROOM, + rq->buff.wqe_sz, + DMA_FROM_DEVICE); + prefetch(va + MLX5_RX_HEADROOM); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; - dev_kfree_skb(skb); + mlx5e_page_release(rq, di, true); goto wq_ll_pop; } + skb = build_skb(va, RQ_PAGE_SIZE(rq)); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + mlx5e_page_release(rq, di, true); + goto wq_ll_pop; + } + + /* queue up for recycling ..*/ + page_ref_inc(di->page); + mlx5e_page_release(rq, di, true); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); From 21c59685dd176dd6b2c4fc5e18dc65730cfd546a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:43 +0300 Subject: [PATCH 0619/1050] net/mlx5e: Union RQ RX info per RQ type We have two types of RX RQs, and they use two separate sets of info arrays and structures in the RX data path functions. Today those structures are mutually exclusive per RQ type, hence one kind is allocated on RQ creation according to the RQ type. For better cache locality and to minimize the sizeof(struct mlx5e_rq), in this patch we define them as a union. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 14 +++++--- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 10 +++--- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4d06c1b9348b..e3331237ce0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -305,9 +305,14 @@ struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - struct mlx5e_dma_info *dma_info; - struct mlx5e_mpw_info *wqe_info; - void *mtt_no_align; + union { + struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_mpw_info *info; + void *mtt_no_align; + u32 mtt_offset; + } mpwqe; + }; struct { u8 page_order; u32 wqe_sz; /* wqe data buffer size */ @@ -327,7 +332,6 @@ struct mlx5e_rq { unsigned long state; int ix; - u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -770,7 +774,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) { - return rq->mpwqe_mtt_offset + + return rq->mpwqe.mtt_offset + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09588b54c38..f2efa532f453 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -314,7 +314,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; struct mlx5_wqe_data_seg *dseg = &wqe->data; - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); @@ -342,21 +342,21 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; int i; - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL,
cpu_to_node(c->cpu)); - if (!rq->wqe_info) + rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, cpu_to_node(c->cpu)); - if (unlikely(!rq->mtt_no_align)) + if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; - wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc, + wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc, MLX5_UMR_ALIGN); wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, PCI_DMA_TODEVICE); @@ -370,14 +370,14 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, err_unmap_mtts: while (--i >= 0) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); + kfree(rq->mpwqe.mtt_no_align); err_free_wqe_info: - kfree(rq->wqe_info); + kfree(rq->mpwqe.info); err_out: return -ENOMEM; @@ -390,13 +390,13 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) int i; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); - kfree(rq->wqe_info); + kfree(rq->mpwqe.mtt_no_align); + kfree(rq->mpwqe.info); } static int mlx5e_create_rq(struct mlx5e_channel *c, @@ -439,7 +439,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_mtt_offset = c->ix * + rq->mpwqe.mtt_offset = c->ix * MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); @@ -654,7 +654,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d017829b77eb..a403a797ebef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -328,7 +328,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -358,7 +358,7 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = mlx5e_mpwqe_strides_per_page(rq); int err; @@ -412,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, 
&rq->state))) { - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } @@ -438,7 +438,7 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; mlx5e_free_rx_mpwqe(rq, wi); } @@ -725,7 +725,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); struct sk_buff *skb; u16 cqe_bcnt; From e4b85508072b32682ba84df32cac5cf6a4f6178e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:44 +0300 Subject: [PATCH 0620/1050] net/mlx5e: Slightly reduce hardware LRO size Before this patch LRO size was 64K, now with build_skb requires extra room, headroom + sizeof(skb_shared_info) added to the data buffer will make wqe size or page_frag_size slightly larger than 64K which will demand order 5 page instead of order 4 in 4K page systems. We take those extra bytes from hardware LRO data size in order to not increase the required page order for when hardware LRO is enabled. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f2efa532f453..8734240865e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3187,8 +3187,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - + /* Extra room needed for build_skb */ + MLX5_RX_HEADROOM - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Initialize pflags */ MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, From 2fc4bfb7250d79ee4e58c1d5bca257687e9f5e53 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:45 +0300 Subject: [PATCH 0621/1050] net/mlx5e: Dynamic RQ type infrastructure Add two helper functions to allow dynamic changes of RQ type. mlx5e_set_rq_priv_params and mlx5e_set_rq_type_params will be used on netdev creation to determine the default RQ type. This will be needed later for downstream patches of XDP support. When enabling XDP we will dynamically move from striding RQ to linked list RQ type. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 92 ++++++++++--------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8734240865e1..ff520ad27f5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -69,6 +69,47 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +{ + priv->params.rq_wq_type = rq_type; + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + mlx5_core_info(priv->mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); +} + +static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +{ + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + mlx5e_set_rq_type_params(priv, rq_type); +} + static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -3038,13 +3079,6 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, indirection_rqt[i] = i % num_channels; } -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); -} - static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) { enum pcie_link_width width; @@ -3124,11 +3158,13 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? 
- MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_admin = false; @@ -3141,33 +3177,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.rx_cqe_compress_admin = cqe_compress_heuristic(link_speed, pci_bw); } - priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; - switch (priv->params.rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - priv->params.rx_cqe_compress ? - MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + mlx5e_set_rq_priv_params(priv); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; - break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - } - - mlx5_core_info(mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - priv->params.rx_cqe_compress_admin); - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); @@ -3197,12 +3211,6 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; - #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); #endif From 86994156c736978d113e7927455d4eeeb2128b9f Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Wed, 21 Sep 2016 12:19:46 +0300 Subject: [PATCH 0622/1050] net/mlx5e: XDP fast RX drop bpf programs support Add support for the BPF_PROG_TYPE_PHYS_DEV hook in the mlx5e driver. When XDP is on we make sure to change channels RQs type to MLX5_WQ_TYPE_LINKED_LIST rather than "striding RQ" type to ensure "page per packet". On XDP set, we fail if HW LRO is set and request the user to turn it off. Since on ConnectX4-LX HW LRO is always on by default, this will be annoying, but we prefer not to enforce LRO off from the XDP set function. Full channels reset (close/open) is required only when setting XDP on/off. When XDP set is called just to exchange programs, we update each RQ xdp program on the fly; for synchronization with the current data path RX activity of that RQ, we temporarily disable the RQ and ensure the RX path is not running, then quickly update and re-enable it. For that we do: - rq.state = disabled - napi_synchronize - xchg(rq->xdp_prog) - rq.state = enabled - napi_schedule // Just in case we've missed an IRQ Packet rate performance testing was done with pktgen 64B packets on the TX side and TC drop action on the RX side, compared to XDP fast drop. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz Comparison is done between: 1.
Baseline, Before this patch with TC drop action 2. This patch with TC drop action 3. This patch with XDP RX fast drop RX Cores Baseline(TC drop) TC drop XDP fast Drop -------------------------------------------------------------- 1 5.3Mpps 5.3Mpps 16.5Mpps 2 10.2Mpps 10.2Mpps 31.3Mpps 4 20.5Mpps 19.9Mpps 36.3Mpps* *My xmitter was limited to 36.3Mpps, so it is the bottleneck. It seems that receive side can handle more. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 100 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 26 ++++- .../ethernet/mellanox/mlx5/core/en_stats.h | 4 + 4 files changed, 130 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e3331237ce0e..5e8e669f69c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -334,6 +334,7 @@ struct mlx5e_rq { int ix; struct mlx5e_rx_am am; /* Adaptive Moderation */ + struct bpf_prog *xdp_prog; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -627,6 +628,7 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct bpf_prog *xdp_prog; /* priv data path fields - end */ unsigned long state; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ff520ad27f5e..6e95a16226f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -104,7 +105,8 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) ? + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && + !priv->xdp_prog ? 
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; mlx5e_set_rq_type_params(priv, rq_type); @@ -177,6 +179,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; @@ -473,6 +476,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->xdp_prog = priv->xdp_prog; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -536,6 +540,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->page_cache.head = 0; rq->page_cache.tail = 0; + if (rq->xdp_prog) + bpf_prog_add(rq->xdp_prog, 1); + return 0; err_rq_wq_destroy: @@ -548,6 +555,9 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int i; + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); @@ -2955,6 +2965,92 @@ static void mlx5e_tx_timeout(struct net_device *dev) schedule_work(&priv->tx_timeout_work); } +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct bpf_prog *old_prog; + int err = 0; + bool reset, was_opened; + int i; + + mutex_lock(&priv->state_lock); + + if ((netdev->features & NETIF_F_LRO) && prog) { + netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + err = -EINVAL; + goto unlock; + } + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + /* no need for full reset when exchanging programs */ + reset = (!priv->xdp_prog || !prog); + + if (was_opened && reset) + mlx5e_close_locked(netdev); + + /* exchange programs */ + old_prog = xchg(&priv->xdp_prog, prog); + if (prog) + bpf_prog_add(prog, 1); + if (old_prog) + bpf_prog_put(old_prog); + + if (reset) /* change RQ type according to priv->xdp_prog */ + mlx5e_set_rq_priv_params(priv); + + if (was_opened && reset) + mlx5e_open_locked(netdev); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. 
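The loop that follows implements the swap recipe from the commit message: park the RQ, wait out any NAPI poll already running, exchange the program pointer atomically, then unpark and kick NAPI in case an interrupt was missed while the ring was flagged. In outline (condensed from the patch itself):

	set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);	/* stop the RX path */
	napi_synchronize(&c->napi);			/* drain in-flight poll */
	old_prog = xchg(&c->rq.xdp_prog, prog);		/* atomic pointer swap */
	clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
	set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
	napi_schedule(&c->napi);			/* in case we missed an IRQ */

Because the RX completion handler snapshots rq->xdp_prog with READ_ONCE(), once napi_synchronize() returns no handler can still be running with the old program, and the bpf_prog_put() on it is safe.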
+ */ + bpf_prog_add(prog, priv->params.num_channels); + for (i = 0; i < priv->params.num_channels; i++) { + struct mlx5e_channel *c = priv->channel[i]; + + set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + napi_synchronize(&c->napi); + /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ + + old_prog = xchg(&c->rq.xdp_prog, prog); + + clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + /* napi_schedule in case we have missed anything */ + set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); + napi_schedule(&c->napi); + + if (old_prog) + bpf_prog_put(old_prog); + } + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static bool mlx5e_xdp_attached(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return !!priv->xdp_prog; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx5e_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2974,6 +3070,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -3005,6 +3102,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a403a797ebef..96f6317a95ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -632,8 +632,20 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } +static inline enum xdp_action mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + void *data, u32 len) +{ + struct xdp_buff xdp; + + xdp.data = data; + xdp.data_end = xdp.data + len; + return bpf_prog_run_xdp(prog, &xdp); +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); struct mlx5e_dma_info *di; struct mlx5e_rx_wqe *wqe; __be16 wqe_counter_be; @@ -654,6 +666,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(va + MLX5_RX_HEADROOM); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; @@ -661,6 +674,18 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } + if (xdp_prog) { + enum xdp_action act = + mlx5e_xdp_handle(rq, xdp_prog, va + MLX5_RX_HEADROOM, + cqe_bcnt); + + if (act != XDP_PASS) { + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + goto wq_ll_pop; + } + } + skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; @@ -672,7 +697,6 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) page_ref_inc(di->page); mlx5e_page_release(rq, di, true); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6af8d79e8c2a..084d6c893a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -65,6 +65,7 @@ struct mlx5e_sw_stats { u64 rx_csum_none; u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -100,6 +101,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -278,6 +280,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 xdp_drop; u64 wqe_err; u64 mpwqe_filler; u64 buff_alloc_err; @@ -295,6 +298,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, From f10b7cc7707f7d598e3ddacd848080b18ba4cbff Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:47 +0300 Subject: [PATCH 0623/1050] net/mlx5e: Have a clear separation between different SQ types Make a clear separation between Regular SQ (TXQ) and ICO SQ creation and destruction, and union their mutual information structures. Don't allocate redundant TXQ skb/wqe_info/dma_fifo arrays for ICO SQ. And use a different SQ edge for ICO SQ than for TXQ SQ, to be more accurate. In preparation for XDP TX support. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 23 +++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 121 ++++++++++++------ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 30 +++-- .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 2 +- 5 files changed, 120 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5e8e669f69c0..5917f5e609ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -101,6 +101,9 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_ICOSQ_MAX_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -386,6 +389,11 @@ struct mlx5e_ico_wqe_info { u8 num_wqebbs; }; +enum mlx5e_sq_type { + MLX5E_SQ_TXQ, + MLX5E_SQ_ICO +}; + struct mlx5e_sq { /* data path */ @@ -403,10 +411,15 @@ struct mlx5e_sq { struct mlx5e_cq cq; - /* pointers to per packet info: write@xmit, read@completion */ - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; + /* pointers to per tx element info: write@xmit, read@completion */ + union { + struct { + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } txq; + struct mlx5e_ico_wqe_info *ico_wqe; + } db; /* read only */ struct mlx5_wq_cyc wq; @@ -428,8 +441,8 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; - struct mlx5e_ico_wqe_info *ico_wqe_info; u32 rate_limit; + u8 type; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6e95a16226f8..632de09e69cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -51,7 +51,7 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; u16 max_inline; u8 min_inline_mode; - bool icosq; + enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -740,8 +740,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -765,26 +765,43 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) { - kfree(sq->wqe_info); - kfree(sq->dma_fifo); - kfree(sq->skb); + kfree(sq->db.ico_wqe); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +{ + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.ico_wqe) + return -ENOMEM; + + return 0; +} + +static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.txq.wqe_info); + kfree(sq->db.txq.dma_fifo); + kfree(sq->db.txq.skb); +} + +static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; - 
sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); - sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, - numa); - sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL, - numa); - - if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) { - mlx5e_free_sq_db(sq); + sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), + GFP_KERNEL, numa); + sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), + GFP_KERNEL, numa); + sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { + mlx5e_free_sq_txq_db(sq); return -ENOMEM; } @@ -793,6 +810,30 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return 0; } +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_sq_txq_db(sq); + break; + case MLX5E_SQ_ICO: + mlx5e_free_sq_ico_db(sq); + break; + } +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + return mlx5e_alloc_sq_txq_db(sq, numa); + case MLX5E_SQ_ICO: + return mlx5e_alloc_sq_ico_db(sq, numa); + } + + return 0; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -803,8 +844,16 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + u16 sq_max_wqebbs; int err; + sq->type = param->type; + sq->pdev = c->pdev; + sq->tstamp = &priv->tstamp; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; @@ -833,18 +882,8 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - if (param->icosq) { - u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - - sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * - wq_sz, - GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!sq->ico_wqe_info) { - err = -ENOMEM; - goto err_free_sq_db; - } - } else { + sq_max_wqebbs = MLX5_SEND_WQE_MAX_WQEBBS; + if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; txq_ix = c->ix + tc * priv->params.num_channels; @@ -852,19 +891,14 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; - sq->mkey_be = c->mkey_be; - sq->channel = c; - sq->tc = tc; - sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + if (sq->type == MLX5E_SQ_ICO) + sq_max_wqebbs = MLX5E_ICOSQ_MAX_WQEBBS; + + sq->edge = (sq->wq.sz_m1 + 1) - sq_max_wqebbs; sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; -err_free_sq_db: - mlx5e_free_sq_db(sq); - err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -879,7 +913,6 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; - kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -908,11 +941,12 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? + 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 
0 : 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -1027,8 +1061,10 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) netif_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) + if (mlx5e_sq_has_room_for(sq, 1)) { + sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; mlx5e_send_nop(sq, true); + } } mlx5e_disable_sq(sq); @@ -1505,6 +1541,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, param->max_inline = priv->params.tx_max_inline; param->min_inline_mode = priv->params.tx_min_inline_mode; + param->type = MLX5E_SQ_TXQ; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1578,7 +1615,7 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - param->icosq = true; + param->type = MLX5E_SQ_ICO; } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 96f6317a95ea..941e53169838 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -337,8 +337,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); } @@ -348,8 +348,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; - sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index eb0e72537f10..f02f24cfcb7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -52,7 +52,6 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - sq->skb[pi] = NULL; sq->pc++; sq->stats.nop++; @@ -82,15 +81,17 @@ static inline void mlx5e_dma_push(struct mlx5e_sq *sq, u32 size, enum mlx5e_dma_map_type map_type) { - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; + + sq->db.txq.dma_fifo[i].addr = addr; + sq->db.txq.dma_fifo[i].size = size; + sq->db.txq.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) { - return &sq->dma_fifo[i & sq->dma_fifo_mask]; + return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask]; } static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) @@ -221,7 +222,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) u16 pi = sq->pc 
& wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; + struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; @@ -341,7 +342,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->skb[pi] = skb; + sq->db.txq.skb[pi] = skb; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += wi->num_wqebbs; @@ -368,8 +369,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } /* fill sq edge with nops to avoid wqe wrap around */ - while ((sq->pc & wq->sz_m1) > sq->edge) + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.txq.skb[pi] = NULL; mlx5e_send_nop(sq, false); + } if (bf) sq->bf_budget--; @@ -442,8 +445,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -499,10 +502,13 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) u16 ci; int i; + if (sq->type != MLX5E_SQ_TXQ) + return; + while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (!skb) { /* nop */ sq->cc++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 08d8b0c91f07..47cd5619ae02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + struct mlx5e_ico_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs;
From b5503b994ed5ed8dbfe821317e7b5b38acb065c5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:48 +0300 Subject: [PATCH 0624/1050] net/mlx5e: XDP TX forwarding support
Add support for XDP_TX forwarding from an XDP program. Using XDP, the user can now loop packets back out of the same port. We create a dedicated TX SQ for each channel that serves XDP programs returning the XDP_TX action, looping packets back to the wire directly from the channel's RQ RX path. For that, RX pages now need to be mapped bidirectionally, and on the XDP_TX action we sync the page back to the device and then queue it into the SQ for transmission. The XDP xmit frame function reports back to the RX path whether the page was consumed (transmitted); if so, the RX path forgets about that page as if it had been released to the stack. Later, on XDP TX completion, the page is released back to the page cache. For simplicity this patch hits a doorbell on every XDP TX packet. The next patch will introduce an xmit-more-like mechanism that queues up more than one packet into the SQ without notifying the hardware; once the RX napi loop is done, we will hit the doorbell once for all XDP TX packets from the previous loop. This should drastically improve XDP TX performance.
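For context, the simplest program that exercises this new path just returns XDP_TX for every frame; a minimal sketch (an assumed example, not part of this patch):

#include <linux/bpf.h>

/* Reflect every packet back out the receiving port. */
__attribute__((section("xdp"), used))
int xdp_bounce(struct xdp_md *ctx)
{
        return XDP_TX;
}

char _license[] __attribute__((section("license"), used)) = "GPL";

Such an object would typically be built with clang -target bpf and attached through the driver's ndo_xdp hook (XDP_SETUP_PROG, added earlier in this series).

Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S.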
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 25 +++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 93 +++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 110 +++++++++++++++--- .../ethernet/mellanox/mlx5/core/en_stats.h | 8 ++ .../net/ethernet/mellanox/mlx5/core/en_tx.c | 39 ++++++- .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 65 ++++++++++- 6 files changed, 304 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5917f5e609ae..82eededfc92a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -104,6 +104,15 @@ #define MLX5E_ICOSQ_MAX_WQEBBS \ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) +#define MLX5E_XDP_IHS_DS_COUNT \ + DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT \ + (MLX5E_XDP_IHS_DS_COUNT + \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_WQEBBS \ + DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -319,6 +328,7 @@ struct mlx5e_rq { struct { u8 page_order; u32 wqe_sz; /* wqe data buffer size */ + u8 map_dir; /* dma map direction */ } buff; __be32 mkey_be; @@ -384,14 +394,15 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; -struct mlx5e_ico_wqe_info { +struct mlx5e_sq_wqe_info { u8 opcode; u8 num_wqebbs; }; enum mlx5e_sq_type { MLX5E_SQ_TXQ, - MLX5E_SQ_ICO + MLX5E_SQ_ICO, + MLX5E_SQ_XDP }; struct mlx5e_sq { @@ -418,7 +429,11 @@ struct mlx5e_sq { struct mlx5e_sq_dma *dma_fifo; struct mlx5e_tx_wqe_info *wqe_info; } txq; - struct mlx5e_ico_wqe_info *ico_wqe; + struct mlx5e_sq_wqe_info *ico_wqe; + struct { + struct mlx5e_sq_wqe_info *wqe_info; + struct mlx5e_dma_info *di; + } xdp; } db; /* read only */ @@ -458,8 +473,10 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_sq xdp_sq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_sq icosq; /* internal control operations */ + bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -688,7 +705,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_sq_descs(struct mlx5e_sq *sq); void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 632de09e69cf..a9fc9d4c6af4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -64,6 +64,7 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; @@ -180,6 +181,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; 
s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; @@ -478,6 +481,10 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->priv = c->priv; rq->xdp_prog = priv->xdp_prog; + rq->buff.map_dir = DMA_FROM_DEVICE; + if (rq->xdp_prog) + rq->buff.map_dir = DMA_BIDIRECTIONAL; + switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; @@ -765,6 +772,28 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } +static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.xdp.di); + kfree(sq->db.xdp.wqe_info); +} + +static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + GFP_KERNEL, numa); + sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { + mlx5e_free_sq_xdp_db(sq); + return -ENOMEM; + } + + return 0; +} + static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) { kfree(sq->db.ico_wqe); @@ -819,6 +848,9 @@ static void mlx5e_free_sq_db(struct mlx5e_sq *sq) case MLX5E_SQ_ICO: mlx5e_free_sq_ico_db(sq); break; + case MLX5E_SQ_XDP: + mlx5e_free_sq_xdp_db(sq); + break; } } @@ -829,11 +861,24 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return mlx5e_alloc_sq_txq_db(sq, numa); case MLX5E_SQ_ICO: return mlx5e_alloc_sq_ico_db(sq, numa); + case MLX5E_SQ_XDP: + return mlx5e_alloc_sq_xdp_db(sq, numa); } return 0; } +static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +{ + switch (sq_type) { + case MLX5E_SQ_ICO: + return MLX5E_ICOSQ_MAX_WQEBBS; + case MLX5E_SQ_XDP: + return MLX5E_XDP_TX_WQEBBS; + } + return MLX5_SEND_WQE_MAX_WQEBBS; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -844,7 +889,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); - u16 sq_max_wqebbs; int err; sq->type = param->type; @@ -882,7 +926,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - sq_max_wqebbs = MLX5_SEND_WQE_MAX_WQEBBS; if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; @@ -891,10 +934,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - if (sq->type == MLX5E_SQ_ICO) - sq_max_wqebbs = MLX5E_ICOSQ_MAX_WQEBBS; - - sq->edge = (sq->wq.sz_m1 + 1) - sq_max_wqebbs; + sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; @@ -1068,7 +1108,7 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) } mlx5e_disable_sq(sq); - mlx5e_free_tx_descs(sq); + mlx5e_free_sq_descs(sq); mlx5e_destroy_sq(sq); } @@ -1429,14 +1469,31 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } + if (priv->xdp_prog) { + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation); + if (err) + goto err_close_sqs; + + err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); + if (err) { + mlx5e_close_cq(&c->xdp_sq.cq); + goto err_close_sqs; + } + } + + c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) - goto err_close_sqs; + goto err_close_xdp_sq; netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; +err_close_xdp_sq: + mlx5e_close_sq(&c->xdp_sq); err_close_sqs: 
mlx5e_close_sqs(c); @@ -1465,9 +1522,13 @@ err_napi_del: static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_sq(&c->xdp_sq); mlx5e_close_sqs(c); mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1618,12 +1679,28 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, param->type = MLX5E_SQ_ICO; } +static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + + param->max_inline = priv->params.tx_max_inline; + /* FOR XDP SQs will support only L2 inline mode */ + param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->type = MLX5E_SQ_XDP; +} + static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 941e53169838..57d49513a87a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -236,7 +236,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->page = page; dma_info->addr = dma_map_page(rq->pdev, page, 0, - RQ_PAGE_SIZE(rq), DMA_FROM_DEVICE); + RQ_PAGE_SIZE(rq), rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { put_page(page); return -ENOMEM; @@ -252,7 +252,7 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, return; dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), - DMA_FROM_DEVICE); + rq->buff.map_dir); put_page(dma_info->page); } @@ -632,15 +632,95 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } -static inline enum xdp_action mlx5e_xdp_handle(struct mlx5e_rq *rq, - const struct bpf_prog *prog, - void *data, u32 len) +static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + unsigned int data_offset, + int len) +{ + struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; + unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; + void *data = page_address(di->page) + data_offset; + + if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + rq->stats.xdp_tx_full++; + mlx5e_page_release(rq, di, true); + return; + } + + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, + PCI_DMA_TODEVICE); + + memset(wqe, 0, sizeof(*wqe)); + + /* copy the inline part */ + memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + + dseg 
= (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + + sq->db.xdp.di[pi] = *di; + wi->opcode = MLX5_OPCODE_SEND; + wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; + sq->pc += MLX5E_XDP_TX_WQEBBS; + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.xdp.wqe_info[pi].opcode = MLX5_OPCODE_NOP; + mlx5e_send_nop(sq, false); + } + rq->stats.xdp_tx++; +} + +/* returns true if packet was consumed by xdp */ +static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + struct mlx5e_dma_info *di, + void *data, u16 len) { struct xdp_buff xdp; + u32 act; + + if (!prog) + return false; xdp.data = data; xdp.data_end = xdp.data + len; - return bpf_prog_run_xdp(prog, &xdp); + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + return true; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return true; + } } void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) @@ -651,21 +731,22 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) __be16 wqe_counter_be; struct sk_buff *skb; u16 wqe_counter; + void *va, *data; u32 cqe_bcnt; - void *va; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); di = &rq->dma_info[wqe_counter]; va = page_address(di->page); + data = va + MLX5_RX_HEADROOM; dma_sync_single_range_for_cpu(rq->pdev, di->addr, MLX5_RX_HEADROOM, rq->buff.wqe_sz, DMA_FROM_DEVICE); - prefetch(va + MLX5_RX_HEADROOM); + prefetch(data); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { @@ -674,17 +755,8 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } - if (xdp_prog) { - enum xdp_action act = - mlx5e_xdp_handle(rq, xdp_prog, va + MLX5_RX_HEADROOM, - cqe_bcnt); - - if (act != XDP_PASS) { - rq->stats.xdp_drop++; - mlx5e_page_release(rq, di, true); - goto wq_ll_pop; - } - } + if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) + goto wq_ll_pop; /* page/packet was consumed by XDP */ skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 084d6c893a6e..57452fdc5154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -66,6 +66,8 @@ struct mlx5e_sw_stats { u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; + u64 rx_xdp_tx; + u64 rx_xdp_tx_full; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -102,6 +104,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -281,6 +285,8 @@ struct mlx5e_rq_stats { u64 lro_packets; u64 lro_bytes; u64 xdp_drop; + u64 xdp_tx; + u64 xdp_tx_full; u64 wqe_err; u64 mpwqe_filler; u64 buff_alloc_err; @@ -299,6 +305,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f02f24cfcb7a..70a717382357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -495,16 +495,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; u16 ci; int i; - if (sq->type != MLX5E_SQ_TXQ) - return; - while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; skb = sq->db.txq.skb[ci]; @@ -526,3 +523,37 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) sq->cc += wi->num_wqebbs; } } + +static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (wi->opcode == MLX5_OPCODE_NOP) { + sq->cc++; + continue; + } + + sq->cc += wi->num_wqebbs; + + mlx5e_page_release(&sq->channel->rq, di, false); + } +} + +void mlx5e_free_sq_descs(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_txq_sq_descs(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_xdp_sq_descs(sq); + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 47cd5619ae02..5703f19a6a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; @@ -105,6 +105,66 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) sq->cc = sqcc; } +static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_sq, cq); + + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = 
mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { + sqcc++; + continue; + } + + sqcc += wi->num_wqebbs; + /* Recycle RX page */ + mlx5e_page_release(&sq->channel->rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -121,6 +181,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + if (c->xdp) + busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); + mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq);
From 35b510e257f7516546a0a3f725f71dfbccc3f733 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:49 +0300 Subject: [PATCH 0625/1050] net/mlx5e: XDP TX xmit more
Previously we rang the XDP SQ doorbell on every forwarded XDP packet. Here we introduce an xmit-more-like mechanism that queues up more than one packet into the SQ (up to the RX napi budget) without notifying the hardware. Once the RX napi budget is consumed and we exit the napi RX loop, we flush (doorbell) all XDP looped packets, if there are any.

XDP forward packet rate, comparing XDP TX with and without xmit more (bulk transmit):

RX Cores   XDP TX     XDP TX (xmit more)
----------------------------------------
    1       6.5Mpps       12.4Mpps
    2      13.2Mpps       24.2Mpps
    4      25.2Mpps       36.3Mpps*
    8      36.3Mpps*      36.3Mpps*

*My xmitter was limited to 36.3Mpps, so it is the bottleneck; it seems the receive side can handle more.
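The deferral pattern itself is generic: enqueue only writes the descriptor and marks a doorbell as owed, and the napi loop rings once at the end. A toy, self-contained model (hypothetical names, not driver code):

#include <stdbool.h>
#include <stdio.h>

struct toy_sq {
        int pending;   /* descriptors written since the last doorbell */
        bool doorbell; /* a flush is owed to the hardware */
};

static void toy_xmit(struct toy_sq *sq)
{
        sq->pending++;       /* write the WQE only */
        sq->doorbell = true; /* defer the expensive MMIO write */
}

static void toy_flush(struct toy_sq *sq)
{
        printf("one doorbell covers %d packets\n", sq->pending);
        sq->pending = 0;
}

int main(void)
{
        struct toy_sq sq = { 0, false };
        int budget = 8; /* stands in for the RX napi budget */

        for (int i = 0; i < budget; i++)
                toy_xmit(&sq);
        if (sq.doorbell) { /* one notification for the whole burst */
                toy_flush(&sq);
                sq.doorbell = false;
        }
        return 0;
}

The roughly doubled forwarding rate in the table above suggests the per-packet doorbell, not descriptor setup, dominated the XDP TX cost.

Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S.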
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_rx.c | 32 ++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 82eededfc92a..346015407b70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -433,6 +433,7 @@ struct mlx5e_sq { struct { struct mlx5e_sq_wqe_info *wqe_info; struct mlx5e_dma_info *di; + bool doorbell; } xdp; } db; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 57d49513a87a..0a81bd34abbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -632,6 +632,18 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *wqe; + u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, unsigned int data_offset, @@ -652,6 +664,11 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, void *data = page_address(di->page) + data_offset; if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + if (sq->db.xdp.doorbell) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->db.xdp.doorbell = false; + } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); return; @@ -681,14 +698,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; sq->pc += MLX5E_XDP_TX_WQEBBS; - wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); - - /* fill sq edge with nops to avoid wqe wrap around */ - while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->db.xdp.wqe_info[pi].opcode = MLX5_OPCODE_NOP; - mlx5e_send_nop(sq, false); - } + sq->db.xdp.doorbell = true; rq->stats.xdp_tx++; } @@ -863,6 +873,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) @@ -889,6 +900,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } + if (xdp_sq->db.xdp.doorbell) { + mlx5e_xmit_xdp_doorbell(xdp_sq); + xdp_sq->db.xdp.doorbell = false; + } + mlx5_cqwq_update_db_record(&cq->wq); /* ensure cq space is freed before enabling more cqes */
From e2f036a97271cf5811ee754bf321a29a814577f9 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 21 Sep 2016 08:45:55 -0300 Subject: [PATCH 0626/1050] sctp: rename WORD_TRUNC/ROUND macros
Rename these macros to something more meaningful: they operate on packet headers and lengths, which are tied to the protocol itself, not to any CPU architecture's word size. So, WORD_TRUNC becomes SCTP_TRUNC4 and WORD_ROUND becomes SCTP_PAD4.
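Both macros are plain 4-byte alignment arithmetic; a small self-contained check (the macro bodies are copied from the patch, the test values are illustrative):

#include <assert.h>

#define SCTP_PAD4(s)   (((s) + 3) & ~3) /* round up to a multiple of 4 */
#define SCTP_TRUNC4(s) ((s) & ~3)       /* round down to a multiple of 4 */

int main(void)
{
        assert(SCTP_PAD4(13) == 16);       /* a 13-byte chunk is padded to 16 */
        assert(SCTP_PAD4(16) == 16);       /* already aligned: no padding */
        assert(SCTP_TRUNC4(1461) == 1460); /* PMTU-style truncation */
        return 0;
}

Reported-by: David Laight Reported-by: David Miller Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S.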
Miller --- include/net/sctp/sctp.h | 10 +++++----- net/netfilter/xt_sctp.c | 2 +- net/sctp/associola.c | 2 +- net/sctp/chunk.c | 6 +++--- net/sctp/input.c | 8 ++++---- net/sctp/inqueue.c | 2 +- net/sctp/output.c | 12 ++++++------ net/sctp/sm_make_chunk.c | 28 ++++++++++++++-------------- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/transport.c | 4 ++-- net/sctp/ulpevent.c | 4 ++-- 11 files changed, 42 insertions(+), 42 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 632e205ca54b..87a7f42e7639 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -83,9 +83,9 @@ #endif /* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) +#define SCTP_PAD4(s) (((s)+3)&~3) /* Truncate to the previous multiple of 4. */ -#define WORD_TRUNC(s) ((s)&~3) +#define SCTP_TRUNC4(s) ((s)&~3) /* * Function declarations. @@ -433,7 +433,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } @@ -462,7 +462,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ - pos.v += WORD_ROUND(ntohs(pos.p->length))) + pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) @@ -472,7 +472,7 @@ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) + err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index ef36a56a02c6..4dedb96d1a06 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += WORD_ROUND(ntohs(sch->length)); + offset += SCTP_PAD4(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 1c23060c41a6..f10d3397f917 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1408,7 +1408,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) transports) { if (t->pmtu_pending && t->dst) { sctp_transport_update_pmtu(sk, t, - WORD_TRUNC(dst_mtu(t->dst))); + SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index af9cc8055465..76eae828ec89 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -208,8 +208,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + - hmac_desc->hmac_len); + max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + + hmac_desc->hmac_len); } /* Now, check if we need to reduce our max */ @@ -229,7 +229,7 @@ struct sctp_datamsg 
*sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max) - max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t)); + max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d32ecda..a1d85065bfc0 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -605,7 +605,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, - WORD_TRUNC(info)); + SCTP_TRUNC4(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { @@ -673,7 +673,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = offset + WORD_ROUND(ntohs(ch->length)); + ch_end = offset + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb->len) break; @@ -1121,7 +1121,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) break; @@ -1190,7 +1190,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, * that the chunk length doesn't cause overflow. Otherwise, we'll * walk off the end. */ - if (WORD_ROUND(ntohs(ch->length)) > skb->len) + if (SCTP_PAD4(ntohs(ch->length)) > skb->len) return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 6437aa97cfd7..f731de3e8428 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -213,7 +213,7 @@ new_skb: } chunk->chunk_hdr = ch; - chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 0c605ec74dc4..2a5c1896d18f 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -297,7 +297,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; - __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); + __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); @@ -508,7 +508,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (gso) { pkt_size = packet->overhead; list_for_each_entry(chunk, &packet->chunk_list, list) { - int padded = WORD_ROUND(chunk->skb->len); + int padded = SCTP_PAD4(chunk->skb->len); if (pkt_size + padded > tp->pathmtu) break; @@ -538,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * included in the chunk length field. The sender should * never pad with more than 3 bytes. * - * [This whole comment explains WORD_ROUND() below.] + * [This whole comment explains SCTP_PAD4() below.] 
*/ pkt_size -= packet->overhead; @@ -560,7 +560,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) has_data = 1; } - padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; + padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) memset(skb_put(chunk->skb, padding), 0, padding); @@ -587,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * acknowledged or have failed. * Re-queue auth chunks if needed. */ - pkt_size -= WORD_ROUND(chunk->skb->len); + pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); @@ -911,7 +911,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, */ maxsize = pmtu - packet->overhead; if (packet->auth) - maxsize -= WORD_ROUND(packet->auth->skb->len); + maxsize -= SCTP_PAD4(packet->auth->skb->len); if (chunk_len > maxsize) retval = SCTP_XMIT_PMTU_FULL; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8c77b87a8565..79dd66079dd7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -253,7 +253,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, num_types = sp->pf->supported_addrs(sp, types); chunksize = sizeof(init) + addrs_len; - chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); + chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (asoc->prsctp_enable) @@ -283,14 +283,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -300,8 +300,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -443,13 +443,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -458,8 +458,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -1390,7 +1390,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, struct sock *sk; /* No need to allocate LL here, as this is only a chunk. 
*/ - skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp); + skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp); if (!skb) goto nodata; @@ -1482,7 +1482,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) void *target; void *padding; int chunklen = ntohs(chunk->chunk_hdr->length); - int padlen = WORD_ROUND(chunklen) - chunklen; + int padlen = SCTP_PAD4(chunklen) - chunklen; padding = skb_put(chunk->skb, padlen); target = skb_put(chunk->skb, len); @@ -1900,7 +1900,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc, struct __sctp_missing report; __u16 len; - len = WORD_ROUND(sizeof(report)); + len = SCTP_PAD4(sizeof(report)); /* Make an ERROR chunk, preparing enough room for * returning multiple unknown parameters. @@ -2098,9 +2098,9 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, if (*errp) { if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length)))) + SCTP_PAD4(ntohs(param.p->length)))) sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), + SCTP_PAD4(ntohs(param.p->length)), param.v); } else { /* If there is no memory for generating the ERROR diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d88bb2b0b699..026e3bca4a94 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3454,7 +3454,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4185,7 +4185,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -4203,7 +4203,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 81b86678be4d..ce54dce13ddb 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -233,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -287,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index d85b803da11d..bea00058ce35 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -383,7 +383,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, ch = (sctp_errhdr_t *)(chunk->skb->data); cause = ch->cause; - elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t); + elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t); /* Pull off the ERROR header. */ skb_pull(chunk->skb, sizeof(sctp_errhdr_t)); @@ -688,7 +688,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * MUST ignore the padding bytes. */ len = ntohs(chunk->chunk_hdr->length); - padding = WORD_ROUND(len) - len; + padding = SCTP_PAD4(len) - len; /* Fixup cloned skb with just this chunks data. */ skb_trim(skb, chunk->chunk_end - padding - skb->data); From 4a225ce3950879a5426c56f306f5d1c9d6330292 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 21 Sep 2016 08:45:56 -0300 Subject: [PATCH 0627/1050] sctp: make use of SCTP_TRUNC4 macro And avoid the usage of '&~3'. This is the last place still not using the macro. Also break the line to make it easier to read. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/chunk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 76eae828ec89..8afe2e90d003 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -195,9 +195,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* This is the biggest possible DATA chunk that can fit into * the packet */ - max_data = (asoc->pathmtu - - sctp_sk(asoc->base.sk)->pf->af->net_header_len - - sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3; + max_data = asoc->pathmtu - + sctp_sk(asoc->base.sk)->pf->af->net_header_len - + sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); + max_data = SCTP_TRUNC4(max_data); max = asoc->frag_point; /* If the the peer requested that we authenticate DATA chunks From fcfbfd68b36b2bf4e0133dffa316831a5e180199 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:04:43 +0000 Subject: [PATCH 0628/1050] net: dsa: qca8k: fix non static symbol warning Fixes the following sparse warning: drivers/net/dsa/qca8k.c:259:22: warning: symbol 'qca8k_regmap_config' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 4788a89520dd..6fc379cf12e9 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -256,7 +256,7 @@ static struct regmap_access_table qca8k_readable_table = { .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), }; -struct regmap_config qca8k_regmap_config = { +static struct regmap_config qca8k_regmap_config = { .reg_bits = 16, .val_bits = 32, .reg_stride = 4, From a084ab33543cfa07033f91161978b626a9d9bd57 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:05:05 +0000 Subject: [PATCH 0629/1050] net: dsa: qca8k: use mdio_module_driver to simplify the code mdio_module_driver() makes the code simpler by eliminating boilerplate code. Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/dsa/qca8k.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 6fc379cf12e9..b3df70d07ff6 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1032,19 +1032,7 @@ static struct mdio_driver qca8kmdio_driver = { }, }; -static int __init -qca8kmdio_driver_register(void) -{ - return mdio_driver_register(&qca8kmdio_driver); -} -module_init(qca8kmdio_driver_register); - -static void __exit -qca8kmdio_driver_unregister(void) -{ - mdio_driver_unregister(&qca8kmdio_driver); -} -module_exit(qca8kmdio_driver_unregister); +mdio_module_driver(qca8kmdio_driver); MODULE_AUTHOR("Mathieu Olivari, John Crispin "); MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family");
From 524605e5ba5a897fb0a8c29398ed049100fe80aa Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:09:16 +0000 Subject: [PATCH 0630/1050] cxgb4: Convert to use simple_open() Remove an open coded simple_open() function and replace file operations references to the function with simple_open() instead. Generated by: scripts/coccinelle/api/simple_open.cocci Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 52be9a4ef97a..20455d082cb8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2748,12 +2748,6 @@ static void add_debugfs_mem(struct adapter *adap, const char *name, size_mb << 20); } -static int blocked_fl_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { @@ -2797,7 +2791,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, static const struct file_operations blocked_fl_fops = { .owner = THIS_MODULE, - .open = blocked_fl_open, + .open = simple_open, .read = blocked_fl_read, .write = blocked_fl_write, .llseek = generic_file_llseek,
From 8e83134db4ecb77a1dc3390b60ddeea840a5afbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 0631/1050] rxrpc: Send pings to get RTT data
Send a PING ACK packet to the peer when we get a new incoming call from a peer we don't have a record for. The PING RESPONSE ACK packet will tell us the following about the peer:

(1) its receive window size
(2) its MTU sizes
(3) its support for jumbo DATA packets
(4) if it supports slow start (similar to RFC 5681)
(5) an estimate of the RTT

This is necessary because the peer won't normally send us an ACK until it gets to the Rx phase and we send it a packet, but we would like to know some of this information before we start sending packets. A pair of tracepoints are added so that RTT determination can be observed.
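A rough sketch of the bookkeeping the response enables (hypothetical helper, not the patch's actual rxrpc_peer_add_rtt()): the response's acked serial is matched against the outstanding ping's serial, and the send/receive timestamp gap becomes one RTT sample.

#include <stdbool.h>
#include <stdint.h>

struct ping_state {
        uint32_t ping_serial; /* serial of the last ping sent */
        int64_t ping_time_ns; /* when it was sent */
};

/* Returns true and emits one RTT sample if this ACK answers the ping. */
static bool rtt_from_ping_response(const struct ping_state *ps,
                                   uint32_t acked_serial,
                                   int64_t resp_time_ns, int64_t *rtt_ns)
{
        if (acked_serial != ps->ping_serial)
                return false; /* response to some other packet */
        *rtt_ns = resp_time_ns - ps->ping_time_ns;
        return true;
}

int main(void)
{
        struct ping_state ps = { .ping_serial = 42, .ping_time_ns = 1000 };
        int64_t rtt;

        return rtt_from_ping_response(&ps, 42, 3500, &rtt) ? 0 : 1; /* rtt == 2500 */
}

The real code in the diff below additionally ignores stale responses by discarding acked serials that precede the outstanding ping's serial.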
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++++-- net/rxrpc/input.c | 48 ++++++++++++++++++++++++++++++++++++++++- net/rxrpc/misc.c | 11 +++++----- net/rxrpc/output.c | 22 +++++++++++++++++++ 4 files changed, 80 insertions(+), 8 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 79c671e552c3..8b47f468eb9d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -403,6 +403,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_PINGING, /* Ping in process */ }; /* @@ -487,6 +488,8 @@ struct rxrpc_call { u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ /* Rx/Tx circular buffer, depending on phase. * @@ -530,8 +533,8 @@ struct rxrpc_call { u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ - unsigned short rx_pkt_len; /* Current recvmsg packet len */ + rxrpc_serial_t ackr_ping; /* Last ping sent */ + ktime_t ackr_ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index aa261df9fc9e..a0a5bd108c9e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -36,6 +36,19 @@ static void rxrpc_proto_abort(const char *why, } } +/* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. + */ +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, + int skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); +} + /* * Apply a hard ACK by advancing the Tx window. */ @@ -342,6 +355,32 @@ ack: _leave(" [queued]"); } +/* + * Process a ping response. 
+ */ +static void rxrpc_input_ping_response(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + rxrpc_serial_t ping_serial; + ktime_t ping_time; + + ping_time = call->ackr_ping_time; + smp_rmb(); + ping_serial = call->ackr_ping; + + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || + before(orig_serial, ping_serial)) + return; + clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (after(orig_serial, ping_serial)) + return; + + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, + orig_serial, ack_serial, ping_time, resp_time); +} + /* * Process the extra information that may be appended to an ACK packet */ @@ -438,6 +477,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_ackinfo info; u8 acks[RXRPC_MAXACKS]; } buf; + rxrpc_serial_t acked_serial; rxrpc_seq_t first_soft_ack, hard_ack; int nr_acks, offset; @@ -449,6 +489,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } sp->offset += sizeof(buf.ack); + acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; @@ -460,10 +501,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, ntohs(buf.ack.maxSkew), first_soft_ack, ntohl(buf.ack.previousPacket), - ntohl(buf.ack.serial), + acked_serial, rxrpc_acks(buf.ack.reason), buf.ack.nAcks); + if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_input_ping_response(call, skb->tstamp, acked_serial, + sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, @@ -830,6 +875,7 @@ void rxrpc_data_ready(struct sock *udp_sk) rcu_read_unlock(); goto reject_packet; } + rxrpc_send_ping(call, skb, skew); } rxrpc_input_call_packet(call, skb, skew); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 6321c23f9a6e..56e668352fc7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -83,11 +83,12 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_DELAY] = 1, [RXRPC_ACK_REQUESTED] = 2, [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, + [RXRPC_ACK_EXCEEDS_WINDOW] = 6, + [RXRPC_ACK_NOSPACE] = 7, + [RXRPC_ACK_PING_RESPONSE] = 8, + [RXRPC_ACK_PING] = 9, }; const char *rxrpc_acks(u8 reason) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 817fb0e82d6a..0d89cd3f2c01 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -57,6 +57,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.reason = call->ackr_reason; pkt->ack.nAcks = top - hard_ack; + if (pkt->ack.reason == RXRPC_ACK_PING) + pkt->whdr.flags |= RXRPC_REQUEST_ACK; + if (after(top, hard_ack)) { seq = hard_ack + 1; do { @@ -97,6 +100,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) struct kvec iov[2]; rxrpc_serial_t serial; size_t len, n; + bool ping = false; int ioc, ret; u32 abort_code; @@ -147,6 +151,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) ret = 0; goto out; } + ping = (call->ackr_reason == RXRPC_ACK_PING); n = rxrpc_fill_out_ack(call, pkt); call->ackr_reason = 0; @@ -183,12 +188,29 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } + if (ping) { + call->ackr_ping = serial; + smp_wmb(); + /* We need to stick a time in before we send the 
packet in case + * the reply gets back before kernel_sendmsg() completes - but + * asking UDP to send the packet can take a relatively long + * time, so we update the time after, on the assumption that + * the packet transmission is more likely to happen towards the + * end of the kernel_sendmsg() call. + */ + call->ackr_ping_time = ktime_get_real(); + set_bit(RXRPC_CALL_PINGING, &call->flags); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); + } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + if (ping) + call->ackr_ping_time = ktime_get_real(); if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { switch (type) { case RXRPC_PACKET_TYPE_ACK: + clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), From 7aa51da7c88d42cc0bb85ab7d01429fbd4e51282 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 0632/1050] rxrpc: Expedite ping response transmission Expedite the transmission of a response to a PING ACK by sending it from sendmsg if one is pending. We're most likely to see a PING ACK during the client call Tx phase as the other side may use it to determine a number of parameters, such as the client's receive window size, the RTT and whether the client is doing slow start (similar to RFC5681). If we don't expedite it, it's left to the background processing thread to transmit. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 814b17f23971..3c969de3ef05 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -180,6 +180,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, copied = 0; do { + /* Check to see if there's a ping ACK to reply to. */ + if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + if (!skb) { size_t size, chunk, max, space; From 77f2efcbdd7133466060198e02c6e8a170c3cd14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:01 +0100 Subject: [PATCH 0633/1050] rxrpc: Add ktime_sub_ms() Add a ktime_sub_ms() to go with ktime_add_ms() and co. for use in AF_RXRPC RTT determination. Signed-off-by: David Howells --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2b6a204bd8d4..aa118bad1407 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -231,6 +231,11 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } +static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) +{ + return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); +} + extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** From 50235c4b5a2fb9a9690f02cd1dea6ca047d7f79e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 0634/1050] rxrpc: Obtain RTT data by requesting ACKs on DATA packets In addition to sending a PING ACK to gain RTT data, we can set the RXRPC_REQUEST_ACK flag on a DATA packet and get a REQUESTED-ACK ACK. The ACK packet contains the serial number of the packet it is in response to, so we can look through the Tx buffer for a matching DATA packet. This requires that the data packets be stamped with the time of transmission as a ktime rather than having the resend_at time in jiffies. 
This further requires the resend code to do the resend determination in ktimes and convert to jiffies to set the timer. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 +++---- net/rxrpc/call_event.c | 19 +++++++++---------- net/rxrpc/input.c | 35 +++++++++++++++++++++++++++++++++++ net/rxrpc/misc.c | 6 ++++-- net/rxrpc/output.c | 7 +++++-- net/rxrpc/sendmsg.c | 1 - net/rxrpc/sysctl.c | 2 +- 7 files changed, 57 insertions(+), 20 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8b47f468eb9d..1c4597b2c6cd 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -142,10 +142,7 @@ struct rxrpc_host_header { */ struct rxrpc_skb_priv { union { - unsigned long resend_at; /* time in jiffies at which to resend */ - struct { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + u8 nr_jumbo; /* Number of jumbo subpackets */ }; union { unsigned int offset; /* offset into buffer of next read */ @@ -663,6 +660,7 @@ extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_ping, + rxrpc_rtt_tx_data, rxrpc_rtt_tx__nr_trace }; @@ -670,6 +668,7 @@ extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx_requested_ack, rxrpc_rtt_rx__nr_trace }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 34ad967f2d81..adb2ec61e21f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,12 +142,14 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - unsigned long resend_at, now; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + max_age = ktime_sub_ms(now, rxrpc_resend_timeout); + spin_lock_bh(&call->lock); cursor = call->tx_hard_ack; @@ -160,8 +162,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * the packets in the Tx buffer we're going to resend and what the new * resend timeout will be. */ - now = jiffies; - resend_at = now + rxrpc_resend_timeout; + oldest = now; for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; @@ -175,9 +176,9 @@ static void rxrpc_resend(struct rxrpc_call *call) sp = rxrpc_skb(skb); if (anno_type == RXRPC_TX_ANNO_UNACK) { - if (time_after(sp->resend_at, now)) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; + if (ktime_after(skb->tstamp, max_age)) { + if (ktime_before(skb->tstamp, oldest)) + oldest = skb->tstamp; continue; } } @@ -186,7 +187,8 @@ static void rxrpc_resend(struct rxrpc_call *call) call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } - call->resend_at = resend_at; + resend_at = ktime_sub(ktime_add_ns(oldest, rxrpc_resend_timeout), now); + call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. 
If an ACK comes in whilst the @@ -205,15 +207,12 @@ static void rxrpc_resend(struct rxrpc_call *call) spin_unlock_bh(&call->lock); if (rxrpc_send_data_packet(call, skb) < 0) { - call->resend_at = now + 2; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp = rxrpc_skb(skb); - sp->resend_at = now + rxrpc_resend_timeout; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); spin_lock_bh(&call->lock); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a0a5bd108c9e..c121949de3c8 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -355,6 +355,38 @@ ack: _leave(" [queued]"); } +/* + * Process a requested ACK. + */ +static void rxrpc_input_requested_ack(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + ktime_t sent_at; + int ix; + + for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) { + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + + sp = rxrpc_skb(skb); + if (sp->hdr.serial != orig_serial) + continue; + smp_rmb(); + sent_at = skb->tstamp; + goto found; + } + return; + +found: + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack, + orig_serial, ack_serial, sent_at, resp_time); +} + /* * Process a ping response. */ @@ -508,6 +540,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) rxrpc_input_ping_response(call, skb->tstamp, acked_serial, sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_REQUESTED) + rxrpc_input_requested_ack(call, skb->tstamp, acked_serial, + sp->hdr.serial); if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 56e668352fc7..0d425e707f22 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -68,9 +68,9 @@ unsigned int rxrpc_rx_mtu = 5692; unsigned int rxrpc_rx_jumbo_max = 4; /* - * Time till packet resend (in jiffies). + * Time till packet resend (in milliseconds). 
*/ -unsigned int rxrpc_resend_timeout = 4 * HZ; +unsigned int rxrpc_resend_timeout = 4 * 1000; const char *const rxrpc_pkts[] = { "?00", @@ -186,8 +186,10 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { [rxrpc_rtt_tx_ping] = "PING", + [rxrpc_rtt_tx_data] = "DATA", }; const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { [rxrpc_rtt_rx_ping_response] = "PONG", + [rxrpc_rtt_rx_requested_ack] = "RACK", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0d89cd3f2c01..db01fbb70d23 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -300,9 +300,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: - if (ret == 0) { - sp->resend_at = jiffies + rxrpc_resend_timeout; + if (ret >= 0) { + skb->tstamp = ktime_get_real(); + smp_wmb(); sp->hdr.serial = serial; + if (whdr.flags & RXRPC_REQUEST_ACK) + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3c969de3ef05..607223f4f871 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -137,7 +137,6 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp->resend_at = jiffies + rxrpc_resend_timeout; ret = rxrpc_send_data_packet(call, skb); if (ret < 0) { _debug("need instant resend %d", ret); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index a03c61c672f5..13d1df03ebac 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -59,7 +59,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_resend_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { From ada66b54c44add8702612940a08d077b4d6ecd0e Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 22 Sep 2016 08:34:58 +0930 Subject: [PATCH 0635/1050] net/faraday: Separate rx page storage from rxdesc The ftgmac100 hardware revision in e.g. the Aspeed AST2500 no longer reserves all bits in RXDES#2 but instead uses the bottom 16 bits to store MAC frame metadata. Avoid corruption by shifting struct page pointers out to their own member in struct ftgmac100. Signed-off-by: Andrew Jeffery Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 25 +++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 36361f8bf894..40622567159a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -60,6 +60,8 @@ struct ftgmac100 { struct ftgmac100_descs *descs; dma_addr_t descs_dma_addr; + struct page *rx_pages[RX_QUEUE_ENTRIES]; + unsigned int rx_pointer; unsigned int tx_clean_pointer; unsigned int tx_pointer; @@ -341,18 +343,27 @@ static bool ftgmac100_rxdes_ipcs_err(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR); } +static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) +{ + return &priv->rx_pages[rxdes - priv->descs->rxdes]; +} + /* * rxdes2 is not used by hardware. We use it to keep track of page. * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu(). 
*/ -static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page) +static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes, + struct page *page) { - rxdes->rxdes2 = (unsigned int)page; + *ftgmac100_rxdes_page_slot(priv, rxdes) = page; } -static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes) +static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - return (struct page *)rxdes->rxdes2; + return *ftgmac100_rxdes_page_slot(priv, rxdes); } /****************************************************************************** @@ -501,7 +512,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) do { dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); unsigned int size; dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE); @@ -779,7 +790,7 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, return -ENOMEM; } - ftgmac100_rxdes_set_page(rxdes, page); + ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); ftgmac100_rxdes_set_dma_own(rxdes); return 0; @@ -791,7 +802,7 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv) for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); if (!page) From 7906a4da0ef845d01e76f2187c23cc71ae00fa1d Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 22 Sep 2016 08:34:59 +0930 Subject: [PATCH 0636/1050] net/faraday: Make EDO{R,T}R bits configurable These bits are #defined at a fixed location. In order to support future hardware that has chosen to move these bits around, move the bits into a member of the struct ftgmac100. Signed-off-by: Andrew Jeffery Signed-off-by: Joel Stanley Signed-off-by: David S.
Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 40 +++++++++++++++--------- drivers/net/ethernet/faraday/ftgmac100.h | 2 -- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 40622567159a..62a88d1a1f99 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -79,6 +79,9 @@ struct ftgmac100 { int int_mask_all; bool use_ncsi; bool enabled; + + u32 rxdes0_edorr_mask; + u32 txdes0_edotr_mask; }; static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, @@ -259,10 +262,11 @@ static bool ftgmac100_rxdes_packet_ready(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY); } -static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { /* clear status bits */ - rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask); } static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes) @@ -300,9 +304,10 @@ static bool ftgmac100_rxdes_multicast(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST); } -static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask); } static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes, @@ -393,7 +398,7 @@ ftgmac100_rx_locate_first_segment(struct ftgmac100 *priv) if (ftgmac100_rxdes_first_segment(rxdes)) return rxdes; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } @@ -464,7 +469,7 @@ static void ftgmac100_rx_drop_packet(struct ftgmac100 *priv) if (ftgmac100_rxdes_last_segment(rxdes)) done = true; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } while (!done && ftgmac100_rxdes_packet_ready(rxdes)); @@ -556,10 +561,11 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) /****************************************************************************** * internal functions (transmit descriptor) *****************************************************************************/ -static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_reset(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { /* clear all except end of ring bit */ - txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask); txdes->txdes1 = 0; txdes->txdes2 = 0; txdes->txdes3 = 0; @@ -580,9 +586,10 @@ static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes) txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN); } -static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { - txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask); } static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes) @@ -701,7 +708,7 @@ static 
bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv) dev_kfree_skb(skb); - ftgmac100_txdes_reset(txdes); + ftgmac100_txdes_reset(priv, txdes); ftgmac100_tx_clean_pointer_advance(priv); @@ -792,7 +799,7 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); return 0; } @@ -839,7 +846,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) return -ENOMEM; /* initialize RX ring */ - ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); + ftgmac100_rxdes_set_end_of_ring(priv, + &priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; @@ -849,7 +857,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) } /* initialize TX ring */ - ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); + ftgmac100_txdes_set_end_of_ring(priv, + &priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); return 0; err: @@ -1336,6 +1345,9 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + spin_lock_init(&priv->tx_lock); /* initialize NAPI */ diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 13408d448b05..c258586ce4a4 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -189,7 +189,6 @@ struct ftgmac100_txdes { } __attribute__ ((aligned(16))); #define FTGMAC100_TXDES0_TXBUF_SIZE(x) ((x) & 0x3fff) -#define FTGMAC100_TXDES0_EDOTR (1 << 15) #define FTGMAC100_TXDES0_CRC_ERR (1 << 19) #define FTGMAC100_TXDES0_LTS (1 << 28) #define FTGMAC100_TXDES0_FTS (1 << 29) @@ -215,7 +214,6 @@ struct ftgmac100_rxdes { } __attribute__ ((aligned(16))); #define FTGMAC100_RXDES0_VDBC 0x3fff -#define FTGMAC100_RXDES0_EDORR (1 << 15) #define FTGMAC100_RXDES0_MULTICAST (1 << 16) #define FTGMAC100_RXDES0_BROADCAST (1 << 17) #define FTGMAC100_RXDES0_RX_ERR (1 << 18) From 2a0ab8ebbec634127987fc8dbbd09a7fd7274e3d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:00 +0930 Subject: [PATCH 0637/1050] net/faraday: Adapt for Aspeed SoCs The RXDES and TXDES register bits in the ftgmac100 indicate EDO{R,T}R at bit position 15 for the Faraday Tech IP. However, the version of this IP present in the Aspeed SoCs has these bits at position 30 in the registers. It appears that ast2400 SoCs support both positions, with the 15th bit marked as reserved but still functional. In the ast2500 this bit is reused for another function, so we need a workaround. Engineers from Aspeed confirmed that using bit 30 is correct for both the ast2400 and ast2500 SoCs. Signed-off-by: Joel Stanley Signed-off-by: David S.
Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 62a88d1a1f99..47f512224b57 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1345,9 +1345,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; - priv->rxdes0_edorr_mask = BIT(15); - priv->txdes0_edotr_mask = BIT(15); - spin_lock_init(&priv->tx_lock); /* initialize NAPI */ @@ -1381,6 +1378,16 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); + + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + priv->rxdes0_edorr_mask = BIT(30); + priv->txdes0_edotr_mask = BIT(30); + } else { + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + } + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) { if (!IS_ENABLED(CONFIG_NET_NCSI)) { From 08c9c126004e999f0c05b369d1e0cc757e6040cc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 22 Sep 2016 08:35:01 +0930 Subject: [PATCH 0638/1050] net/faraday: Clear stale interrupts There is a stale interrupt (PHYSTS_CHG in ISR, bit#6 in 0x0) from the bootloader (uboot) when enabling the MAC. The stale interrupts aren't part of the kernel's normal operation and should be cleared. This clears the stale interrupts in ISR (0x0) when enabling the MAC. Signed-off-by: Gavin Shan Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 47f512224b57..189373743ddf 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1112,6 +1112,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); + unsigned int status; int err; err = ftgmac100_alloc_buffers(priv); @@ -1137,6 +1138,11 @@ static int ftgmac100_open(struct net_device *netdev) ftgmac100_init_hw(priv); ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10); + + /* Clear stale interrupts */ + status = ioread32(priv->base + FTGMAC100_OFFSET_ISR); + iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR); + if (netdev->phydev) phy_start(netdev->phydev); else if (priv->use_ncsi) From e07dc63ba22df2d8bc931821b22938d4ce37a934 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:02 +0930 Subject: [PATCH 0639/1050] net/faraday: Configure old MDIO interface on Aspeed SoCs The Aspeed G4 and G5 SoCs have a new MDIO interface available as an option. The old one is still available, so select it in order to remain compatible with the ftgmac100 driver. Signed-off-by: Joel Stanley Signed-off-by: David S.
Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 9 +++++++++ drivers/net/ethernet/faraday/ftgmac100.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 189373743ddf..e3653b14008a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1252,12 +1252,21 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) struct ftgmac100 *priv = netdev_priv(netdev); struct platform_device *pdev = to_platform_device(priv->dev); int i, err = 0; + u32 reg; /* initialize mdio bus */ priv->mii_bus = mdiobus_alloc(); if (!priv->mii_bus) return -EIO; + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + /* This driver supports the old MDIO interface */ + reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR); + reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE; + iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR); + }; + priv->mii_bus->name = "ftgmac100_mdio"; snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id); diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index c258586ce4a4..8a377ab1d127 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -133,6 +133,11 @@ #define FTGMAC100_DMAFIFOS_RXDMA_REQ (1 << 30) #define FTGMAC100_DMAFIFOS_TXDMA_REQ (1 << 31) +/* + * Feature Register + */ +#define FTGMAC100_REVR_NEW_MDIO_INTERFACE BIT(31) + /* * Receive buffer size register */ From edcd692fe4816ba6cb240b7a1d8b984ee7082763 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:03 +0930 Subject: [PATCH 0640/1050] net/faraday: Mask out PHYSTS_CHG interrupt The PHYSTS_CHG interrupt (the ftgmac100's PHY IRQ) tells the system to go look at the PHY registers for a link status change. The interrupt was causing issues on Aspeed SoCs, where some board designs had an active-high configuration, some active-low, and in some cases the line was repurposed for other functions. When misconfigured, Linux would chew 100% of CPU cycles servicing interrupts: [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.300000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG While the ftgmac100 IP can be configured for high, low and edge sensitivity, the current driver always polls the PHY, so we chose to mask out the interrupt. See https://patchwork.ozlabs.org/patch/672099/ for more discussion. Signed-off-by: Joel Stanley Signed-off-by: David S.
Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 10 +++------- drivers/net/ethernet/faraday/ftgmac100.h | 1 + 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index e3653b14008a..90f9c5481290 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1075,14 +1075,12 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) } if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF | - FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG)) { + FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) { if (net_ratelimit()) - netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status, + netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status, status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "", status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "", - status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "", - status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : ""); + status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : ""); if (status & FTGMAC100_INT_NO_RXBUF) { /* RX buffer unavailable */ @@ -1390,7 +1388,6 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_XPKT_ETH | FTGMAC100_INT_XPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); @@ -1412,7 +1409,6 @@ static int ftgmac100_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Using NCSI interface\n"); priv->use_ncsi = true; - priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG; priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler); if (!priv->ndev) goto err_ncsi_dev; diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 8a377ab1d127..a7ce0ac8858a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -157,6 +157,7 @@ #define FTGMAC100_MACCR_FULLDUP (1 << 8) #define FTGMAC100_MACCR_GIGA_MODE (1 << 9) #define FTGMAC100_MACCR_CRC_APD (1 << 10) +#define FTGMAC100_MACCR_PHY_LINK_LEVEL (1 << 11) #define FTGMAC100_MACCR_RX_RUNT (1 << 12) #define FTGMAC100_MACCR_JUMBO_LF (1 << 13) #define FTGMAC100_MACCR_RX_ALL (1 << 14) From 666ca3d8f19082f40745d75f3cc7cc0200ee87e3 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 9 Sep 2016 22:34:02 -0400 Subject: [PATCH 0641/1050] drm/nouveau/fifo/nv04: avoid ramht race against cookie insertion Signed-off-by: Ilia Mirkin Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c index edec30fd3ecd..0a7b6ed5ed28 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c @@ -37,7 +37,10 @@ nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *base, int cookie) { struct nv04_fifo_chan *chan = nv04_fifo_chan(base); struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem; + + mutex_lock(&chan->fifo->base.engine.subdev.mutex); nvkm_ramht_remove(imem->ramht, cookie); + mutex_unlock(&chan->fifo->base.engine.subdev.mutex); } static int From de1d657816c6fbb70f07b01d50ec669dff0d4e60 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:14 -0700 Subject: [PATCH 0642/1050] tcp: fix under-accounting retransmit SNMP counters This patch fixes these under-accounting SNMP rtx stats 
LINUX_MIB_TCPFORWARDRETRANS LINUX_MIB_TCPFASTRETRANS LINUX_MIB_TCPSLOWSTARTRETRANS when retransmitting TSO packets Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f53d0cca5fa4..e15ec82a6319 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2831,7 +2831,7 @@ begin_fwd: if (tcp_retransmit_skb(sk, skb, segs)) return; - NET_INC_STATS(sock_net(sk), mib_idx); + NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb)); if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); From 7e32b44361abc77fbc01f2b97b045c405b2583e5 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:15 -0700 Subject: [PATCH 0643/1050] tcp: properly account Fast Open SYN-ACK retrans Since the TFO socket is accepted right off SYN-data, the socket owner can call getsockopt(TCP_INFO) to collect ongoing SYN-ACK retransmission or timeout stats (i.e., tcpi_total_retrans, tcpi_retransmits). Currently those stats are only updated once the handshake completes. This patch fixes it. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 ++ net/ipv4/tcp_timer.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ebf45b38bc3..08323bd95f2a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5885,7 +5885,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * so release it. */ if (req) { - tp->total_retrans = req->num_retrans; + inet_csk(sk)->icsk_retransmits = 0; reqsk_fastopen_remove(sk, req, false); } else { /* Make sure socket is routed, for correct metrics. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e15ec82a6319..5288cec4a2b2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3568,6 +3568,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) if (!res) { __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + if (unlikely(tcp_passive_fastopen(sk))) + tcp_sk(sk)->total_retrans++; } return res; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d84930b2dd95..f712b411f6ed 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -384,6 +384,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; + icsk->icsk_retransmits++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } From bad3d80fd001d3b74ab8bb3d561bc4d1b08797d3 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sun, 18 Sep 2016 12:21:56 +0200 Subject: [PATCH 0644/1050] drm/nouveau: Revert "bus: remove cpu_coherent flag" This reverts commit aff51175cdbf345740ec9203eff88e772af88059. The commit caused fence timeouts within nvc0_screen_destroy and most likely other places as well. The most obvious effect is that userspace processes take minutes to actually quit.
Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/core/device.h | 1 + drivers/gpu/drm/nouveau/nouveau_bo.c | 3 ++- drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 7ea8aa7ca408..6bc712f32c8b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -175,6 +175,7 @@ struct nvkm_device_func { void (*fini)(struct nvkm_device *, bool suspend); resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar); resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar); + bool cpu_coherent; }; struct nvkm_device_quirk { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6190035edfea..864323b19cf7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -209,7 +209,8 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, nvbo->tile_flags = tile_flags; nvbo->bo.bdev = &drm->ttm.bdev; - nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; + if (!nvxx_device(&drm->device)->func->cpu_coherent) + nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; nvbo->page_shift = 12; if (drm->client.vm) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index b1b693219db3..62ad0300cfa5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1614,6 +1614,7 @@ nvkm_device_pci_func = { .fini = nvkm_device_pci_fini, .resource_addr = nvkm_device_pci_resource_addr, .resource_size = nvkm_device_pci_resource_size, + .cpu_coherent = !IS_ENABLED(CONFIG_ARM), }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 939682f18788..9b638bd905ff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -245,6 +245,7 @@ nvkm_device_tegra_func = { .fini = nvkm_device_tegra_fini, .resource_addr = nvkm_device_tegra_resource_addr, .resource_size = nvkm_device_tegra_resource_size, + .cpu_coherent = false, }; int From 005d675aa1909ad70456dec8c5b0ba9b60b52d24 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Thu, 22 Sep 2016 14:12:00 +0900 Subject: [PATCH 0645/1050] mmc: dw_mmc: fix the spamming log message When no card is present in a slot marked "broken-cd", the clock information is printed continuously, because the driver keeps polling for card detection. This patch fixes that problem. Fixes: 65257a0deed5 ("mmc: dw_mmc: remove UBSAN warning in dw_mci_setup_bus()") Reported-by: Tobias Jakobi Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 14 +++++++++----- drivers/mmc/host/dw_mmc.h | 3 +++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 32380d5d4f6b..767af2026f8b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1112,11 +1112,12 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) div = (host->bus_hz != clock) ?
DIV_ROUND_UP(div, 2) : 0; - dev_info(&slot->mmc->class_dev, - "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n", - slot->id, host->bus_hz, clock, - div ? ((host->bus_hz / div) >> 1) : - host->bus_hz, div); + if (clock != slot->__clk_old || force_clkinit) + dev_info(&slot->mmc->class_dev, + "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n", + slot->id, host->bus_hz, clock, + div ? ((host->bus_hz / div) >> 1) : + host->bus_hz, div); /* disable clock */ mci_writel(host, CLKENA, 0); @@ -1139,6 +1140,9 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) /* inform CIU */ mci_send_cmd(slot, sdmmc_cmd_bits, 0); + + /* keep the last clock value that was requested from core */ + slot->__clk_old = clock; } host->current_speed = clock; diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 9e740bc232a8..e8cd2dec3263 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -249,6 +249,8 @@ extern int dw_mci_resume(struct dw_mci *host); * @queue_node: List node for placing this node in the @queue list of * &struct dw_mci. * @clock: Clock rate configured by set_ios(). Protected by host->lock. + * @__clk_old: The last clock value that was requested from core. + * Keeping track of this helps us to avoid spamming the console. * @flags: Random state bits associated with the slot. * @id: Number of this slot. * @sdio_id: Number of this slot in the SDIO interrupt registers. @@ -263,6 +265,7 @@ struct dw_mci_slot { struct list_head queue_node; unsigned int clock; + unsigned int __clk_old; unsigned long flags; #define DW_MMC_CARD_PRESENT 0 From 0d4b103c008ac9f6f438d2618c155f6e868e5a67 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: [PATCH 0646/1050] rxrpc: Reduce the number of ACK-Requests sent Reduce the number of ACK-Requests we set on DATA packets that we're sending to reduce network traffic. We set the flag on odd-numbered DATA packets to start off the RTT cache until we have at least three entries in it and then probe once per second thereafter to keep it topped up. This could be made tunable in future. Note that from this point, the RXRPC_REQUEST_ACK flag is set on DATA packets as we transmit them and not stored statically in the sk_buff. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/output.c | 13 +++++++++++-- net/rxrpc/peer_object.c | 1 + net/rxrpc/sendmsg.c | 2 -- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1c4597b2c6cd..b13754a6dd7a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -255,6 +255,7 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 + ktime_t rtt_last_req; /* Time of last RTT request */ u64 rtt; /* Current RTT estimate (in nS) */ u64 rtt_sum; /* Sum of cache contents */ u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index db01fbb70d23..282cb1e36d06 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -270,6 +270,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; + /* If our RTT cache needs working on, request an ACK. 
*/ + if ((call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + whdr.flags |= RXRPC_REQUEST_ACK; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { @@ -301,11 +307,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) done: if (ret >= 0) { - skb->tstamp = ktime_get_real(); + ktime_t now = ktime_get_real(); + skb->tstamp = now; smp_wmb(); sp->hdr.serial = serial; - if (whdr.flags & RXRPC_REQUEST_ACK) + if (whdr.flags & RXRPC_REQUEST_ACK) { + call->peer->rtt_last_req = now; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + } } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index f3e5766910fd..941b724d523b 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -244,6 +244,7 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) peer->hash_key = hash_key; rxrpc_assess_MTU_size(peer); peer->mtu = peer->if_mtu; + peer->rtt_last_req = ktime_get_real(); switch (peer->srx.transport.family) { case AF_INET: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 607223f4f871..ca7c3be60ad2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -299,8 +299,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, else if (call->tx_top - call->tx_hard_ack < call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (seq & 1) - sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = conn->security->secure_packet( call, skb, skb->mark, skb->head); From fc943f67773487bb85131273f39b5f183caafe95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: [PATCH 0647/1050] rxrpc: Reduce the number of PING ACKs sent We don't want to send a PING ACK for every new incoming call as that just adds to the network traffic. Instead, we send a PING ACK to the first three that we receive and then once per second thereafter. This could probably be made adjustable in future. 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/input.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index adb2ec61e21f..6e2ea8f4ae75 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,7 +142,7 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c121949de3c8..cbb5d53f09d7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -44,9 +44,12 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, int skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, - true, true); + if (call->peer->rtt_usage < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); } /* From 9abefcb1aaa58b9d5aa40a8bb12c87d02415e4c8 Mon Sep 17 00:00:00 2001 From: Sergei Miroshnichenko Date: Wed, 7 Sep 2016 16:51:12 +0300 Subject: [PATCH 0648/1050] can: dev: fix deadlock reported after bus-off A timer was used to restart after the bus-off state, leading to a relatively large can_restart() executed in an interrupt context, which in turn sets up pinctrl. When this happens during system boot, there is a high probability of grabbing the pinctrl_list_mutex, which is locked already by the probe() of other device, making the kernel suspect a deadlock condition [1]. To resolve this issue, the restart_timer is replaced by a delayed work. 
[1] https://github.com/victronenergy/venus/issues/24 Signed-off-by: Sergei Miroshnichenko Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 27 +++++++++++++++++---------- include/linux/can/dev.h | 3 ++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e21f7cc5ae4d..8d6208c0b400 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -501,9 +502,8 @@ EXPORT_SYMBOL_GPL(can_free_echo_skb); /* * CAN device restart for bus-off recovery */ -static void can_restart(unsigned long data) +static void can_restart(struct net_device *dev) { - struct net_device *dev = (struct net_device *)data; struct can_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; struct sk_buff *skb; @@ -543,6 +543,14 @@ restart: netdev_err(dev, "Error %d during restart", err); } +static void can_restart_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct can_priv *priv = container_of(dwork, struct can_priv, restart_work); + + can_restart(priv->dev); +} + int can_restart_now(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); @@ -556,8 +564,8 @@ int can_restart_now(struct net_device *dev) if (priv->state != CAN_STATE_BUS_OFF) return -EBUSY; - /* Runs as soon as possible in the timer context */ - mod_timer(&priv->restart_timer, jiffies); + cancel_delayed_work_sync(&priv->restart_work); + can_restart(dev); return 0; } @@ -578,8 +586,8 @@ void can_bus_off(struct net_device *dev) netif_carrier_off(dev); if (priv->restart_ms) - mod_timer(&priv->restart_timer, - jiffies + (priv->restart_ms * HZ) / 1000); + schedule_delayed_work(&priv->restart_work, + msecs_to_jiffies(priv->restart_ms)); } EXPORT_SYMBOL_GPL(can_bus_off); @@ -688,6 +696,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) return NULL; priv = netdev_priv(dev); + priv->dev = dev; if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; @@ -697,7 +706,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) priv->state = CAN_STATE_STOPPED; - init_timer(&priv->restart_timer); + INIT_DELAYED_WORK(&priv->restart_work, can_restart_work); return dev; } @@ -778,8 +787,6 @@ int open_candev(struct net_device *dev) if (!netif_carrier_ok(dev)) netif_carrier_on(dev); - setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev); - return 0; } EXPORT_SYMBOL_GPL(open_candev); @@ -794,7 +801,7 @@ void close_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); - del_timer_sync(&priv->restart_timer); + cancel_delayed_work_sync(&priv->restart_work); can_flush_echo_skb(dev); } EXPORT_SYMBOL_GPL(close_candev); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5261751f6bd4..5f5270941ba0 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -32,6 +32,7 @@ enum can_mode { * CAN common private data */ struct can_priv { + struct net_device *dev; struct can_device_stats can_stats; struct can_bittiming bittiming, data_bittiming; @@ -47,7 +48,7 @@ struct can_priv { u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; - struct timer_list restart_timer; + struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); int (*do_set_data_bittiming)(struct net_device *dev); From 456bee986e0a372ad4beed5d3cedb3622633d9df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 
20 Sep 2016 20:35:55 +0800 Subject: [PATCH 0649/1050] KEYS: Fix skcipher IV clobbering The IV must not be modified by the skcipher operation so we need to duplicate it. Fixes: c3917fd9dfbc ("KEYS: Use skcipher") Cc: stable@vger.kernel.org Reported-by: Mimi Zohar Signed-off-by: Herbert Xu --- security/keys/encrypted-keys/encrypted.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 5adbfc32242f..17a06105ccb6 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -478,6 +479,7 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload, struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; + u8 iv[AES_BLOCK_SIZE]; unsigned int padlen; char pad[16]; int ret; @@ -500,8 +502,8 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload, sg_init_table(sg_out, 1); sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen); - skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, - epayload->iv); + memcpy(iv, epayload->iv, sizeof(iv)); + skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); ret = crypto_skcipher_encrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); @@ -581,6 +583,7 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload, struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; + u8 iv[AES_BLOCK_SIZE]; char pad[16]; int ret; @@ -599,8 +602,8 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload, epayload->decrypted_datalen); sg_set_buf(&sg_out[1], pad, sizeof pad); - skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, - epayload->iv); + memcpy(iv, epayload->iv, sizeof(iv)); + skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); ret = crypto_skcipher_decrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); From 0cf43f509f72128196e23f5ade7e512a72152cc6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Sep 2016 17:04:57 +0800 Subject: [PATCH 0650/1050] crypto: rsa-pkcs1pad - Handle leading zero for decryption As the software RSA implementation now produces fixed-length output, we need to eliminate leading zeros in the calling code instead. This patch does just that for pkcs1pad decryption while signature verification was fixed in an earlier patch. 
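For context, the block being parsed is the PKCS#1 v1.5 encryption padding 0x00 || 0x02 || PS || 0x00 || M, where PS is at least eight nonzero bytes; with fixed-length RSA output the leading 0x00 arrives as part of the buffer and the caller must strip it itself, which is what this patch does. The following is a simplified, hypothetical sketch of the same parse over the full k-byte block; unlike production code it makes no attempt at constant-time behaviour.

#include <stdio.h>
#include <stddef.h>

/* Parse a fixed-length PKCS#1 v1.5 encryption block:
 *   0x00 || 0x02 || PS (>= 8 nonzero bytes) || 0x00 || M
 * Returns the offset of M, or -1 on malformed padding.
 * Illustrative only -- real code must be constant-time to
 * avoid Bleichenbacher-style padding oracles. */
static int pkcs1_v15_unpad(const unsigned char *buf, size_t k, size_t *msg_len)
{
	size_t pos;

	if (k < 11 || buf[0] != 0x00 || buf[1] != 0x02)
		return -1;
	for (pos = 2; pos < k && buf[pos] != 0x00; pos++)
		;
	if (pos < 10 || pos == k)	/* PS too short, or no 0x00 separator */
		return -1;
	pos++;				/* skip the 0x00 separator */
	*msg_len = k - pos;
	return (int)pos;
}

int main(void)
{
	unsigned char em[13] = { 0x00, 0x02, 0xaa, 0xaa, 0xaa, 0xaa,
				 0xaa, 0xaa, 0xaa, 0xaa, 0x00, 'h', 'i' };
	size_t mlen = 0;
	int off = pkcs1_v15_unpad(em, sizeof(em), &mlen);

	printf("message at offset %d, length %zu\n", off, mlen);
	return 0;
}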
Fixes: 9b45b7bba3d2 ("crypto: rsa - Generate fixed-length output") Reported-by: Mat Martineau Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 877019a6d3ea..8baab4307f7b 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -298,41 +298,48 @@ static int pkcs1pad_decrypt_complete(struct akcipher_request *req, int err) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); + unsigned int dst_len; unsigned int pos; - - if (err == -EOVERFLOW) - /* Decrypted value had no leading 0 byte */ - err = -EINVAL; + u8 *out_buf; if (err) goto done; - if (req_ctx->child_req.dst_len != ctx->key_size - 1) { - err = -EINVAL; + err = -EINVAL; + dst_len = req_ctx->child_req.dst_len; + if (dst_len < ctx->key_size - 1) goto done; + + out_buf = req_ctx->out_buf; + if (dst_len == ctx->key_size) { + if (out_buf[0] != 0x00) + /* Decrypted value had no leading 0 byte */ + goto done; + + dst_len--; + out_buf++; } - if (req_ctx->out_buf[0] != 0x02) { - err = -EINVAL; + if (out_buf[0] != 0x02) goto done; - } - for (pos = 1; pos < req_ctx->child_req.dst_len; pos++) - if (req_ctx->out_buf[pos] == 0x00) + + for (pos = 1; pos < dst_len; pos++) + if (out_buf[pos] == 0x00) break; - if (pos < 9 || pos == req_ctx->child_req.dst_len) { - err = -EINVAL; + if (pos < 9 || pos == dst_len) goto done; - } pos++; - if (req->dst_len < req_ctx->child_req.dst_len - pos) + err = 0; + + if (req->dst_len < dst_len - pos) err = -EOVERFLOW; - req->dst_len = req_ctx->child_req.dst_len - pos; + req->dst_len = dst_len - pos; if (!err) sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, req->dst_len), - req_ctx->out_buf + pos, req->dst_len); + out_buf + pos, req->dst_len); done: kzfree(req_ctx->out_buf); From f0aa1ce6259eb65f53f969b3250c1d0aac84f30b Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Thu, 22 Sep 2016 12:02:25 +0300 Subject: [PATCH 0651/1050] regmap: fix deadlock on _regmap_raw_write() error path Commit 815806e39bf6 ("regmap: drop cache if the bus transfer error") added a call to regcache_drop_region() to error path in _regmap_raw_write(). However that path runs with regmap lock taken, and regcache_drop_region() tries to re-take it, causing a deadlock. Fix that by calling map->cache_ops->drop() directly. 
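The underlying pattern is the classic self-deadlock on a non-recursive lock: an error path that already holds the lock calls a public helper that takes the same lock again. A minimal illustration, using plain pthreads rather than the regmap internals (all names here are hypothetical):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

/* Internal helper: caller must already hold map_lock. */
static void cache_drop_locked(void)
{
	/* ... invalidate the cached register range ... */
}

/* Public API: takes the lock itself. */
static void cache_drop(void)
{
	pthread_mutex_lock(&map_lock);
	cache_drop_locked();
	pthread_mutex_unlock(&map_lock);
}

static void raw_write(int bus_error)
{
	pthread_mutex_lock(&map_lock);
	if (bus_error) {
		/* BUG: calling cache_drop() here would block forever,
		 * since map_lock is not recursive and we already hold
		 * it. The fix mirrors the patch: call the locked helper. */
		cache_drop_locked();
	}
	pthread_mutex_unlock(&map_lock);
}

int main(void)
{
	raw_write(1);
	cache_drop();	/* fine from an unlocked context */
	printf("no deadlock\n");
	return 0;
}

The design choice mirrors the patch: keep a public entry point that locks, plus a *_locked variant for callers that already hold the lock.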
Signed-off-by: Nikita Yushchenko Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 25d26bb18970..e964d068874d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1475,7 +1475,11 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, kfree(buf); } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) { - regcache_drop_region(map, reg, reg + 1); + /* regcache_drop_region() takes lock that we already have, + * thus call map->cache_ops->drop() directly + */ + if (map->cache_ops && map->cache_ops->drop) + map->cache_ops->drop(map, reg, reg + 1); } trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes); From 572de608e36279f249c9a6350f142e69f23dacab Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:54 +0800 Subject: [PATCH 0652/1050] net: ethernet: mediatek: add extension of phy-mode for TRGMII Add PHY-mode "trgmii", as PHY_INTERFACE_MODE_TRGMII, as an extension of the operation modes of the PHY interface, and add a trgmii flag inside mtk_mac indicating whether the MAC is connected to an internal switch or to an external PHY, based on the configuration given on the board, so that the corresponding setup can be performed on the TRGMII hardware module. Signed-off-by: Sean Wang Cc: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 3 +++ include/linux/phy.h | 3 +++ 3 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 2909372c4da0..e873e21fd20e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -244,6 +244,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) return -ENODEV; switch (of_get_phy_mode(np)) { + case PHY_INTERFACE_MODE_TRGMII: + mac->trgmii = true; case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7c5e534d2120..e3b9525f94c6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -529,6 +529,8 @@ struct mtk_eth { * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter * @phy_dev: The attached PHY if available + * @trgmii Indicate if the MAC uses TRGMII connected to internal + switch */ struct mtk_mac { int id; @@ -539,6 +541,7 @@ struct mtk_mac { struct phy_device *phy_dev; __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; int hwlro_ip_cnt; + bool trgmii; }; /* the struct describing the SoC.
these are declared in the soc_xyz.c files */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 2d24b283aa2d..e25f1830fbcf 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -80,6 +80,7 @@ typedef enum { PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, + PHY_INTERFACE_MODE_TRGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -123,6 +124,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "moca"; case PHY_INTERFACE_MODE_QSGMII: return "qsgmii"; + case PHY_INTERFACE_MODE_TRGMII: + return "trgmii"; default: return "unknown"; } From f430dea7c150dab2c103d28fa32efb59b5ae80b4 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:55 +0800 Subject: [PATCH 0653/1050] net: ethernet: mediatek: add support for GMAC0 connecting with external PHY through TRGMII Dynamically change the source clock, TX/RX delay and interface mode used by the TRGMII hardware module inside the PHY polling routine, adapting to the various RGMII speeds used by the external PHY on GMAC0. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 32 ++++++++++++++++++++- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 31 +++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e873e21fd20e..ec60794f802c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -52,7 +52,7 @@ static const struct mtk_ethtool_stats { }; static const char * const mtk_clks_source_name[] = { - "ethif", "esw", "gp1", "gp2" + "ethif", "esw", "gp1", "gp2", "trgpll" }; void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg) @@ -135,6 +135,33 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) return _mtk_mdio_read(eth, phy_addr, phy_reg); } +static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed) +{ + u32 val; + int ret; + + val = (speed == SPEED_1000) ? + INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100; + mtk_w32(eth, val, INTF_MODE); + + regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0, + ETHSYS_TRGMII_CLK_SEL362_5, + ETHSYS_TRGMII_CLK_SEL362_5); + + val = (speed == SPEED_1000) ? 250000000 : 500000000; + ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val); + if (ret) + dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret); + + val = (speed == SPEED_1000) ? + RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_RCK_CTRL); + + val = (speed == SPEED_1000) ?
+ TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_TCK_CTRL); +} + static void mtk_phy_link_adjust(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -157,6 +184,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) break; }; + if (mac->id == 0 && !mac->trgmii) + mtk_gmac0_rgmii_adjust(mac->hw, mac->phy_dev->speed); + if (mac->phy_dev->link) mcr |= MAC_MCR_FORCE_LINK; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index e3b9525f94c6..e52115633726 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -313,6 +313,30 @@ MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \ MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK) +/* TRGMII RXC control register */ +#define TRGMII_RCK_CTRL 0x10300 +#define DQSI0(x) ((x << 0) & GENMASK(6, 0)) +#define DQSI1(x) ((x << 8) & GENMASK(14, 8)) +#define RXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define RXC_DQSISEL BIT(30) +#define RCK_CTRL_RGMII_1000 (RXC_DQSISEL | RXCTL_DMWTLAT(2) | DQSI1(16)) +#define RCK_CTRL_RGMII_10_100 RXCTL_DMWTLAT(2) + +/* TRGMII RXC control register */ +#define TRGMII_TCK_CTRL 0x10340 +#define TXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define TXC_INV BIT(30) +#define TCK_CTRL_RGMII_1000 TXCTL_DMWTLAT(2) +#define TCK_CTRL_RGMII_10_100 (TXC_INV | TXCTL_DMWTLAT(2)) + +/* TRGMII Interface mode register */ +#define INTF_MODE 0x10390 +#define TRGMII_INTF_DIS BIT(0) +#define TRGMII_MODE BIT(1) +#define TRGMII_CENTRAL_ALIGNED BIT(2) +#define INTF_MODE_RGMII_1000 (TRGMII_MODE | TRGMII_CENTRAL_ALIGNED) +#define INTF_MODE_RGMII_10_100 0 + /* GPIO port control registers for GMAC 2*/ #define GPIO_OD33_CTRL8 0x4c0 #define GPIO_BIAS_CTRL 0xed0 @@ -323,7 +347,11 @@ #define SYSCFG0_GE_MASK 0x3 #define SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2))) -/*ethernet reset control register*/ +/* ethernet subsystem clock register */ +#define ETHSYS_CLKCFG0 0x2c +#define ETHSYS_TRGMII_CLK_SEL362_5 BIT(11) + +/* ethernet reset control register */ #define ETHSYS_RSTCTRL 0x34 #define RSTCTRL_FE BIT(6) #define RSTCTRL_PPE BIT(31) @@ -389,6 +417,7 @@ enum mtk_clks_map { MTK_CLK_ESW, MTK_CLK_GP1, MTK_CLK_GP2, + MTK_CLK_TRGPLL, MTK_CLK_MAX }; From b88539658a597e649627c4cc2d446456fc01e104 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:56 +0800 Subject: [PATCH 0654/1050] net: ethernet: mediatek: add the dts property to set if TRGMII supported on GMAC0 Add the dts property indicating whether TRGMII is supported on GMAC0. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 6103e5583070..7111278adc7e 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -31,7 +31,10 @@ Optional properties: Required properties: - compatible: Should be "mediatek,eth-mac" - reg: The number of the MAC -- phy-handle: see ethernet.txt file in the same directory. +- phy-handle: see ethernet.txt file in the same directory and + the phy-mode "trgmii" required being provided when reg + is equal to 0 and the MAC uses fixed-link to connect + with inernal switch such as MT7530.
Example: From 2364c5c5ec14e936826eb10af56a337ccec01ffa Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:33:35 +0800 Subject: [PATCH 0655/1050] net: ethernet: mediatek: use phydev from struct net_device Reuse the phydev already present in struct net_device instead of creating another one in the private structure. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 73 ++++++++++----------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 - 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ec60794f802c..6b7acf4463fa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -175,7 +175,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return; - switch (mac->phy_dev->speed) { + switch (dev->phydev->speed) { case SPEED_1000: mcr |= MAC_MCR_SPEED_1000; break; @@ -185,22 +185,22 @@ static void mtk_phy_link_adjust(struct net_device *dev) }; if (mac->id == 0 && !mac->trgmii) - mtk_gmac0_rgmii_adjust(mac->hw, mac->phy_dev->speed); + mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed); - if (mac->phy_dev->link) + if (dev->phydev->link) mcr |= MAC_MCR_FORCE_LINK; - if (mac->phy_dev->duplex) { + if (dev->phydev->duplex) { mcr |= MAC_MCR_FORCE_DPX; - if (mac->phy_dev->pause) + if (dev->phydev->pause) rmt_adv = LPA_PAUSE_CAP; - if (mac->phy_dev->asym_pause) + if (dev->phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - if (mac->phy_dev->advertising & ADVERTISED_Pause) + if (dev->phydev->advertising & ADVERTISED_Pause) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause) + if (dev->phydev->advertising & ADVERTISED_Asym_Pause) lcl_adv |= ADVERTISE_PAUSE_ASYM; flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); @@ -217,7 +217,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); - if (mac->phy_dev->link) + if (dev->phydev->link) netif_carrier_on(dev); else netif_carrier_off(dev); @@ -255,17 +255,17 @@ static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, mac->id, phydev_name(phydev), phydev->phy_id, phydev->drv->name); - mac->phy_dev = phydev; - return 0; } -static int mtk_phy_connect(struct mtk_mac *mac) +static int mtk_phy_connect(struct net_device *dev) { - struct mtk_eth *eth = mac->hw; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth; struct device_node *np; u32 val; + eth = mac->hw; np = of_parse_phandle(mac->of_node, "phy-handle", 0); if (!np && of_phy_is_fixed_link(mac->of_node)) if (!of_phy_register_fixed_link(mac->of_node)) @@ -303,20 +303,21 @@ static int mtk_phy_connect(struct mtk_mac *mac) val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id); regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); + /* couple phydev to net_device */ mtk_phy_connect_node(eth, mac, np); - mac->phy_dev->autoneg = AUTONEG_ENABLE; - mac->phy_dev->speed = 0; - mac->phy_dev->duplex = 0; + dev->phydev->autoneg = AUTONEG_ENABLE; + dev->phydev->speed = 0; + dev->phydev->duplex = 0; if (of_phy_is_fixed_link(mac->of_node)) - mac->phy_dev->supported |= + dev->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->advertising = mac->phy_dev->supported | +
dev->phydev->advertising = dev->phydev->supported | ADVERTISED_Autoneg; - phy_start_aneg(mac->phy_dev); + phy_start_aneg(dev->phydev); of_node_put(np); @@ -1742,7 +1743,7 @@ static int mtk_open(struct net_device *dev) } atomic_inc(ð->dma_refcnt); - phy_start(mac->phy_dev); + phy_start(dev->phydev); netif_start_queue(dev); return 0; @@ -1777,7 +1778,7 @@ static int mtk_stop(struct net_device *dev) struct mtk_eth *eth = mac->hw; netif_tx_disable(dev); - phy_stop(mac->phy_dev); + phy_stop(dev->phydev); /* only shutdown DMA if this is the last user */ if (!atomic_dec_and_test(ð->dma_refcnt)) @@ -1917,7 +1918,7 @@ static int __init mtk_init(struct net_device *dev) dev->addr_assign_type = NET_ADDR_RANDOM; } - return mtk_phy_connect(mac); + return mtk_phy_connect(dev); } static void mtk_uninit(struct net_device *dev) @@ -1925,20 +1926,18 @@ static void mtk_uninit(struct net_device *dev) struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - phy_disconnect(mac->phy_dev); + phy_disconnect(dev->phydev); mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mtk_mac *mac = netdev_priv(dev); - switch (cmd) { case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return phy_mii_ioctl(mac->phy_dev, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); default: break; } @@ -1983,7 +1982,7 @@ static void mtk_pending_work(struct work_struct *work) if (!eth->mac[i] || of_phy_is_fixed_link(eth->mac[i]->of_node)) continue; - err = phy_init_hw(eth->mac[i]->phy_dev); + err = phy_init_hw(eth->netdev[i]->phydev); if (err) dev_err(eth->dev, "%s: PHY init failed.\n", eth->netdev[i]->name); @@ -2052,11 +2051,11 @@ static int mtk_get_settings(struct net_device *dev, if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = phy_read_status(mac->phy_dev); + err = phy_read_status(dev->phydev); if (err) return -ENODEV; - return phy_ethtool_gset(mac->phy_dev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } static int mtk_set_settings(struct net_device *dev, @@ -2064,14 +2063,14 @@ static int mtk_set_settings(struct net_device *dev, { struct mtk_mac *mac = netdev_priv(dev); - if (cmd->phy_address != mac->phy_dev->mdio.addr) { - mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus, + if (cmd->phy_address != dev->phydev->mdio.addr) { + dev->phydev = mdiobus_get_phy(mac->hw->mii_bus, cmd->phy_address); - if (!mac->phy_dev) + if (!dev->phydev) return -ENODEV; } - return phy_ethtool_sset(mac->phy_dev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } static void mtk_get_drvinfo(struct net_device *dev, @@ -2105,7 +2104,7 @@ static int mtk_nway_reset(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - return genphy_restart_aneg(mac->phy_dev); + return genphy_restart_aneg(dev->phydev); } static u32 mtk_get_link(struct net_device *dev) @@ -2116,11 +2115,11 @@ static u32 mtk_get_link(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = genphy_update_link(mac->phy_dev); + err = genphy_update_link(dev->phydev); if (err) return ethtool_op_get_link(dev); - return mac->phy_dev->link; + return dev->phydev->link; } static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index e52115633726..7e194f795c9a 100644 --- 
a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -557,7 +557,6 @@ struct mtk_eth { * @of_node: Our devicetree node * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter - * @phy_dev: The attached PHY if available * @trgmii Indicate if the MAC uses TRGMII connected to internal switch */ @@ -567,7 +566,6 @@ struct mtk_mac { int id; struct device_node *of_node; struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; - struct phy_device *phy_dev; __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; int hwlro_ip_cnt; bool trgmii; From a2b2a19f0fbc674478a6806ea9e4f6aff06763f8 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:36:15 +0800 Subject: [PATCH 0656/1050] net: ethernet: mediatek: remove superfluous local variable for phy address Remove the unused variable for parsing the PHY address, along with the related sanity-check logic, both of which are already handled when of_mdiobus_register is called. Reported-by: Nelson Chang Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6b7acf4463fa..1918c39bbd40 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -226,17 +226,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, struct device_node *phy_node) { - const __be32 *_addr = NULL; struct phy_device *phydev; - int phy_mode, addr; + int phy_mode; - _addr = of_get_property(phy_node, "reg", NULL); - - if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) { - pr_err("%s: invalid phy address\n", phy_node->name); - return -EINVAL; - } - addr = be32_to_cpu(*_addr); phy_mode = of_get_phy_mode(phy_node); if (phy_mode < 0) { dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode); From 3e60b748fd2f50bbdb6a61fdd48222cd492c77e3 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:42:03 +0800 Subject: [PATCH 0657/1050] net: ethernet: mediatek: use [get|set]_link_ksettings 1) Use the new [get|set]_link_ksettings API instead of the old [get|set]_settings calls. 2) dev->phydev is guaranteed to be ready before these callbacks are invoked, so remove all the sanity checks for its existence. Signed-off-by: Sean Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 33 ++++++++------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1918c39bbd40..a65801a5e2ee 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2034,35 +2034,26 @@ static int mtk_cleanup(struct mtk_eth *eth) return 0; } -static int mtk_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +int mtk_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); - int err; + struct mtk_mac *mac = netdev_priv(ndev); if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = phy_read_status(dev->phydev); - if (err) - return -ENODEV; - - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(ndev->phydev, cmd); } -static int mtk_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +int mtk_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); + struct mtk_mac *mac = netdev_priv(ndev); - if (cmd->phy_address != dev->phydev->mdio.addr) { - dev->phydev = mdiobus_get_phy(mac->hw->mii_bus, - cmd->phy_address); - if (!dev->phydev) - return -ENODEV; - } + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } static void mtk_get_drvinfo(struct net_device *dev, @@ -2225,8 +2216,8 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) } static const struct ethtool_ops mtk_ethtool_ops = { - .get_settings = mtk_get_settings, - .set_settings = mtk_set_settings, + .get_link_ksettings = mtk_get_link_ksettings, + .set_link_ksettings = mtk_set_link_ksettings, .get_drvinfo = mtk_get_drvinfo, .get_msglevel = mtk_get_msglevel, .set_msglevel = mtk_set_msglevel, From f6f7d9c03f5daae04449ad19de5e2f0c20c5eaac Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:44:16 +0800 Subject: [PATCH 0658/1050] net: ethernet: mediatek: get out of potential invalid pointer access A potentially dangerous invalid pointer might be accessed if an error happens while coupling the phy_device to the net_device, so clean up the error path. Signed-off-by: Sean Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a65801a5e2ee..3d7e0cb601b6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -296,7 +296,9 @@ static int mtk_phy_connect(struct net_device *dev) regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); /* couple phydev to net_device */ - mtk_phy_connect_node(eth, mac, np); + if (mtk_phy_connect_node(eth, mac, np)) + goto err_phy; + dev->phydev->autoneg = AUTONEG_ENABLE; dev->phydev->speed = 0; dev->phydev->duplex = 0; @@ -317,7 +319,7 @@ static int mtk_phy_connect(struct net_device *dev) err_phy: of_node_put(np); - dev_err(eth->dev, "invalid phy_mode\n"); + dev_err(eth->dev, "%s: invalid phy\n", __func__); return -EINVAL; } From 0364a8824c020f12e2d5e9fad963685b58f7574e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 22 Sep 2016 11:06:25 +0200 Subject: [PATCH 0659/1050] xen-netback: switch to threaded irq for control ring Instead of open coding it use the threaded irq mechanism in xen-netback. Signed-off-by: Juergen Gross Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 4 +-- drivers/net/xen-netback/interface.c | 38 +++++------------------------ drivers/net/xen-netback/netback.c | 18 +++----------- 3 files changed, 11 insertions(+), 49 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 3a562683603c..ff94c513fe20 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -292,8 +292,6 @@ struct xenvif { #endif struct xen_netif_ctrl_back_ring ctrl; - struct task_struct *ctrl_task; - wait_queue_head_t ctrl_wq; unsigned int ctrl_irq; /* Miscellaneous private stuff. 
*/ @@ -359,7 +357,7 @@ void xenvif_kick_thread(struct xenvif_queue *queue); int xenvif_dealloc_kthread(void *data); -int xenvif_ctrl_kthread(void *data); +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 83deeebfc2d1..fb50c6d5f6c3 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -128,15 +128,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -irqreturn_t xenvif_ctrl_interrupt(int irq, void *dev_id) -{ - struct xenvif *vif = dev_id; - - wake_up(&vif->ctrl_wq); - - return IRQ_HANDLED; -} - int xenvif_queue_stopped(struct xenvif_queue *queue) { struct net_device *dev = queue->vif->dev; @@ -570,8 +561,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, struct net_device *dev = vif->dev; void *addr; struct xen_netif_ctrl_sring *shared; - struct task_struct *task; - int err = -ENOMEM; + int err; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), &ring_ref, 1, &addr); @@ -581,11 +571,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, shared = (struct xen_netif_ctrl_sring *)addr; BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); - init_waitqueue_head(&vif->ctrl_wq); - - err = bind_interdomain_evtchn_to_irqhandler(vif->domid, evtchn, - xenvif_ctrl_interrupt, - 0, dev->name, vif); + err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); if (err < 0) goto err_unmap; @@ -593,19 +579,13 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, xenvif_init_hash(vif); - task = kthread_create(xenvif_ctrl_kthread, (void *)vif, - "%s-control", dev->name); - if (IS_ERR(task)) { - pr_warn("Could not allocate kthread for %s\n", dev->name); - err = PTR_ERR(task); + err = request_threaded_irq(vif->ctrl_irq, NULL, xenvif_ctrl_irq_fn, + IRQF_ONESHOT, "xen-netback-ctrl", vif); + if (err) { + pr_warn("Could not setup irq handler for %s\n", dev->name); goto err_deinit; } - get_task_struct(task); - vif->ctrl_task = task; - - wake_up_process(vif->ctrl_task); - return 0; err_deinit: @@ -774,12 +754,6 @@ void xenvif_disconnect_data(struct xenvif *vif) void xenvif_disconnect_ctrl(struct xenvif *vif) { - if (vif->ctrl_task) { - kthread_stop(vif->ctrl_task); - put_task_struct(vif->ctrl_task); - vif->ctrl_task = NULL; - } - if (vif->ctrl_irq) { xenvif_deinit_hash(vif); unbind_from_irqhandler(vif->ctrl_irq, vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index edbae0b1e8f0..3d0c989384b5 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -2359,24 +2359,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) return 0; } -int xenvif_ctrl_kthread(void *data) +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; - for (;;) { - wait_event_interruptible(vif->ctrl_wq, - xenvif_ctrl_work_todo(vif) || - kthread_should_stop()); - if (kthread_should_stop()) - break; + while (xenvif_ctrl_work_todo(vif)) + xenvif_ctrl_action(vif); - while (xenvif_ctrl_work_todo(vif)) - xenvif_ctrl_action(vif); - - cond_resched(); - } - - return 0; + return IRQ_HANDLED; } static int __init netback_init(void) From 9bf6ffdabdd6e70a0b69d032a0aff091afe1773e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Aug 2016 15:06:04 +0200 Subject: [PATCH 0660/1050] locking/atomic, arch/sh: Fix ATOMIC_FETCH_OP() We cannot use the "z" constraint 
twice, since it's a single register (r0). Change the one not used by movli.l/movco.l to "r". Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Signed-off-by: Ingo Molnar --- arch/sh/include/asm/atomic-llsc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index caea2c45f6c2..1d159ce50f5a 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -60,7 +60,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ " movco.l %0, @%3 \n" \ " bf 1b \n" \ " synco \n" \ - : "=&z" (temp), "=&z" (res) \ + : "=&z" (temp), "=&r" (res) \ : "r" (i), "r" (&v->counter) \ : "t"); \ \ From 08b90f0655258411a1b41d856331e20e7ec8d55c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 20 Sep 2016 18:48:10 +0300 Subject: [PATCH 0661/1050] perf/x86/intel/bts: Make it an exclusive PMU Just like intel_pt, intel_bts can only handle one event at a time, which is the reason we introduced PERF_PMU_CAP_EXCLUSIVE in the first place. However, at the moment one can have as many intel_bts events within the same context at the same time as one pleases. Only one of them, however, will get scheduled and receive the actual trace data. Fix this by making intel_bts an "exclusive" PMU. Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160920154811.3255-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 6ff66efa0feb..982c9e31daca 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -584,7 +584,8 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; - bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; bts_pmu.event_init = bts_event_init; bts_pmu.add = bts_event_add; From 3bf6215a1b30db7df6083c708caab3fe1a8e8abe Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 20 Sep 2016 18:48:11 +0300 Subject: [PATCH 0662/1050] perf/core: Limit matching exclusive events to one PMU An "exclusive" PMU is one that can only have one event scheduled in at any given time. There may be more than one such PMU in a system, though, like Intel PT and BTS. It should be allowed to have one event for either of those inside the same context (there may be other constraints that may prevent this, but those would be hardware-specific). However, the exclusivity code is written so that only one event from any of the "exclusive" PMUs is allowed in a context. Fix this by making the exclusive event filter explicitly match two events' PMUs.
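A minimal sketch of the corrected predicate (simplified stand-in types; the real check lives in kernel/events/core.c): two events only conflict for exclusivity purposes when they come from the same PMU and could end up on the same CPU.

    #include <stdbool.h>

    struct event { const void *pmu; int cpu; };  /* stand-in for perf_event */

    static bool exclusive_event_conflicts(const struct event *e1,
                                          const struct event *e2)
    {
            return e1->pmu == e2->pmu &&
                   (e1->cpu == e2->cpu || e1->cpu == -1 || e2->cpu == -1);
    }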
Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160920154811.3255-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a54f2c2cdb20..fc9bb2225291 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3929,7 +3929,7 @@ static void exclusive_event_destroy(struct perf_event *event) static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2) { - if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && + if ((e1->pmu == e2->pmu) && (e1->cpu == e2->cpu || e1->cpu == -1 || e2->cpu == -1)) From 2b03bf732488a3c2e920afe22c03b82cb8477e28 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 13 Sep 2016 13:49:53 +0200 Subject: [PATCH 0663/1050] netfilter: nft_numgen: add number generation offset Add support for an offset value for the incremental counter and random modes. With this option the sysadmin is able to start the counter at a certain value and then apply the generated number. Example: meta mark set numgen inc mod 2 offset 100 This will generate marks with the series 100, 101, 100, 101, ... Suggested-by: Pablo Neira Ayuso Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_numgen.c | 32 +++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index bc0eb6a1066d..bcfb892ff148 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1136,12 +1136,14 @@ enum nft_trace_types { * @NFTA_NG_DREG: destination register (NLA_U32) * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) * @NFTA_NG_TYPE: operation type (NLA_U32) + * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32) */ enum nft_ng_attributes { NFTA_NG_UNSPEC, NFTA_NG_DREG, NFTA_NG_MODULUS, NFTA_NG_TYPE, + NFTA_NG_OFFSET, __NFTA_NG_MAX }; #define NFTA_NG_MAX (__NFTA_NG_MAX - 1) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index f173ebec30a7..55bc5ab78d4a 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -23,6 +23,7 @@ struct nft_ng_inc { enum nft_registers dreg:8; u32 modulus; atomic_t counter; + u32 offset; }; static void nft_ng_inc_eval(const struct nft_expr *expr, @@ -37,13 +38,14 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, nval = (oval + 1 < priv->modulus) ?
oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); - regs->data[priv->dreg] = nval; + regs->data[priv->dreg] = nval + priv->offset; } static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, + [NFTA_NG_OFFSET] = { .type = NLA_U32 }, }; static int nft_ng_inc_init(const struct nft_ctx *ctx, @@ -52,10 +54,16 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, { struct nft_ng_inc *priv = nft_expr_priv(expr); + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); if (priv->modulus == 0) return -ERANGE; + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); atomic_set(&priv->counter, 0); @@ -64,7 +72,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, - u32 modulus, enum nft_ng_types type) + u32 modulus, enum nft_ng_types type, u32 offset) { if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) goto nla_put_failure; @@ -72,6 +80,8 @@ static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, goto nla_put_failure; if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset))) + goto nla_put_failure; return 0; @@ -83,12 +93,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_inc *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL, + priv->offset); } struct nft_ng_random { enum nft_registers dreg:8; u32 modulus; + u32 offset; }; static void nft_ng_random_eval(const struct nft_expr *expr, @@ -97,9 +109,10 @@ static void nft_ng_random_eval(const struct nft_expr *expr, { struct nft_ng_random *priv = nft_expr_priv(expr); struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); + u32 val; - regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), - priv->modulus); + val = reciprocal_scale(prandom_u32_state(state), priv->modulus); + regs->data[priv->dreg] = val + priv->offset; } static int nft_ng_random_init(const struct nft_ctx *ctx, @@ -108,10 +121,16 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, { struct nft_ng_random *priv = nft_expr_priv(expr); + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); if (priv->modulus == 0) return -ERANGE; + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + prandom_init_once(&nft_numgen_prandom_state); priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); @@ -124,7 +143,8 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM, + priv->offset); } static struct nft_expr_type nft_ng_type; From dd7e39bbfce1fa6de8315d790d1fe01e92cba44d Mon Sep 17 00:00:00 2001 From: Arek Lichwa Date: Thu, 22 Sep 2016 14:08:05 +0200 Subject: [PATCH 0664/1050] Bluetooth: Fix NULL pointer dereference in mgmt context Adds missing callback assignment to cmd_complete in pending management command 
context. The dump path involves a security procedure performed on legacy (pre-SSP) devices with service security requirements set to HIGH (16-digit PIN). It fails when a shorter PIN is delivered by the user. [ 1.517950] Bluetooth: PIN code is not 16 bytes long [ 1.518491] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1.518584] IP: [< (null)>] (null) [ 1.518584] PGD 9e08067 PUD 9fdf067 PMD 0 [ 1.518584] Oops: 0010 [#1] SMP [ 1.518584] Modules linked in: [ 1.518584] CPU: 0 PID: 1002 Comm: kworker/u3:2 Not tainted 4.8.0-rc6-354649-gaf4168c #16 [ 1.518584] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.9.3-20160701_074356-anatol 04/01/2014 [ 1.518584] Workqueue: hci0 hci_rx_work [ 1.518584] task: ffff880009ce14c0 task.stack: ffff880009e10000 [ 1.518584] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 1.518584] RSP: 0018:ffff880009e13bc8 EFLAGS: 00010293 [ 1.518584] RAX: 0000000000000000 RBX: ffff880009eed100 RCX: 0000000000000006 [ 1.518584] RDX: ffff880009ddc000 RSI: 0000000000000000 RDI: ffff880009eed100 [ 1.518584] RBP: ffff880009e13be0 R08: 0000000000000000 R09: 0000000000000001 [ 1.518584] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 1.518584] R13: ffff880009e13ccd R14: ffff880009ddc000 R15: ffff880009ddc010 [ 1.518584] FS: 0000000000000000(0000) GS:ffff88000bc00000(0000) knlGS:0000000000000000 [ 1.518584] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.518584] CR2: 0000000000000000 CR3: 0000000009fdd000 CR4: 00000000000006f0 [ 1.518584] Stack: [ 1.518584] ffffffff81909808 ffff880009e13cce ffff880009e0d40b ffff880009e13c68 [ 1.518584] ffffffff818f428d 00000000024000c0 ffff880009e13c08 ffffffff810ca903 [ 1.518584] ffff880009e13c48 ffffffff811ade34 ffffffff8178c31f ffff880009ee6200 [ 1.518584] Call Trace: [ 1.518584] [] ? mgmt_pin_code_neg_reply_complete+0x38/0x60 [ 1.518584] [] hci_cmd_complete_evt+0x69d/0x3200 [ 1.518584] [] ? rcu_read_lock_sched_held+0x53/0x60 [ 1.518584] [] ? kmem_cache_alloc+0x1a4/0x200 [ 1.518584] [] ? skb_clone+0x4f/0xa0 [ 1.518584] [] hci_event_packet+0x8e1/0x28e0 [ 1.518584] [] ? _raw_spin_unlock_irqrestore+0x31/0x50 [ 1.518584] [] ? trace_hardirqs_on_caller+0xee/0x1b0 [ 1.518584] [] hci_rx_work+0x1e1/0x5b0 [ 1.518584] [] ? process_one_work+0x1ed/0x6b0 [ 1.518584] [] process_one_work+0x268/0x6b0 [ 1.518584] [] ? process_one_work+0x1ed/0x6b0 [ 1.518584] [] worker_thread+0x43/0x4e0 [ 1.518584] [] ? process_one_work+0x6b0/0x6b0 [ 1.518584] [] ? process_one_work+0x6b0/0x6b0 [ 1.518584] [] kthread+0xdf/0x100 [ 1.518584] [] ret_from_fork+0x1f/0x40 [ 1.518584] [] ?
kthread_create_on_node+0x210/0x210 Signed-off-by: Arek Lichwa Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7b2bac492fb1..63f42f45a96a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2541,6 +2541,8 @@ static int send_pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, if (!cmd) return -ENOMEM; + cmd->cmd_complete = addr_cmd_complete; + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->addr.bdaddr), &cp->addr.bdaddr); if (err < 0) From 1144a4eed04b2c3e7d20146d1b76f7669b55971d Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Wed, 21 Sep 2016 19:13:08 +0300 Subject: [PATCH 0665/1050] Bluetooth: Add a new 04ca:3011 QCA_ROME device BugLink: https://bugs.launchpad.net/bugs/1535802 T: Bus=01 Lev=02 Prnt=02 Port=04 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=3011 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9ebd73dd7915..6bd63b84abd0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -253,6 +253,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME }, /* Broadcom BCM2035 */ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, From 7dc6f16c68757548a332a0c5fbe661987c2189a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Thu, 22 Sep 2016 16:01:39 +0200 Subject: [PATCH 0666/1050] Bluetooth: Fix not updating scan rsp when adv off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scan response data should not be updated unless there is an advertising instance. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 63f42f45a96a..19b8a5e9420d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3159,7 +3159,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, /* The name is stored in the scan response data and so * no need to udpate the advertising data here. 
*/ - if (lmp_le_capable(hdev)) + if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING)) __hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance); err = hci_req_run(&req, set_name_complete); From 331dcf421c34d227784d07943eb01e4023a42b0a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 25 Aug 2016 12:23:39 +0100 Subject: [PATCH 0667/1050] i2c: qup: skip qup_i2c_suspend if the device is already runtime suspended If the i2c device is already runtime suspended and qup_i2c_suspend is executed during suspend-to-idle or suspend-to-ram, it will result in the following splat: WARNING: CPU: 3 PID: 1593 at drivers/clk/clk.c:476 clk_core_unprepare+0x80/0x90 Modules linked in: CPU: 3 PID: 1593 Comm: bash Tainted: G W 4.8.0-rc3 #14 Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) PC is at clk_core_unprepare+0x80/0x90 LR is at clk_unprepare+0x28/0x40 pc : [] lr : [] pstate: 60000145 Call trace: clk_core_unprepare+0x80/0x90 qup_i2c_disable_clocks+0x2c/0x68 qup_i2c_suspend+0x10/0x20 platform_pm_suspend+0x24/0x68 ... This patch fixes the issue by executing qup_i2c_pm_suspend_runtime conditionally in qup_i2c_suspend. Signed-off-by: Sudeep Holla Reviewed-by: Andy Gross Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-qup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 501bd15cb78e..a8497cfdae6f 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1599,7 +1599,8 @@ static int qup_i2c_pm_resume_runtime(struct device *device) #ifdef CONFIG_PM_SLEEP static int qup_i2c_suspend(struct device *device) { - qup_i2c_pm_suspend_runtime(device); + if (!pm_runtime_suspended(device)) + return qup_i2c_pm_suspend_runtime(device); return 0; } From 56f2929b0f913163ba48c22ea4ae558ac61d3d1c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 31 Aug 2016 12:37:55 +0300 Subject: [PATCH 0668/1050] iwlwifi: mvm: cleanup redundant no_power_up_nic_in_init config This is never really used anymore. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/iwl-config.h | 1 - drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 46 ++++++------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 6cecb1adf0c8..2660cc4b9f8c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -359,7 +359,6 @@ struct iwl_cfg { high_temp:1, mac_addr_from_csr:1, lp_xtal_workaround:1, - no_power_up_nic_in_init:1, disable_dummy_notification:1, apmg_not_supported:1, mq_rx_supported:1, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 75f3ca0504fd..6c08aa3fd2df 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -712,37 +712,21 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, IWL_DEBUG_EEPROM(mvm->trans->dev, "working without external nvm file\n"); - if (WARN(cfg->no_power_up_nic_in_init && !mvm->nvm_file_name, "not allowing power-up and not having nvm_file\n")) + err = iwl_trans_start_hw(mvm->trans); + if (err) goto out_free; - /* * Even if nvm exists in the nvm_file driver should read again the nvm * from the nic because there might be entries that exist in the OTP * and not in the file.
- * for nics with no_power_up_nic_in_init: rely completley on nvm_file - */ - if (cfg->no_power_up_nic_in_init && mvm->nvm_file_name) { - err = iwl_nvm_init(mvm, false); - if (err) - goto out_free; - } else { - err = iwl_trans_start_hw(mvm->trans); - if (err) - goto out_free; - - mutex_lock(&mvm->mutex); - iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); - err = iwl_run_init_mvm_ucode(mvm, true); - if (!err || !iwlmvm_mod_params.init_dbg) - iwl_mvm_stop_device(mvm); - iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE); - mutex_unlock(&mvm->mutex); - /* returns 0 if successful, 1 if success but in rfkill */ - if (err < 0 && !iwlmvm_mod_params.init_dbg) { - IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err); - goto out_free; - } + mutex_lock(&mvm->mutex); + iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); + err = iwl_run_init_mvm_ucode(mvm, true); + if (!err || !iwlmvm_mod_params.init_dbg) + iwl_mvm_stop_device(mvm); + iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE); + mutex_unlock(&mvm->mutex); + /* returns 0 if successful, 1 if success but in rfkill */ + if (err < 0 && !iwlmvm_mod_params.init_dbg) { + IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err); + goto out_free; } scan_size = iwl_mvm_scan_size(mvm); @@ -784,8 +768,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, flush_delayed_work(&mvm->fw_dump_wk); iwl_phy_db_free(mvm->phy_db); kfree(mvm->scan_cmd); - if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name) - iwl_trans_op_mode_leave(trans); + iwl_trans_op_mode_leave(trans); + ieee80211_free_hw(mvm->hw); return NULL; } From acf91ec384dd4c7c2c88cbaa2e0374e537123c1f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 22 Sep 2016 11:20:42 -0700 Subject: [PATCH 0669/1050] Bluetooth: btwilink: Save the packet type before sending Running with KASAN produces some messages: BUG: KASAN: use-after-free in ti_st_send_frame+0x9c/0x16c at addr ffffffc064868fe8 Read of size 1 by task kworker/u17:1/1266 Hardware name: HiKey Development Board (DT) Workqueue: hci0 hci_cmd_work Call trace: [] dump_backtrace+0x0/0x178 [] show_stack+0x20/0x28 [] dump_stack+0xa8/0xe0 [] print_trailer+0x110/0x174 [] object_err+0x4c/0x5c [] kasan_report_error+0x254/0x54c [] kasan_report+0x64/0x70 [] __asan_load1+0x4c/0x54 [] ti_st_send_frame+0x9c/0x16c [] hci_send_frame+0xb4/0x118 [] hci_cmd_work+0xcc/0x154 [] process_one_work+0x26c/0x7a4 [] worker_thread+0x9c/0x73c [] kthread+0x138/0x154 [] ret_from_fork+0x10/0x40 The packet is being accessed for statistics after it has been freed. Save the packet type before sending for statistics afterwards. Signed-off-by: Laura Abbott Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btwilink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c index 485281b3f167..ef51c9c864c5 100644 --- a/drivers/bluetooth/btwilink.c +++ b/drivers/bluetooth/btwilink.c @@ -245,6 +245,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct ti_st *hst; long len; + int pkt_type; hst = hci_get_drvdata(hdev); @@ -258,6 +259,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) * Freeing skb memory is taken care in shared transport layer, * so don't free skb memory here. */ + pkt_type = hci_skb_pkt_type(skb); len = hst->st_write(skb); if (len < 0) { kfree_skb(skb); @@ -268,7 +270,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) /* ST accepted our skb. 
So, Go ahead and do rest */ hdev->stat.byte_tx += len; - ti_st_tx_complete(hst, hci_skb_pkt_type(skb)); + ti_st_tx_complete(hst, pkt_type); return 0; } From f2082a53eea9839d9e3762405315c2a31d24d427 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 31 Aug 2016 14:13:53 +0300 Subject: [PATCH 0670/1050] iwlwifi: mvm: cleanup usage of init_dbg parameter Move the init_dbg check to earlier in the function to simplify the code. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 28 ++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 7322c4394a9a..8f10780808f6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1115,27 +1115,27 @@ int iwl_mvm_up(struct iwl_mvm *mvm) * (for example, if we were in RFKILL) */ ret = iwl_run_init_mvm_ucode(mvm, false); - if (ret && !iwlmvm_mod_params.init_dbg) { + + if (iwlmvm_mod_params.init_dbg) + return 0; + + if (ret) { IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret); /* this can't happen */ if (WARN_ON(ret > 0)) ret = -ERFKILL; goto error; } - if (!iwlmvm_mod_params.init_dbg) { - /* - * Stop and start the transport without entering low power - * mode. This will save the state of other components on the - * device that are triggered by the INIT firwmare (MFUART). - */ - _iwl_trans_stop_device(mvm->trans, false); - ret = _iwl_trans_start_hw(mvm->trans, false); - if (ret) - goto error; - } - if (iwlmvm_mod_params.init_dbg) - return 0; + /* + * Stop and start the transport without entering low power + * mode. This will save the state of other components on the + * device that are triggered by the INIT firwmare (MFUART). + */ + _iwl_trans_stop_device(mvm->trans, false); + ret = _iwl_trans_start_hw(mvm->trans, false); + if (ret) + goto error; ret = iwl_mvm_load_ucode_wait_alive(mvm, IWL_UCODE_REGULAR); if (ret) { From 191167160c1380e59156dff0c2d7e74aa0ba5770 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 8 Sep 2016 13:45:54 +0300 Subject: [PATCH 0671/1050] iwlwifi: add two new 9560 series PCI IDs Add two new PCI IDs for the 9560 series. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 3d766f250d3f..001be406a3d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -529,6 +529,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, From cc2f41f84fdc68074e228397247acd32ad06440a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Sep 2016 09:34:46 +0200 Subject: [PATCH 0672/1050] iwlwifi: pcie: avoid variable shadowing in TFD helpers The various TFD/TB helpers have two code paths depending on the type of TFD supported, with variable shadowing due to the new if branches. Move the fall-through code into else branches to avoid variable shadowing. 
While doing so, rename some of the variables and do some other cleanups (like removing void * casts of void * pointers.) Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 19 +++---- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 51 ++++++++++--------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 2d81630fba0f..cac6d99012b3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -497,23 +497,20 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct sk_buff_head *skbs); void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); -static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *tfd, +static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *_tfd, u8 idx) { - struct iwl_tfd *tfd_fh; - struct iwl_tfd_tb *tb; - if (trans->cfg->use_tfh) { - struct iwl_tfh_tfd *tfd_fh = (void *)tfd; - struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + struct iwl_tfh_tfd *tfd = _tfd; + struct iwl_tfh_tb *tb = &tfd->tbs[idx]; return le16_to_cpu(tb->tb_len); + } else { + struct iwl_tfd *tfd = _tfd; + struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + + return le16_to_cpu(tb->hi_n_len) >> 4; } - - tfd_fh = (void *)tfd; - tb = &tfd_fh->tbs[idx]; - - return le16_to_cpu(tb->hi_n_len) >> 4; } /***************************************************** diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index e00e7d8f197a..e9a278b60dfd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -337,28 +337,32 @@ static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie, } static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans, - void *tfd, u8 idx) + void *_tfd, u8 idx) { - struct iwl_tfd *tfd_fh; - struct iwl_tfd_tb *tb; - dma_addr_t addr; if (trans->cfg->use_tfh) { - struct iwl_tfh_tfd *tfd_fh = (void *)tfd; - struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + struct iwl_tfh_tfd *tfd = _tfd; + struct iwl_tfh_tb *tb = &tfd->tbs[idx]; return (dma_addr_t)(le64_to_cpu(tb->addr)); + } else { + struct iwl_tfd *tfd = _tfd; + struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + dma_addr_t addr = get_unaligned_le32(&tb->lo); + dma_addr_t hi_len; + + if (sizeof(dma_addr_t) <= sizeof(u32)) + return addr; + + hi_len = le16_to_cpu(tb->hi_n_len) & 0xF; + + /* + * shift by 16 twice to avoid warnings on 32-bit + * (where this code never runs anyway due to the + * if statement above) + */ + return addr | ((hi_len << 16) << 16); } - - tfd_fh = (void *)tfd; - tb = &tfd_fh->tbs[idx]; - addr = get_unaligned_le32(&tb->lo); - - if (sizeof(dma_addr_t) > sizeof(u32)) - addr |= - ((dma_addr_t)(le16_to_cpu(tb->hi_n_len) & 0xF) << 16) << 16; - - return addr; } static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd, @@ -388,18 +392,17 @@ static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd, } } -static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *tfd) +static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *_tfd) { - struct iwl_tfd *tfd_fh; - if (trans->cfg->use_tfh) { - struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tfd *tfd = _tfd; - return le16_to_cpu(tfd_fh->num_tbs) & 0x1f; + return le16_to_cpu(tfd->num_tbs) & 0x1f; + } else { + struct iwl_tfd *tfd 
= _tfd; + + return tfd->num_tbs & 0x1f; } - - tfd_fh = (void *)tfd; - return tfd_fh->num_tbs & 0x1f; } static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, From 75cfe338b8a6fadaa28879a969047554701a7589 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 14 Sep 2016 11:54:36 +0300 Subject: [PATCH 0673/1050] iwlwifi: mvm: bail out if CTDP start operation fails We were assigning the return value of iwl_mvm_ctdp_command() to a variable, but never checking it. If this command fails, we should not allow the interface up process to proceed, since it is potentially dangerous to ignore thermal management requirements. Fixes: commit 5c89e7bc557e ("iwlwifi: mvm: add registration to cooling device") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 8f10780808f6..897412057d1f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1233,9 +1233,12 @@ int iwl_mvm_up(struct iwl_mvm *mvm) } /* TODO: read the budget from BIOS / Platform NVM */ - if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) + if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) { ret = iwl_mvm_ctdp_command(mvm, CTDP_CMD_OPERATION_START, mvm->cooling_dev.cur_state); + if (ret) + goto error; + } #else /* Initialize tx backoffs to the minimal possible */ iwl_mvm_tt_tx_backoff(mvm, 0); From 7f66ea03644e197a29af5a00a6e26ff120d8edd0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 10:20:10 +0200 Subject: [PATCH 0674/1050] iwlwifi: mvm: correct rate_idx bounds-check The upper bound IWL_RATE_COUNT_LEGACY should be used with a >= check, rejecting the value itself; fix that. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index e0c9065a3ad4..ef4bdfc9e7e2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -346,7 +346,7 @@ void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, rate_idx = info->control.rates[0].idx; /* if the rate isn't a well known legacy rate, take the lowest one */ - if (rate_idx < 0 || rate_idx > IWL_RATE_COUNT_LEGACY) + if (rate_idx < 0 || rate_idx >= IWL_RATE_COUNT_LEGACY) rate_idx = rate_lowest_index( &mvm->nvm_data->bands[info->band], sta); From 3b4ac78610690bd83fb33762ef97e8b8a89285ae Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Sep 2016 19:58:17 -0600 Subject: [PATCH 0675/1050] nvme-rdma: only clear queue flags after successful connect Otherwise, nvme_rdma_stop_and_clear_queue() will incorrectly try to stop/free rdma qps/cm_ids that are already freed. 
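The ordering matters: a minimal sketch of the idea (hypothetical names, much simplified from the nvme-rdma code) is to leave the DELETING flag set until the queue resources have actually been recreated, so a concurrent teardown never sees a half-initialized queue as live.

    #include <linux/bitops.h>

    enum { Q_DELETING, Q_CONNECTED };

    struct queue { unsigned long flags; };

    static int queue_reinit(struct queue *q, int (*setup)(struct queue *))
    {
            int ret;

            ret = setup(q);         /* may fail before cm_id/qp exist */
            if (ret)
                    return ret;     /* Q_DELETING stays set: teardown skips us */

            clear_bit(Q_DELETING, &q->flags);       /* only after success */
            set_bit(Q_CONNECTED, &q->flags);
            return 0;
    }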
Fixes: e89ca58f9c90 ("nvme-rdma: add DELETING queue flag") Reported-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c2c2c28e6eb5..fbdb2267e460 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -561,7 +561,6 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; - queue->flags = 0; init_completion(&queue->cm_done); if (idx > 0) @@ -595,6 +594,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, goto out_destroy_cm_id; } + clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags); return 0; From 96b03ab86d843524ec4aed7fe0ceef412c684c68 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:55:13 -0400 Subject: [PATCH 0676/1050] locking/hung_task: Fix typo in CONFIG_DETECT_HUNG_TASK help text Fix the indefinitiley -> indefinitely typo in Kconfig.debug. Signed-off-by: Vivien Didelot Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160922205513.17821-1-vivien.didelot@savoirfairelinux.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2e2cca509231..cab7405f48d2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -821,7 +821,7 @@ config DETECT_HUNG_TASK help Say Y here to enable the kernel to detect "hung tasks", which are bugs that cause the task to be stuck in - uninterruptible "D" state indefinitiley. + uninterruptible "D" state indefinitely. When a hung task is detected, the kernel will print the current stack trace (which you should report), but the From f15823510246444052b35f148c7ae627842b0e05 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 24 Aug 2016 11:33:46 -0700 Subject: [PATCH 0677/1050] i40e: fix setting user defined RSS hash key Previously, when using ethtool to change the RSS hash key, ethtool would report back saying the old key was still being used and no error was reported. It was unclear whether it was being reported incorrectly or being set incorrectly. Debugging revealed 'i40e_set_rxfh()' returned zero immediately instead of setting the key because a user defined indirection table is not supplied when changing the hash key. This fix instead changes it such that if an indirection table is not supplied, then a default one is created and the hash key is now correctly set. 
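For illustration, a minimal user-space sketch of the fixed flow (all names here are illustrative stand-ins, not the driver's symbols): when the caller supplies a key but no indirection table, build a default round-robin LUT instead of returning early, so the key still gets programmed:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    /* Stand-in for the hardware write; it only prints a summary here. */
    static int program_rss(const uint8_t *key, const uint8_t *lut, size_t n)
    {
        printf("key[0]=%#x, lut[0..4]=%u %u %u %u %u (%zu entries)\n",
               key[0], lut[0], lut[1], lut[2], lut[3], lut[4], n);
        return 0;
    }

    /* Default LUT: spread entries round-robin across rss_size queues. */
    static void fill_default_lut(uint8_t *lut, size_t n, uint16_t rss_size)
    {
        for (size_t i = 0; i < n; i++)
            lut[i] = (uint8_t)(i % rss_size);
    }

    int main(void)
    {
        uint8_t key[40] = { 0xaa };      /* the key the user asked for */
        uint8_t lut[128];
        const uint32_t *indir = NULL;    /* user changed only the hash key */

        if (indir) {
            for (size_t i = 0; i < sizeof(lut); i++)
                lut[i] = (uint8_t)indir[i];
        } else {
            /* the old code returned 0 here and never programmed the key */
            fill_default_lut(lut, sizeof(lut), 4);
        }
        return program_rss(key, lut, sizeof(lut));
    }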
Change-ID: Iddb621897ecf208650272b7ee46702cad7b69a71 Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 +++++++----- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 19103a6a7dcc..30aaee42f648 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -701,6 +701,8 @@ void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 1835186b62c9..af28a8c2166b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2922,15 +2922,13 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; u8 *seed = NULL; u16 i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (!indir) - return 0; - if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE, @@ -2948,8 +2946,12 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, } /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) - vsi->rss_lut_user[i] = (u8)(indir[i]); + if (indir) + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + else + i40e_fill_rss_lut(pf, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE, + vsi->rss_size); return i40e_config_rss(vsi, seed, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 61b0fc433d37..69b9e30af3fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -57,8 +57,6 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -8244,8 +8242,8 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) * @rss_table_size: Lookup table size * @rss_size: Range of queue number for hashing */ -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size) +void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size) { u16 i; From a01c7f6709925919e2e3c6c190a92692f63f74e4 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 24 Aug 2016 11:33:47 -0700 Subject: [PATCH 
0678/1050] i40e: fix "dump port" command when NPAR enabled When using the debugfs to issue the "dump port" command with NPAR enabled, the firmware reports back with invalid argument. The issue occurs because the pf->mac_seid was used to perform the query. This is fine when NPAR is disabled because the switch ID == pf->mac_seid, however this is not the case when NPAR is enabled. This fix instead goes through the VSI to determine the correct ID to use in either case. Change-ID: I0cd67913a7f2c4a2962e06d39e32e7447cc55b6a Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 05cf9a719bab..8555f04002da 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1054,6 +1054,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, struct i40e_dcbx_config *r_cfg = &pf->hw.remote_dcbx_config; int i, ret; + u32 switch_id; bw_data = kzalloc(sizeof( struct i40e_aqc_query_port_ets_config_resp), @@ -1063,8 +1064,12 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } + vsi = pf->vsi[pf->lan_vsi]; + switch_id = + vsi->info.switch_id & I40E_AQ_VSI_SW_ID_MASK; + ret = i40e_aq_query_port_ets_config(&pf->hw, - pf->mac_seid, + switch_id, bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, From 8d9d927f4ab8d87fee91d9aa8bdcdf19a1787ce0 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Wed, 24 Aug 2016 11:33:49 -0700 Subject: [PATCH 0679/1050] i40e: return correct opcode to VF This conditional is backward, so the driver responds back to the VF with the wrong opcode. Do the old switcheroo to fix this. Change-ID: I384035b0fef8a3881c176de4b4672009b3400b25 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index da3423561b3a..611fc87a5f0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2217,8 +2217,8 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen, error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, - config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP : - I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, + config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP : + I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, aq_ret); } From d4a0658d813ec72965a52f04f07258a4018ccb17 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 24 Aug 2016 11:33:50 -0700 Subject: [PATCH 0680/1050] i40e: Fix to check for NULL This patch fixes an issue in the virt channel code, where a return from i40e_find_vsi_from_id was not checked for NULL when applicable. Without this patch, there is a risk for panic and static analysis tools complain. This patch fixes the problem by adding the check and adding an additional input check for similar reasons. 
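The pattern being enforced, as a self-contained sketch (the helpers are hypothetical stand-ins for the driver's validation and lookup functions): validate the ID before the lookup, and never dereference the lookup result before checking it for NULL:

    #include <stdio.h>
    #include <stddef.h>

    struct vsi { int id; };

    static struct vsi table[4] = { {0}, {1}, {2}, {3} };

    static int isvalid_vsi_id(int id)            /* stand-in for the validity check */
    {
        return id >= 0 && id < 4;
    }

    static struct vsi *find_vsi_from_id(int id)  /* stand-in for the lookup */
    {
        return isvalid_vsi_id(id) ? &table[id] : NULL;
    }

    static int config_queue(int vsi_id)
    {
        struct vsi *vsi;

        if (!isvalid_vsi_id(vsi_id))
            return -1;                 /* reject bad input before the lookup */

        vsi = find_vsi_from_id(vsi_id);
        if (!vsi)
            return -1;                 /* never dereference an unchecked lookup */

        printf("configuring vsi %d\n", vsi->id);
        return 0;
    }

    int main(void)
    {
        return (config_queue(7) == -1 && config_queue(2) == 0) ? 0 : 1;
    }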
Change-ID: I7e9be88eb7a3addb50eadc451c8336d9e06f5394 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 611fc87a5f0a..2ab53555f463 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -502,8 +502,16 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, u32 qtx_ctl; int ret = 0; + if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + ret = -ENOENT; + goto error_context; + } pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); vsi = i40e_find_vsi_from_id(pf, vsi_id); + if (!vsi) { + ret = -ENOENT; + goto error_context; + } /* clear the context structure first */ memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq)); @@ -1476,7 +1484,8 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, vsi = i40e_find_vsi_from_id(pf, info->vsi_id); if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || + !vsi) { aq_ret = I40E_ERR_PARAM; goto error_param; }
From b3f5c7bc88bab134e9649e42d30be15e3775f00d Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 24 Aug 2016 11:33:51 -0700 Subject: [PATCH 0681/1050] i40e: Fix for extra byte swap in tunnel setup This patch fixes an issue where we were byte swapping the port parameter, then byte swapping it again inside the called function. Obviously, that's unnecessary, so take it out of the function calls. Without this patch, the UDP-based tunnel configuration would not be correct. Change-ID: I788d83c5bd5732170f1a81dbfa0b1ac3ca8ea5b7 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 69b9e30af3fa..53cde5b44346 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7154,9 +7154,9 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) pf->pending_udp_bitmap &= ~BIT_ULL(i); port = pf->udp_ports[i].index; if (port) - ret = i40e_aq_add_udp_tunnel(hw, ntohs(port), - pf->udp_ports[i].type, - NULL, NULL); + ret = i40e_aq_add_udp_tunnel(hw, port, + pf->udp_ports[i].type, + NULL, NULL); else ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
From ff918912e1b8ba4e743d1f0b06ced1d01969e17c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Aug 2016 18:41:01 +0100 Subject: [PATCH 0682/1050] i40e: avoid potential null pointer dereference when assigning len There is a sanity check for desc being NULL in the first line of function i40evf_debug_aq. However, before that, aq_desc is cast from desc, and aq_desc is being dereferenced on the assignment of len, so this could be a potential null pointer dereference. Fix this by moving the initialization of len to the code block where len is being used, at which point we know it is OK to dereference aq_desc.
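A stripped-down illustration of the hazard and the fix (types and names are placeholders, not the driver's): the load of len must move inside the block that only runs once the descriptor pointer is known to be non-NULL:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    struct desc { uint16_t datalen; };

    static void debug_dump(const struct desc *d, const uint8_t *buffer, uint16_t buf_len)
    {
        /* BAD: uint16_t len = d->datalen;  -- dereferences d before the NULL check */

        if (d == NULL)
            return;

        if (buffer != NULL && d->datalen != 0) {
            uint16_t len = d->datalen;   /* safe: d is known non-NULL here */

            if (buf_len < len)
                len = buf_len;
            printf("dumping %u bytes\n", len);
        }
    }

    int main(void)
    {
        uint8_t buf[8] = {0};
        struct desc d = { .datalen = 4 };

        debug_dump(NULL, buf, sizeof(buf)); /* must not crash */
        debug_dump(&d, buf, sizeof(buf));
        return 0;
    }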
Signed-off-by: Colin Ian King Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40e_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 4db0c0326185..7953c13451b9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -302,7 +302,6 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, void *buffer, u16 buf_len) { struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; - u16 len = le16_to_cpu(aq_desc->datalen); u8 *buf = (u8 *)buffer; u16 i = 0; @@ -326,6 +325,8 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, le32_to_cpu(aq_desc->params.external.addr_low)); if ((buffer != NULL) && (aq_desc->datalen != 0)) { + u16 len = le16_to_cpu(aq_desc->datalen); + i40e_debug(hw, mask, "AQ CMD Buffer:\n"); if (buf_len < len) len = buf_len; From 3f341acc1c65b800ced567174c683cda12dfb17d Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 1 Sep 2016 22:27:27 +0200 Subject: [PATCH 0683/1050] i40evf: Fix link state event handling Currently disabling the link state from PF via ip link set enp5s0f0 vf 0 state disable doesn't disable the CARRIER on the VF. This patch updates the carrier and starts/stops the tx queues based on the link state notification from PF. PF: enp5s0f0, VF: enp5s2 #modprobe i40e #echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs #ip link set enp5s2 up #ip -d link show enp5s2 175: enp5s2: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 #ip link set enp5s0f0 vf 0 state disable #ip -d link show enp5s0f0 171: enp5s0f0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 numtxqueues 72 numrxqueues 72 portid 6805ca2e7268 vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state disable, trust off vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off #ip -d link show enp5s2 175: enp5s2: mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000 link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 numtxqueues 16 numrxqueues 16 Signed-off-by: Sridhar Samudrala Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 4 ++++ drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index f751f7bc0d81..e0a8cd82255c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1037,6 +1037,7 @@ void i40evf_down(struct i40evf_adapter *adapter) netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); @@ -1731,6 +1732,7 @@ static void i40evf_reset_task(struct work_struct *work) set_bit(__I40E_DOWN, &adapter->vsi.state); netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); i40evf_free_traffic_irqs(adapter); @@ -1769,6 +1771,7 @@ continue_reset: if (netif_running(adapter->netdev)) { 
netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); } i40evf_irq_disable(adapter); @@ -2457,6 +2460,7 @@ static void i40evf_init_task(struct work_struct *work) goto err_sw_init; netif_carrier_off(netdev); + adapter->link_up = false; if (!adapter->netdev_registered) { err = register_netdev(netdev); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index cc6cb30c1667..ddf478d6322b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -898,8 +898,14 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, vpe->event_data.link_event.link_status) { adapter->link_up = vpe->event_data.link_event.link_status; + if (adapter->link_up) { + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } i40evf_print_link_message(adapter); - netif_tx_stop_all_queues(netdev); } break; case I40E_VIRTCHNL_EVENT_RESET_IMPENDING: @@ -974,8 +980,6 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case I40E_VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ i40evf_irq_enable(adapter, true); - netif_tx_start_all_queues(adapter->netdev); - netif_carrier_on(adapter->netdev); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); From cb130a0b41d2a825fa48d7dfc964f08da9ccbb96 Mon Sep 17 00:00:00 2001 From: Bimmy Pujari Date: Tue, 6 Sep 2016 18:05:03 -0700 Subject: [PATCH 0684/1050] i40evf: remove unnecessary error checking against i40evf_up_complete Function i40evf_up_complete() always returns success. Changed this to a void type and removed the code that checks the return status and prints an error message. 
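The shape of the cleanup, in a minimal sketch (names are illustrative): when a function can only succeed, a status return merely obliges every caller to carry dead error handling:

    #include <stdio.h>

    /* Before: always returned 0, so callers carried pointless error paths.
     * After: the signature says what the function can actually do. */
    static void up_complete(int *state)
    {
        *state = 1;                       /* nothing here can fail */
    }

    int main(void)
    {
        int state = 0;

        up_complete(&state);              /* no "if (err) goto err;" needed */
        printf("state=%d\n", state);
        return 0;
    }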
Change-ID: I8c400f174786b9c855f679e470f35af292fb50ad Signed-off-by: Bimmy Pujari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index e0a8cd82255c..99067757feb3 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1007,7 +1007,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) * i40evf_up_complete - Finish the last steps of bringing up a connection * @adapter: board private structure **/ -static int i40evf_up_complete(struct i40evf_adapter *adapter) +static void i40evf_up_complete(struct i40evf_adapter *adapter) { adapter->state = __I40EVF_RUNNING; clear_bit(__I40E_DOWN, &adapter->vsi.state); @@ -1016,7 +1016,6 @@ static int i40evf_up_complete(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); - return 0; } /** @@ -1827,9 +1826,7 @@ continue_reset: i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto reset_err; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true); } else { @@ -2059,9 +2056,7 @@ static int i40evf_open(struct net_device *netdev) i40evf_add_filter(adapter, adapter->hw.mac.addr); i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto err_req_irq; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true);
From 841493a3f64395b60554afbcaa17f4350f90e764 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 Sep 2016 18:05:04 -0700 Subject: [PATCH 0685/1050] i40e: Limit TX descriptor count in cases where frag size is greater than 16K The i40e driver was incorrectly assuming that we would always be pulling no more than 1 descriptor from each fragment. It is in fact possible for us to end up with a case where 2 descriptors' worth of data are pulled, when a frame is larger than one of the pieces generated when aligning the payload to either 4K or to pieces smaller than 16K. To adjust for this, we just need to make certain to test all the way to the end of the fragments: it is possible for us to span 2 descriptors in the block before us, so we need to guarantee that even the last 6 descriptors have enough data to fill a full frame. Change-ID: Ic2ecb4d6b745f447d334e66c14002152f50e2f99 Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 7 ++----- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f8d66236fcbf..bf7bb7c3f227 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2621,9 +2621,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size.
*/ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -2654,8 +2652,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 0130458264e5..e3427ebd5b84 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1832,9 +1832,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size. */ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -1865,8 +1863,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); From 903e68323bb62cc8ca30c5a7a41d962f92c27b97 Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Tue, 6 Sep 2016 18:05:05 -0700 Subject: [PATCH 0686/1050] i40evf: remove unnecessary error checking against i40e_shutdown_adminq The i40e_shutdown_adminq function never returns failure. There is no need to check the non-0 return value. Clean up the unnecessary error checking and warning against it. Change-ID: Ibb616f09cfb93bd1a872ebf3241a15fb8354b31b Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 99067757feb3..99833f3ea34e 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1785,8 +1785,7 @@ continue_reset: i40evf_free_all_tx_resources(adapter); /* kill and reinit the admin queue */ - if (i40evf_shutdown_adminq(hw)) - dev_warn(&adapter->pdev->dev, "Failed to shut down adminq\n"); + i40evf_shutdown_adminq(hw); adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; err = i40evf_init_adminq(hw); if (err) From 691e412132f07bd566934b7cbc430e87c5656de1 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 22 Aug 2016 16:17:46 -0700 Subject: [PATCH 0687/1050] ixgbe: simplify the logic for setting VLAN filtering Simplify the logic for setting VLNCTRL.VFE by checking the VMDQ flag and 82598 MAC instead of having to maintain a list of MAC types. 
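A condensed sketch of the resulting logic (the flag and register names are simplified stand-ins): one capability test replaces the per-MAC-type switch, and the 82598 legacy path simply falls into the plain-disable branch:

    #include <stdio.h>
    #include <stdint.h>

    #define FLAG_VMDQ_ENABLED 0x1u
    #define VLNCTRL_VFE       0x40000000u  /* VLAN filter enable bit */

    /* One capability test instead of a switch over every MAC type. */
    static uint32_t vlnctrl_update(uint32_t vlnctrl, uint32_t flags)
    {
        if (flags & FLAG_VMDQ_ENABLED)
            return vlnctrl | VLNCTRL_VFE;  /* VMDq/SR-IOV: filtering must stay on */
        return vlnctrl & ~VLNCTRL_VFE;     /* everything else, 82598 included */
    }

    int main(void)
    {
        printf("%#x\n", vlnctrl_update(0x0u, FLAG_VMDQ_ENABLED)); /* 0x40000000 */
        printf("%#x\n", vlnctrl_update(VLNCTRL_VFE, 0x0u));       /* 0 */
        return 0;
    }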
Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 39 ++++++------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d76bc1a313ea..1c888588cecd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4105,23 +4105,20 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: - /* legacy case, we can just disable VLAN filtering */ + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + } else { vlnctrl &= ~IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } + /* Nothing to do for 82598 */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + /* We are already in VLAN promisc, nothing to do */ if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) return; @@ -4129,10 +4126,6 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* Set flag so we don't redo unnecessary work */ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; - /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ - vlnctrl |= IXGBE_VLNCTRL_VFE; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - /* Add PF to all active pools */ for (i = IXGBE_VLVF_ENTRIES; --i;) { u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); @@ -4204,19 +4197,9 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) vlnctrl |= IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: + if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) || + hw->mac.type == ixgbe_mac_82598EB) return; - } /* We are not in VLAN promisc, nothing to do */ if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)) From d2d43e5b9fce2c30182dd9b6c63f436ea923a4d9 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 22 Aug 2016 16:28:34 -0700 Subject: [PATCH 0688/1050] ixgbe: make ixgbe_led_on/off_t_x550em static These functions are only used in ixgbe_x550.c. 
Fixes a warning when compiling with -Wmissing-prototypes Reported-by: Krishneil Singh Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index e092a8929413..dec8b116e6a2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2125,7 +2125,7 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * @led_idx: led number to turn on **/ -s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) { u16 phy_data; @@ -2147,7 +2147,7 @@ s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) * @hw: pointer to hardware structure * @led_idx: led number to turn off **/ -s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) { u16 phy_data; From 8fe293aaaa7abd192633cf612065b355a66ed6ad Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 26 Aug 2016 14:48:28 -0700 Subject: [PATCH 0689/1050] ixgbe: Resolve NULL reference by setting {read, write}_reg_mdi Set the read_reg_mdi and write_reg_mdi method pointers for X550EM_A_10G_T devices to resolve jumping to NULL. Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index dec8b116e6a2..cd22efb940ad 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3049,6 +3049,8 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = { .identify = &ixgbe_identify_phy_x550em, .read_reg = &ixgbe_read_phy_reg_x550a, .write_reg = &ixgbe_write_phy_reg_x550a, + .read_reg_mdi = &ixgbe_read_phy_reg_mdi, + .write_reg_mdi = &ixgbe_write_phy_reg_mdi, }; static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = { From ade3ccf9dc75c94c1557108572d445f0300adead Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 26 Aug 2016 14:48:33 -0700 Subject: [PATCH 0690/1050] ixgbe: Indicate support for pause frames in all cases All the MACs supported by ixgbe support pause frames, so indicate that support in ethtool. Also set advertising according to requested mode. 
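The mapping this adds, as a standalone sketch (local constants mirror the ethtool ADVERTISED_* semantics but are defined here for illustration): the requested flow-control mode determines which pause bits get advertised, with rx-only pause needing both the symmetric and asymmetric bits:

    #include <stdio.h>

    #define ADV_PAUSE       0x1u
    #define ADV_ASYM_PAUSE  0x2u

    enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

    static unsigned int pause_advertising(enum fc_mode mode)
    {
        switch (mode) {
        case FC_FULL:
            return ADV_PAUSE;                    /* symmetric pause */
        case FC_RX_PAUSE:
            return ADV_PAUSE | ADV_ASYM_PAUSE;   /* rx-only needs both bits */
        case FC_TX_PAUSE:
            return ADV_ASYM_PAUSE;               /* tx-only */
        default:
            return 0;
        }
    }

    int main(void)
    {
        printf("full=%u rx=%u tx=%u none=%u\n",
               pause_advertising(FC_FULL), pause_advertising(FC_RX_PAUSE),
               pause_advertising(FC_TX_PAUSE), pause_advertising(FC_NONE));
        return 0;
    }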
Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 9547191e26c9..730a99f0f002 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -313,6 +313,25 @@ static int ixgbe_get_settings(struct net_device *netdev, break; } + /* Indicate pause support */ + ecmd->supported |= SUPPORTED_Pause; + + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + ecmd->advertising |= ADVERTISED_Pause; + break; + case ixgbe_fc_rx_pause: + ecmd->advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; + break; + case ixgbe_fc_tx_pause: + ecmd->advertising |= ADVERTISED_Asym_Pause; + break; + default: + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + } + if (netif_carrier_ok(netdev)) { switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: From 14b22cd9827ad6765a00ca0b267c3cb0353d9c10 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 29 Aug 2016 16:39:28 -0700 Subject: [PATCH 0691/1050] ixgbevf: add spinlocks for MTU change calls Protect set_rlpml with mailbox lock to make sure the MTU configuration is handled properly. This change resolves an issue where set_rlpml can fail when the VF interface is brought up: ixgbevf 0000:03:1d.6: Failed to set MTU at 1500 Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4044608083cd..7eaac3234049 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1810,8 +1810,10 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) if (hw->mac.type >= ixgbe_mac_X550_vf) ixgbevf_setup_vfmrqc(adapter); + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN); + spin_unlock_bh(&adapter->mbx_lock); if (ret) dev_err(&adapter->pdev->dev, "Failed to set MTU at %d\n", netdev->mtu); @@ -3758,8 +3760,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) if ((new_mtu < 68) || (max_frame > max_possible_frame)) return -EINVAL; + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); + spin_unlock_bh(&adapter->mbx_lock); if (ret) return -EINVAL; From 7564a8880a3cf831078a67bffb05c51f34d133eb Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 1 Sep 2016 13:58:51 -0700 Subject: [PATCH 0692/1050] ixgbe: Use MDIO_PRTAD_NONE consistently The value MDIO_PRTAD_NONE should be used to indicate no PHY address. Not 0, not 0xFFFF. Use the MDIO_PRTAD_NONE value consistently. 
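Why a single sentinel matters, in miniature (the sketch uses the real MDIO_PRTAD_NONE value of -1 from the mdio headers; everything else is illustrative): with two magic "no PHY" encodings, 0 and 0xFFFF, every consumer has to remember to test both, and one well-known value removes that trap:

    #include <stdio.h>

    #define MDIO_PRTAD_NONE (-1)  /* matches the value in the kernel's mdio headers */

    static int prtad = MDIO_PRTAD_NONE;

    static int phy_present(void)
    {
        /* before: if (!prtad || prtad == 0xFFFF) -- two encodings, easy to miss one */
        return prtad != MDIO_PRTAD_NONE;
    }

    int main(void)
    {
        printf("present=%d\n", phy_present());  /* 0 */
        prtad = 3;
        printf("present=%d\n", phy_present());  /* 1 */
        return 0;
    }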
Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index db0731e05401..021ab9b89c71 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -346,8 +346,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) return 0; } } - /* clear value if nothing found */ - hw->phy.mdio.prtad = 0; + /* indicate no PHY found */ + hw->phy.mdio.prtad = MDIO_PRTAD_NONE; return IXGBE_ERR_PHY_ADDR_INVALID; } return 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index cd22efb940ad..7e6b9267ca9d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1459,7 +1459,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); - if (!hw->phy.mdio.prtad || hw->phy.mdio.prtad == 0xFFFF) + if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE) return IXGBE_ERR_PHY_ADDR_INVALID; /* Get external PHY device id */
From 3b00da03ae303a3bdfa3bdfbb078e0eadb749375 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:11 -0700 Subject: [PATCH 0693/1050] ixgbe: Allow setting multiple queues when SR-IOV is enabled The maximum queue count reported was 1; however, support for multiple queues with SR-IOV was added some time ago, so we should report that support to the user so that they can select multiple queues if they so desire. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 730a99f0f002..2d872be336bb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3060,8 +3060,8 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter) /* We only support one q_vector without MSI-X */ max_combined = 1; } else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - /* SR-IOV currently only allows one queue on the PF */ - max_combined = 1; + /* Limit value based on the queue mask */ + max_combined = adapter->ring_feature[RING_F_RSS].mask + 1; } else if (tcs > 1) { /* For DCB report channels per traffic class */ if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
From fa81da7e5b261cf8010f65253661522d3ff71714 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:17 -0700 Subject: [PATCH 0694/1050] ixgbe: Limit reporting of redirection table if SR-IOV is enabled The hardware redirection table can support more queues than the PF currently has when SR-IOV is enabled. To account for this, use the RSS mask to trim off the bits that are not used.
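A self-contained sketch of the trimming (table size and queue count are made up for illustration): each reported entry is masked down so it never references a queue the PF does not own; the mask of indices - 1 assumes the power-of-two queue counts the hardware uses:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t reta[8] = { 0, 5, 2, 7, 4, 1, 6, 3 };  /* hardware-sized table */
        unsigned int indices = 4;                      /* queues the PF really uses */
        uint8_t mask = (uint8_t)(indices - 1);         /* power-of-two assumption */

        for (unsigned int i = 0; i < 8; i++)
            printf("%u ", reta[i] & mask);             /* 0 1 2 3 0 1 2 3 */
        printf("\n");
        return 0;
    }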
Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 2d872be336bb..f49f80380aa5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2947,9 +2947,13 @@ static u32 ixgbe_rss_indir_size(struct net_device *netdev) static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir) { int i, reta_size = ixgbe_rss_indir_tbl_entries(adapter); + u16 rss_m = adapter->ring_feature[RING_F_RSS].mask; + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + rss_m = adapter->ring_feature[RING_F_RSS].indices - 1; for (i = 0; i < reta_size; i++) - indir[i] = adapter->rss_indir_tbl[i]; + indir[i] = adapter->rss_indir_tbl[i] & rss_m; } static int ixgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, From e24fcf28959298e07cae9ee19eb9a4b2b399b4fb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:24 -0700 Subject: [PATCH 0695/1050] ixgbe: Support 4 queue RSS on VFs with 1 or 2 queue RSS on PF Instead of limiting the VFs if we don't use 4 queues for RSS in the PF we can instead just limit the RSS queues used to a power of 2. By doing this we can support use cases where VFs are using more queues than the PF is currently using and can support RSS if so desired. The only limitation on this is that we cannot support 3 queues of RSS in the PF or VF. In either of these cases we should fall back to 2 queues in order to be able to use the power of 2 masking provided by the psrtype register. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 7 ++++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index bcdc88444ceb..15ab337fd7ad 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -515,15 +515,16 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i); /* 64 pool mode with 2 queues per pool */ - if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) { + if ((vmdq_i > 32) || (vmdq_i > 16 && pools)) { vmdq_m = IXGBE_82599_VMDQ_2Q_MASK; rss_m = IXGBE_RSS_2Q_MASK; rss_i = min_t(u16, rss_i, 2); - /* 32 pool mode with 4 queues per pool */ + /* 32 pool mode with up to 4 queues per pool */ } else { vmdq_m = IXGBE_82599_VMDQ_4Q_MASK; rss_m = IXGBE_RSS_4Q_MASK; - rss_i = 4; + /* We can support 4, 2, or 1 queues */ + rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 
2 : 1; } #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1c888588cecd..a244d9a67264 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3248,7 +3248,8 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; else if (tcs > 1) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mtqc |= IXGBE_MTQC_32VF; else mtqc |= IXGBE_MTQC_64VF; @@ -3475,12 +3476,12 @@ static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter); u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; - /* Program table for at least 2 queues w/ SR-IOV so that VFs can + /* Program table for at least 4 queues w/ SR-IOV so that VFs can * make full use of any rings they may have. We will use the * PSRTYPE register to control how many rings we use within the PF. */ - if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2)) - rss_i = 2; + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4)) + rss_i = 4; /* Fill out hash function seeds */ for (i = 0; i < 10; i++) @@ -3544,7 +3545,8 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) mrqc = IXGBE_MRQC_VMDQRT8TCEN; /* 8 TCs */ else if (tcs > 1) mrqc = IXGBE_MRQC_VMDQRT4TCEN; /* 4 TCs */ - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mrqc = IXGBE_MRQC_VMDQRSS32EN; else mrqc = IXGBE_MRQC_VMDQRSS64EN; From 0c339bf9ac2eed861d34a9dd40aee2a2d490ec36 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 9 Sep 2016 12:59:10 -0700 Subject: [PATCH 0696/1050] ixgbe: reset before SRIOV init to avoid mailbox issues Enabling SRIOV while the ixgbevf driver is loaded will result in all mailbox requests from ixgbevf_open() being rejected by ixgbe because adapter->clear_to_send is set to false on reset. Call ixgbe_sriov_reinit() before pci_enable_sriov() to make sure that mailbox requests are handled from the time ixgbevf is loaded. Reported-by: Andrew Bowers Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8618599dfd6f..343a18241323 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -329,13 +329,15 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) for (i = 0; i < adapter->num_vfs; i++) ixgbe_vf_configuration(dev, (i | 0x10000000)); + /* reset before enabling SRIOV to avoid mailbox issues */ + ixgbe_sriov_reinit(adapter); + err = pci_enable_sriov(dev, num_vfs); if (err) { e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } ixgbe_get_vfs(adapter); - ixgbe_sriov_reinit(adapter); return num_vfs; #else From 36b701fae12ac763a568037e4e7c96b5727a8b3e Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Wed, 14 Sep 2016 15:00:02 +0200 Subject: [PATCH 0697/1050] netfilter: nf_tables: validate maximum value of u32 netlink attributes Fetch value and validate u32 netlink attribute. 
This validation is usually required when the u32 netlink attributes are being stored in a field whose size is smaller. This patch revisits 4da449ae1df9 ("netfilter: nft_exthdr: Add size check on u8 nft_exthdr attributes"). Fixes: 96518518cc41 ("netfilter: add nftables") Suggested-by: Pablo Neira Ayuso Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 25 +++++++++++++++++++++++++ net/netfilter/nft_bitwise.c | 8 +++++++- net/netfilter/nft_byteorder.c | 15 +++++++++++++-- net/netfilter/nft_cmp.c | 3 +++ net/netfilter/nft_exthdr.c | 12 +++++++----- net/netfilter/nft_immediate.c | 4 ++++ 7 files changed, 60 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a7a7cebc8d07..5031e072567b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,6 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bd9715e5ff26..b70d3ea1430e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4409,6 +4409,31 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, return 0; } +/** + * nft_parse_u32_check - fetch u32 attribute and check for maximum value + * + * @attr: netlink attribute to fetch value from + * @max: maximum value to be stored in dest + * @dest: pointer to the variable + * + * Parse, check and store a given u32 netlink attribute into variable. + * This function returns -ERANGE if the value goes over maximum value. + * Otherwise a 0 is returned and the attribute value is stored in the + * destination variable. 
+ */ +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +{ + int val; + + val = ntohl(nla_get_be32(attr)); + if (val > max) + return -ERANGE; + + *dest = val; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_u32_check); + /** * nft_parse_register - parse a register value from a netlink attribute * diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index d71cc18fa35d..31c15ed2e5fc 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -52,6 +52,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, { struct nft_bitwise *priv = nft_expr_priv(expr); struct nft_data_desc d1, d2; + u32 len; int err; if (tb[NFTA_BITWISE_SREG] == NULL || @@ -61,7 +62,12 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_XOR] == NULL) return -EINVAL; - priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index b78c28ba465f..ee63d981268d 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -99,6 +99,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_byteorder *priv = nft_expr_priv(expr); + u32 size, len; int err; if (tb[NFTA_BYTEORDER_SREG] == NULL || @@ -117,7 +118,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size); + if (err < 0) + return err; + + priv->size = size; + switch (priv->size) { case 2: case 4: @@ -128,7 +134,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, } priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); - priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e25b35d70e4d..2e53739812b1 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,6 +84,9 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; + if (desc.len > U8_MAX) + return -ERANGE; + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 82c264e40278..a84cf3d66056 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len; + u32 offset, len, err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -67,11 +67,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, tb[NFTA_EXTHDR_LEN] == NULL) return -EINVAL; - offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); + if (err < 0) + return err; - if (offset > U8_MAX || len > U8_MAX) - return -ERANGE; + err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, 
&len); + if (err < 0) + return err; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index db3b746858e3..d17018ff54e6 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -53,6 +53,10 @@ static int nft_immediate_init(const struct nft_ctx *ctx, tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; + + if (desc.len > U8_MAX) + return -ERANGE; + priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
From 8061bb54436c19fd16b7c734a69ff60bac26e3e9 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 14 Sep 2016 23:41:46 +0800 Subject: [PATCH 0698/1050] netfilter: nft_queue: add _SREG_QNUM attr to select the queue number Currently, the user can specify the queue numbers by the _QUEUE_NUM and _QUEUE_TOTAL attributes, which is enough in most situations. But it is actually not very flexible, for example: tcp dport 80 mapped to queue0 tcp dport 81 mapped to queue1 tcp dport 82 mapped to queue2 To do this, we must add 3 nft rules, and more mappings mean more rules ... So take one register to select the queue number; then we can add one simple rule to map queues, maybe like this: queue num tcp dport map { 80:0, 81:1, 82:2 ... } Florian Westphal also proposed wider usage scenarios: queue num jhash ip saddr . ip daddr mod ... queue num meta cpu ... queue num meta mark ... The last point is how to load a queue number from sreg. We could use *(u16 *)&regs->data[reg] to load the queue number, just as the nat expr does to load its l4port, but we will cooperate with the hash expr, meta cpu, meta mark expr and so on, and they all store their result as a u32 type; casting it to a u16 pointer and dereferencing it would generate a wrong result on big endian systems. So just keep it simple: we treat the queue number as a u32 type, although a u16 type would already be enough.
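The endianness pitfall described above, as a runnable sketch: reading the low 16 bits of a u32 through a u16 pointer happens to work on little endian machines and silently returns 0 on big endian ones, which is exactly why the queue number stays a full u32:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t reg = 5;                       /* queue number stored as u32 */
        uint16_t narrow = *(uint16_t *)&reg;    /* the problematic narrow load */

        /* little endian: narrow == 5; big endian: narrow == 0 */
        printf("u32 load: %u, u16-cast load: %u\n", (unsigned)reg, (unsigned)narrow);
        return 0;
    }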
Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 + net/netfilter/nft_queue.c | 102 ++++++++++++++++++++--- 2 files changed, 92 insertions(+), 12 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index bcfb892ff148..1cf41dd838b2 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -894,12 +894,14 @@ enum nft_log_attributes { * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers) */ enum nft_queue_attributes { NFTA_QUEUE_UNSPEC, NFTA_QUEUE_NUM, NFTA_QUEUE_TOTAL, NFTA_QUEUE_FLAGS, + NFTA_QUEUE_SREG_QNUM, __NFTA_QUEUE_MAX }; #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index d16d59959ff6..393d359a1889 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -22,9 +22,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - u16 queuenum; - u16 queues_total; - u16 flags; + enum nft_registers sreg_qnum:8; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -54,26 +55,39 @@ static void nft_queue_eval(const struct nft_expr *expr, regs->verdict.code = ret; } +static void nft_queue_sreg_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue, ret; + + queue = regs->data[priv->sreg_qnum]; + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + regs->verdict.code = ret; +} + static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, + [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 }, }; static int nft_queue_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_queue *priv = nft_expr_priv(expr); u32 maxid; - if (tb[NFTA_QUEUE_NUM] == NULL) - return -EINVAL; - - init_hashrandom(&jhash_initval); priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); - if (tb[NFTA_QUEUE_TOTAL] != NULL) + if (tb[NFTA_QUEUE_TOTAL]) priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); else priv->queues_total = 1; @@ -85,7 +99,7 @@ static int nft_queue_init(const struct nft_ctx *ctx, if (maxid > U16_MAX) return -ERANGE; - if (tb[NFTA_QUEUE_FLAGS] != NULL) { + if (tb[NFTA_QUEUE_FLAGS]) { priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); if (priv->flags & ~NFT_QUEUE_FLAG_MASK) return -EINVAL; @@ -93,6 +107,29 @@ static int nft_queue_init(const struct nft_ctx *ctx, return 0; } +static int nft_queue_sreg_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + int err; + + priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); + err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + if (err < 0) + return err; + + if (tb[NFTA_QUEUE_FLAGS]) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & 
~NFT_QUEUE_FLAG_MASK) + return -EINVAL; + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) + return -EOPNOTSUPP; + } + + return 0; +} + static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_queue *priv = nft_expr_priv(expr); @@ -108,6 +145,21 @@ nla_put_failure: return -1; } +static int +nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + static struct nft_expr_type nft_queue_type; static const struct nft_expr_ops nft_queue_ops = { .type = &nft_queue_type, @@ -117,9 +169,35 @@ static const struct nft_expr_ops nft_queue_ops = { .dump = nft_queue_dump, }; +static const struct nft_expr_ops nft_queue_sreg_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_sreg_eval, + .init = nft_queue_sreg_init, + .dump = nft_queue_sreg_dump, +}; + +static const struct nft_expr_ops * +nft_queue_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM]) + return ERR_PTR(-EINVAL); + + init_hashrandom(&jhash_initval); + + if (tb[NFTA_QUEUE_NUM]) + return &nft_queue_ops; + + if (tb[NFTA_QUEUE_SREG_QNUM]) + return &nft_queue_sreg_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_queue_type __read_mostly = { .name = "queue", - .ops = &nft_queue_ops, + .select_ops = &nft_queue_select_ops, .policy = nft_queue_policy, .maxattr = NFTA_QUEUE_MAX, .owner = THIS_MODULE,
From 2462f3f4a7e079192b78f36900c34f18dad824a7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 15 Sep 2016 20:50:16 +0800 Subject: [PATCH 0699/1050] netfilter: nf_queue: improve queue range support for bridge family After commit ac2863445686 ("netfilter: bridge: add nf_afinfo to enable queuing to userspace"), we can queue packets to user space in the bridge family. But when the user specifies a queue range, packets will only be delivered to the first queue number, because nfqueue_hash only supports the ipv4 and ipv6 families. Now add support for the bridge family too.
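For reference, the scaling trick used by nfqueue_hash, reduced to a standalone sketch (the hash value is a fixed stand-in for the jhash result): reciprocal_scale() maps a 32-bit hash uniformly onto the configured span with a multiply instead of a modulo:

    #include <stdio.h>
    #include <stdint.h>

    /* Same trick as the kernel's reciprocal_scale(): ((u64)val * ep_ro) >> 32
     * lands uniformly in [0, ep_ro) without a division. */
    static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
    {
        return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
    }

    int main(void)
    {
        uint16_t queue = 100;          /* first queue of the range */
        uint16_t queues_total = 4;     /* range length */
        uint32_t hash = 0xdeadbeef;    /* stand-in for the flow hash */

        queue += reciprocal_scale(hash, queues_total);
        printf("queue=%u\n", queue);   /* always within [100, 103] */
        return 0;
    }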
Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 56 ++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index cc8a11f7e306..903dca050fb6 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -41,22 +41,19 @@ static inline void init_hashrandom(u32 *jhash_initval) *jhash_initval = prandom_u32(); } -static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v4(const struct iphdr *iph, u32 initval) { - const struct iphdr *iph = ip_hdr(skb); - /* packets in either direction go into same queue */ if ((__force u32)iph->saddr < (__force u32)iph->daddr) return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); + (__force u32)iph->daddr, iph->protocol, initval); return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); + (__force u32)iph->saddr, iph->protocol, initval); } -static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); u32 a, b, c; if ((__force u32)ip6h->saddr.s6_addr32[3] < @@ -74,17 +71,50 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) else c = (__force u32) ip6h->daddr.s6_addr32[1]; - return jhash_3words(a, b, c, jhash_initval); + return jhash_3words(a, b, c, initval); +} + +static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval) +{ + struct ipv6hdr *ip6h, _ip6h; + struct iphdr *iph, _iph; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + iph = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*iph), &_iph); + if (iph) + return hash_v4(iph, initval); + break; + case htons(ETH_P_IPV6): + ip6h = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*ip6h), &_ip6h); + if (ip6h) + return hash_v6(ip6h, initval); + break; + } + + return 0; } static inline u32 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, - u32 jhash_initval) + u32 initval) { - if (family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; - else if (family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; + switch (family) { + case NFPROTO_IPV4: + queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval), + queues_total); + break; + case NFPROTO_IPV6: + queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval), + queues_total); + break; + case NFPROTO_BRIDGE: + queue += reciprocal_scale(hash_bridge(skb, initval), + queues_total); + break; + } return queue; } From 8dc3c2b86bb16e8f345b80a8af69696e9a7edb65 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 15 Sep 2016 21:29:08 +0800 Subject: [PATCH 0700/1050] netfilter: nf_tables: improve nft payload fast eval There's an off-by-one issue in nft_payload_fast_eval, skb_tail_pointer and ptr + priv->len all point to the last valid address plus 1. So if they are equal, we can still fetch the valid data. It's unnecessary to fall back to nft_payload_eval. 
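The boundary condition in miniature, as a runnable sketch: the tail pointer is one past the last valid byte, so a read of len bytes starting at ptr is in bounds exactly when ptr + len <= tail; the old >= comparison wrongly rejected reads ending flush at the tail:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint8_t pkt[16];
        uint8_t *tail = pkt + sizeof(pkt);   /* one past the last valid byte */
        uint8_t *ptr = pkt + 12;
        size_t len = 4;                      /* read covers bytes 12..15: valid */

        memset(pkt, 0xab, sizeof(pkt));

        if (ptr + len > tail) {              /* old check: ptr + len >= tail */
            printf("rejected\n");            /* old code wrongly took this path */
            return 1;
        }
        printf("last byte: %#x\n", ptr[len - 1]);
        return 0;
    }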
Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index fb8b5892b5ff..36ba4e55d84e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -98,7 +98,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, ptr += priv->offset; - if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) + if (unlikely(ptr + priv->len > skb_tail_pointer(skb))) return false; *dest = 0; From a20877b5edec4d2b62560b5245199af04846476c Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 17 Sep 2016 14:31:20 +0800 Subject: [PATCH 0701/1050] netfilter: nf_tables: check tprot_set first when we use xt.thoff pkt->xt.thoff is not always set properly, but we use it without any check. For payload expr, it will cause wrong results. For nftrace, we may notify the wrong network or transport header to user space; furthermore, after adding the following nft rule, a warning message will be printed out: # nft add rule arp filter output meta nftrace set 1 WARNING: CPU: 0 PID: 13428 at net/netfilter/nf_tables_trace.c:263 nft_trace_notify+0x4a3/0x5e0 [nf_tables] Call Trace: [] dump_stack+0x63/0x85 [] __warn+0xcb/0xf0 [] warn_slowpath_null+0x1d/0x20 [] nft_trace_notify+0x4a3/0x5e0 [nf_tables] [ ... ] [] nft_do_chain_arp+0x78/0x90 [nf_tables_arp] [] nf_iterate+0x62/0x80 [] nf_hook_slow+0x73/0xd0 [] arp_xmit+0x8f/0xb0 [ ... ] [] arp_solicit+0x106/0x2c0 So before we use pkt->xt.thoff, check the tprot_set first. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 5 ++++- net/netfilter/nf_tables_trace.c | 20 +++++++++++--------- net/netfilter/nft_payload.c | 4 ++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 36ba4e55d84e..67259cefef06 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -93,8 +93,11 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); - else + else { + if (!pkt->tprot_set) + return false; ptr = skb_network_header(skb) + pkt->xt.thoff; + } ptr += priv->offset; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 39eb1cc62e91..696fe8f6f2f2 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -113,20 +113,22 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, const struct nft_pktinfo *pkt) { const struct sk_buff *skb = pkt->skb; - unsigned int len = min_t(unsigned int, - pkt->xt.thoff - skb_network_offset(skb), - NFT_TRACETYPE_NETWORK_HSIZE); int off = skb_network_offset(skb); + unsigned int len, nh_end; + nh_end = pkt->tprot_set ?
pkt->xt.thoff : skb->len; + len = min_t(unsigned int, nh_end - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) return -1; - len = min_t(unsigned int, skb->len - pkt->xt.thoff, - NFT_TRACETYPE_TRANSPORT_HSIZE); - - if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, - pkt->xt.thoff, len)) - return -1; + if (pkt->tprot_set) { + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + } if (!skb_mac_header_was_set(skb)) return 0; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 12cd4bf16d17..b2f88617611a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -92,6 +92,8 @@ static void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: + if (!pkt->tprot_set) + goto err; offset = pkt->xt.thoff; break; default: @@ -184,6 +186,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: + if (!pkt->tprot_set) + goto err; offset = pkt->xt.thoff; break; default: From 7bdc66242de7f9cbe8dbb01757042dd18744d800 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sun, 18 Sep 2016 10:52:25 +0800 Subject: [PATCH 0702/1050] netfilter: Enhance the codes used to get random once There is some code used to obtain a random value once in netfilter. We could use net_get_random_once to simplify it. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_RATEEST.c | 6 +----- net/netfilter/xt_connlimit.c | 8 +------- net/netfilter/xt_recent.c | 7 ++----- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 515131f9e021..dbd6c4a12b97 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -24,7 +24,6 @@ static DEFINE_MUTEX(xt_rateest_mutex); #define RATEEST_HSIZE 16 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly; static unsigned int jhash_rnd __read_mostly; -static bool rnd_inited __read_mostly; static unsigned int xt_rateest_hash(const char *name) { @@ -99,10 +98,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) } cfg; int ret; - if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); - rnd_inited = true; - } + net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); est = xt_rateest_lookup(info->name); if (est) { diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 99bbc829868d..b6dc322593a3 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -366,14 +366,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) unsigned int i; int ret; - if (unlikely(!connlimit_rnd)) { - u_int32_t rand; + net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - do { - get_random_bytes(&rand, sizeof(rand)); - } while (!rand); - cmpxchg(&connlimit_rnd, 0, rand); - } ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) { pr_info("cannot load conntrack support for " diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index d725a27743a1..e3b7a09b103e 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -110,7 +110,6 @@ static const struct file_operations recent_old_fops, recent_mt_fops; #endif static u_int32_t hash_rnd
__read_mostly; -static bool hash_rnd_inited __read_mostly; static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { @@ -340,10 +339,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, int ret = -EINVAL; size_t sz; - if (unlikely(!hash_rnd_inited)) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); - hash_rnd_inited = true; - } + net_get_random_once(&hash_rnd, sizeof(hash_rnd)); + if (info->check_set & ~XT_RECENT_VALID_FLAGS) { pr_info("Unsupported user space flags (%08x)\n", info->check_set); From b9d80f83bf8c3485ae53a4f3a715363d764bb0e4 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 20 Sep 2016 10:31:04 +0800 Subject: [PATCH 0703/1050] netfilter: xt_helper: Use sizeof(variable) instead of literal number It's better to use sizeof(info->name)-1 as the index to force-set the string's terminating NUL instead of the literal number '29'. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 9f4ab00c8050..805c9f64a04c 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -65,7 +65,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) par->family); return ret; } - info->name[29] = '\0'; + info->name[sizeof(info->name) - 1] = '\0'; return 0; } From 4004d5c374dabcbce201e16442e4596b764cc60b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Sep 2016 18:22:46 +0200 Subject: [PATCH 0704/1050] netfilter: nft_lookup: remove superfluous element found check We already checked for !found just a bit before: if (!found) { regs->verdict.code = NFT_BREAK; return; } if (found && set->flags & NFT_SET_MAP) ^^^^^ So this redundant check can just go away. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_lookup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index e164325d1bc0..8166b6994cc7 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -43,7 +43,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, return; } - if (found && set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP) nft_data_copy(&regs->data[priv->dreg], nft_set_ext_data(ext), set->dlen); From c18df0adabf8400c1825b90382d06df5edc303fa Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 20 Sep 2016 11:46:35 -0700 Subject: [PATCH 0705/1050] arm64: Call numa_store_cpu_info() earlier. The wq_numa_init() function makes a private CPU-to-node map by calling cpu_to_node() early in the boot process, before the non-boot CPUs are brought online. Since the default implementation of cpu_to_node() returns zero for CPUs that have never been brought online, the workqueue system's view is that *all* CPUs are on node zero. When the unbound workqueue for a non-zero node is created, the tsk_cpus_allowed() for the worker threads is the empty set because there are, in the view of the workqueue system, no CPUs on non-zero nodes. The code in try_to_wake_up() using this empty cpumask ends up using the cpumask empty set value of NR_CPUS as an index into the per-CPU area pointer array, and gets garbage as it is one past the end of the array.
This results in: [ 0.881970] Unable to handle kernel paging request at virtual address fffffb1008b926a4 [ 1.970095] pgd = fffffc00094b0000 [ 1.973530] [fffffb1008b926a4] *pgd=0000000000000000, *pud=0000000000000000, *pmd=0000000000000000 [ 1.982610] Internal error: Oops: 96000004 [#1] SMP [ 1.987541] Modules linked in: [ 1.990631] CPU: 48 PID: 295 Comm: cpuhp/48 Tainted: G W 4.8.0-rc6-preempt-vol+ #9 [ 1.999435] Hardware name: Cavium ThunderX CN88XX board (DT) [ 2.005159] task: fffffe0fe89cc300 task.stack: fffffe0fe8b8c000 [ 2.011158] PC is at try_to_wake_up+0x194/0x34c [ 2.015737] LR is at try_to_wake_up+0x150/0x34c [ 2.020318] pc : [] lr : [] pstate: 600000c5 [ 2.027803] sp : fffffe0fe8b8fb10 [ 2.031149] x29: fffffe0fe8b8fb10 x28: 0000000000000000 [ 2.036522] x27: fffffc0008c63bc8 x26: 0000000000001000 [ 2.041896] x25: fffffc0008c63c80 x24: fffffc0008bfb200 [ 2.047270] x23: 00000000000000c0 x22: 0000000000000004 [ 2.052642] x21: fffffe0fe89d25bc x20: 0000000000001000 [ 2.058014] x19: fffffe0fe89d1d00 x18: 0000000000000000 [ 2.063386] x17: 0000000000000000 x16: 0000000000000000 [ 2.068760] x15: 0000000000000018 x14: 0000000000000000 [ 2.074133] x13: 0000000000000000 x12: 0000000000000000 [ 2.079505] x11: 0000000000000000 x10: 0000000000000000 [ 2.084879] x9 : 0000000000000000 x8 : 0000000000000000 [ 2.090251] x7 : 0000000000000040 x6 : 0000000000000000 [ 2.095621] x5 : ffffffffffffffff x4 : 0000000000000000 [ 2.100991] x3 : 0000000000000000 x2 : 0000000000000000 [ 2.106364] x1 : fffffc0008be4c24 x0 : ffffff0ffffada80 [ 2.111737] [ 2.113236] Process cpuhp/48 (pid: 295, stack limit = 0xfffffe0fe8b8c020) [ 2.120102] Stack: (0xfffffe0fe8b8fb10 to 0xfffffe0fe8b90000) [ 2.125914] fb00: fffffe0fe8b8fb80 fffffc00080e7648 . . . [ 2.442859] Call trace: [ 2.445327] Exception stack(0xfffffe0fe8b8f940 to 0xfffffe0fe8b8fa70) [ 2.451843] f940: fffffe0fe89d1d00 0000040000000000 fffffe0fe8b8fb10 fffffc00080e7468 [ 2.459767] f960: fffffe0fe8b8f980 fffffc00080e4958 ffffff0ff91ab200 fffffc00080e4b64 [ 2.467690] f980: fffffe0fe8b8f9d0 fffffc00080e515c fffffe0fe8b8fa80 0000000000000000 [ 2.475614] f9a0: fffffe0fe8b8f9d0 fffffc00080e58e4 fffffe0fe8b8fa80 0000000000000000 [ 2.483540] f9c0: fffffe0fe8d10000 0000000000000040 fffffe0fe8b8fa50 fffffc00080e5ac4 [ 2.491465] f9e0: ffffff0ffffada80 fffffc0008be4c24 0000000000000000 0000000000000000 [ 2.499387] fa00: 0000000000000000 ffffffffffffffff 0000000000000000 0000000000000040 [ 2.507309] fa20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 2.515233] fa40: 0000000000000000 0000000000000000 0000000000000000 0000000000000018 [ 2.523156] fa60: 0000000000000000 0000000000000000 [ 2.528089] [] try_to_wake_up+0x194/0x34c [ 2.533723] [] wake_up_process+0x28/0x34 [ 2.539275] [] create_worker+0x110/0x19c [ 2.544824] [] alloc_unbound_pwq+0x3cc/0x4b0 [ 2.550724] [] wq_update_unbound_numa+0x10c/0x1e4 [ 2.557066] [] workqueue_online_cpu+0x220/0x28c [ 2.563234] [] cpuhp_invoke_callback+0x6c/0x168 [ 2.569398] [] cpuhp_up_callbacks+0x44/0xe4 [ 2.575210] [] cpuhp_thread_fun+0x13c/0x148 [ 2.581027] [] smpboot_thread_fn+0x19c/0x1a8 [ 2.586929] [] kthread+0xdc/0xf0 [ 2.591776] [] ret_from_fork+0x10/0x50 [ 2.597147] Code: b00057e1 91304021 91005021 b8626822 (b8606821) [ 2.603464] ---[ end trace 58c0cd36b88802bc ]--- [ 2.608138] Kernel panic - not syncing: Fatal exception Fix by moving call to numa_store_cpu_info() for all CPUs into smp_prepare_cpus(), which happens before wq_numa_init(). 
Since smp_store_cpu_info() now contains only a single function call, simplify by removing the function and out-lining its contents. Suggested-by: Robert Richter Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Cc: # 4.7.x- Signed-off-by: David Daney Reviewed-by: Robert Richter Tested-by: Yisheng Xie Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d93d43352504..3ff173e92582 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -201,12 +201,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } -static void smp_store_cpu_info(unsigned int cpuid) -{ - store_cpu_topology(cpuid); - numa_store_cpu_info(cpuid); -} - /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -254,7 +248,7 @@ asmlinkage void secondary_start_kernel(void) */ notify_cpu_starting(cpu); - smp_store_cpu_info(cpu); + store_cpu_topology(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -689,10 +683,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { int err; unsigned int cpu; + unsigned int this_cpu; init_cpu_topology(); - smp_store_cpu_info(smp_processor_id()); + this_cpu = smp_processor_id(); + store_cpu_topology(this_cpu); + numa_store_cpu_info(this_cpu); /* * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set @@ -719,6 +716,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) continue; set_cpu_present(cpu, true); + numa_store_cpu_info(cpu); } } From 67787b68ec48c239d5ec12f9bf5adaf5c459517a Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 23 Sep 2016 16:42:08 +0900 Subject: [PATCH 0706/1050] arm64: kgdb: handle read-only text / modules Handle read-only cases when CONFIG_DEBUG_RODATA (4.0) or CONFIG_DEBUG_SET_MODULE_RONX (3.18) are enabled by using aarch64_insn_write() instead of probe_kernel_write() as introduced by commit 2f896d586610 ("arm64: use fixmap for text patching") in 4.0. Fixes: 11d91a770f1f ("arm64: Add CONFIG_DEBUG_SET_MODULE_RONX support") Signed-off-by: AKASHI Takahiro Reviewed-by: Mark Rutland Cc: Will Deacon Cc: Jason Wessel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/debug-monitors.h | 2 -- arch/arm64/kernel/kgdb.c | 36 ++++++++++++++++--------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 4b6b3f72a215..b71420a12f26 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -61,8 +61,6 @@ #define AARCH64_BREAK_KGDB_DYN_DBG \ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) -#define KGDB_DYN_BRK_INS_BYTE(x) \ - ((AARCH64_BREAK_KGDB_DYN_DBG >> (8 * (x))) & 0xff) #define CACHE_FLUSH_IS_SAFE 1 diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 8c57f6496e56..e017a9493b92 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -19,10 +19,13 @@ * along with this program. If not, see . */ +#include #include #include #include #include +#include +#include #include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -338,15 +341,24 @@ void kgdb_arch_exit(void) unregister_die_notifier(&kgdb_notifier); } -/* - * ARM instructions are always in LE. 
- * Break instruction is encoded in LE format - */ -struct kgdb_arch arch_kgdb_ops = { - .gdb_bpt_instr = { - KGDB_DYN_BRK_INS_BYTE(0), - KGDB_DYN_BRK_INS_BYTE(1), - KGDB_DYN_BRK_INS_BYTE(2), - KGDB_DYN_BRK_INS_BYTE(3), - } -}; +struct kgdb_arch arch_kgdb_ops; + +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + + BUILD_BUG_ON(AARCH64_INSN_SIZE != BREAK_INSTR_SIZE); + + err = aarch64_insn_read((void *)bpt->bpt_addr, (u32 *)bpt->saved_instr); + if (err) + return err; + + return aarch64_insn_write((void *)bpt->bpt_addr, + (u32)AARCH64_BREAK_KGDB_DYN_DBG); +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + return aarch64_insn_write((void *)bpt->bpt_addr, + *(u32 *)bpt->saved_instr); +} From 21641c2e1ffd0b504610a33beaeab8fcc5140677 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 18 Sep 2016 15:52:20 -0700 Subject: [PATCH 0707/1050] net_sched: check NULL on error path in route4_change() On error path in route4_change(), 'f' could be NULL, so we should check NULL before calling tcf_exts_destroy(). Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") Reported-by: kbuild test robot Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a4ce39b19be0..455fc8f83d0a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -559,7 +559,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: - tcf_exts_destroy(&f->exts); + if (f) + tcf_exts_destroy(&f->exts); kfree(f); return err; } From 2ed5c3f09627f72a2e0e407a86b2ac05494190f9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 18 Sep 2016 16:22:47 -0700 Subject: [PATCH 0708/1050] sch_qfq: keep backlog updated with qlen Reported-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index f27ffee106f6..ca0516e6f743 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1153,6 +1153,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) if (!skb) return NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); @@ -1256,6 +1257,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } bstats_update(&cl->bstats, skb); + qdisc_qstats_backlog_inc(sch, skb); ++sch->q.qlen; agg = cl->agg; @@ -1476,6 +1478,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } From 3d4357fba82b3cf19ebf0a04d1c9cb086af15d02 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 18 Sep 2016 16:22:48 -0700 Subject: [PATCH 0709/1050] sch_sfb: keep backlog updated with qlen Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/sch_sfb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index add3cc7d37ec..20a350bd1b1d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -400,6 +400,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, enqueue: ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; increment_qlen(skb, q); } else if (net_xmit_drop_count(ret)) { @@ -428,6 +429,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; decrement_qlen(skb, q); } @@ -450,6 +452,7 @@ static void sfb_reset(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->slot = 0; q->double_buffering = false; From a3007446e53af07c53bdb4cabad7b3ea60859da4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 20 Sep 2016 18:19:13 -0300 Subject: [PATCH 0710/1050] sctp: fix the handling of SACK Gap Ack blocks sctp_acked() is using 32bit arithmetics on 16bits vars, via TSN_lte() macros, which is weird and confusing. Once the offset to ctsn is calculated, all wrapping is already handled and thus to verify the Gap Ack blocks we can just use pure less/big-or-equal than checks. Also, rename gap variable to tsn_offset, so it's more meaningful, as it doesn't point to any gap at all. Even so, I don't think this discrepancy resulted in any practical bug. This patch is a preparation for the next one, which will introduce typecheck() for TSN_lte() macros and would cause a compile error here. Suggested-by: David Laight Reported-by: David Laight Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/sctp/outqueue.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8c3f446d965c..3ec6da8bbb53 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1719,7 +1719,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { int i; sctp_sack_variable_t *frags; - __u16 gap; + __u16 tsn_offset, blocks; __u32 ctsn = ntohl(sack->cum_tsn_ack); if (TSN_lte(tsn, ctsn)) @@ -1738,10 +1738,11 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) */ frags = sack->variable; - gap = tsn - ctsn; - for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) { - if (TSN_lte(ntohs(frags[i].gab.start), gap) && - TSN_lte(gap, ntohs(frags[i].gab.end))) + blocks = ntohs(sack->num_gap_ack_blocks); + tsn_offset = tsn - ctsn; + for (i = 0; i < blocks; ++i) { + if (tsn_offset >= ntohs(frags[i].gab.start) && + tsn_offset <= ntohs(frags[i].gab.end)) goto pass; } From 182691d0998400f35ad304718024e60feaa864aa Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 20 Sep 2016 18:19:14 -0300 Subject: [PATCH 0711/1050] sctp: improve how SSN, TSN and ASCONF serial are compared Make it similar to time_before() macros: - easier to understand - make use of typecheck() to avoid working on unexpected variable types (made the issue on previous patch visible) - for _[lg]te versions, slightly faster, as the compiler used to generate a sequence of cmp/je/cmp/js instructions and now it's sub/test/jle (for _lte): Before, for sctp_outq_sack: if (primary->cacc.changeover_active) { 1f01: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1f08: 74 6e je 1f78 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1f0a: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); 1f10: 8b 7d bc mov -0x44(%rbp),%edi 1f13: 39 c7 cmp %eax,%edi 1f15: 74 25 je 1f3c 1f17: 39 f8 cmp %edi,%eax 1f19: 78 21 js 1f3c primary->cacc.changeover_active = 0; After: if (primary->cacc.changeover_active) { 1ee7: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1eee: 74 73 je 1f63 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1ef0: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax 1ef6: 2b 45 b4 sub -0x4c(%rbp),%eax 1ef9: 85 c0 test %eax,%eax 1efb: 7e 26 jle 1f23 primary->cacc.changeover_active = 0; *_lt() generated pretty much the same code. Tested with gcc (GCC) 6.1.1 20160621. This patch also removes SSN_lte as it is not used and cleans up some comments. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 92 ++++++++----------------------------------- 1 file changed, 17 insertions(+), 75 deletions(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index bafe2a0ab908..ca6c971dd74a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -307,85 +307,27 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) } /* Compare two TSNs */ +#define TSN_lt(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) -/* RFC 1982 - Serial Number Arithmetic - * - * 2. Comparison - * Then, s1 is said to be equal to s2 if and only if i1 is equal to i2, - * in all other cases, s1 is not equal to s2.
- * - * s1 is said to be less than s2 if, and only if, s1 is not equal to s2, - * and - * - * (i1 < i2 and i2 - i1 < 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 > 2^(SERIAL_BITS - 1)) - * - * s1 is said to be greater than s2 if, and only if, s1 is not equal to - * s2, and - * - * (i1 < i2 and i2 - i1 > 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 < 2^(SERIAL_BITS - 1)) - */ - -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on TSNs in this document SHOULD use Serial - * Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32. - */ - -enum { - TSN_SIGN_BIT = (1<<31) -}; - -static inline int TSN_lt(__u32 s, __u32 t) -{ - return ((s) - (t)) & TSN_SIGN_BIT; -} - -static inline int TSN_lte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); -} +#define TSN_lte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) <= 0)) /* Compare two SSNs */ +#define SSN_lt(a,b) \ + (typecheck(__u16, a) && \ + typecheck(__u16, b) && \ + ((__s16)((a) - (b)) < 0)) -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on Stream Sequence Numbers in this document - * SHOULD use Serial Number Arithmetic as defined in [RFC1982] where - * SERIAL_BITS = 16. - */ -enum { - SSN_SIGN_BIT = (1<<15) -}; - -static inline int SSN_lt(__u16 s, __u16 t) -{ - return ((s) - (t)) & SSN_SIGN_BIT; -} - -static inline int SSN_lte(__u16 s, __u16 t) -{ - return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); -} - -/* - * ADDIP 3.1.1 - * The valid range of Serial Number is from 0 to 4294967295 (2**32 - 1). Serial - * Numbers wrap back to 0 after reaching 4294967295. - */ -enum { - ADDIP_SERIAL_SIGN_BIT = (1<<31) -}; - -static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); -} +/* ADDIP 3.1.1 */ +#define ADDIP_SERIAL_gte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((b) - (a)) <= 0)) /* Check VTAG of the packet matches the sender's own tag. */ static inline int From 429baa6f0e1b9237a3667c3a5e8ca76051e6d0b7 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 22 Sep 2016 15:47:45 +0100 Subject: [PATCH 0712/1050] sfc: check async completer is !NULL before calling Add a NULL check before calling asynchronous MCDI completion functions during device removal. Fixes: 7014d7f6 ("sfc: allow asynchronous MCDI without completion function") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 9fbc12a8f80c..241520943ada 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1156,7 +1156,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx) * acquired locks in the wrong order. */ list_for_each_entry_safe(async, next, &mcdi->async_list, list) { - async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); + if (async->complete) + async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); list_del(&async->list); kfree(async); } From fefa569a9d4bc4b7758c0fddd75bb0382c95da77 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Sep 2016 08:58:55 -0700 Subject: [PATCH 0713/1050] net_sched: sch_fq: account for schedule/timers drifts It looks like the following patch can make FQ very precise, even in VM or stressed hosts. It matters at high pacing rates. 
We take into account the difference between the time that was programmed when the last packet was sent, and current time (a drift of tens of usecs is often observed). Add an EWMA of the unthrottle latency to help diagnostics. This latency is the difference between current time and oldest packet in delayed RB-tree. This accounts for the high resolution timer latency, but can be different under stress, as fq_check_throttled() can opportunistically be called from a dequeue() called after an enqueue() for a different flow. Tested: // Start a 10Gbit flow $ netperf --google-pacing-rate 1250000000 -H lpaa24 -l 10000 -- -K bbr & Before patch : $ sar -n DEV 10 5 | grep eth0 | grep Average Average: eth0 17106.04 756876.84 1102.75 1119049.02 0.00 0.00 0.52 After patch : $ sar -n DEV 10 5 | grep eth0 | grep Average Average: eth0 17867.00 800245.90 1151.77 1183172.12 0.00 0.00 0.52 A new iproute2 tc can output the 'unthrottle latency' : $ tc -s qd sh dev eth0 | grep latency 0 gc, 0 highprio, 32490767 throttled, 2382 ns latency Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 +- net/sched/sch_fq.c | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f8e39dbaa781..df7451d35131 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -811,7 +811,7 @@ struct tc_fq_qd_stats { __u32 flows; __u32 inactive_flows; __u32 throttled_flows; - __u32 pad; + __u32 unthrottle_latency_ns; }; /* Heavy-Hitter Filter */ diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 5dd929cc1423..18e752439f6f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -86,6 +86,7 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; @@ -408,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, static void fq_check_throttled(struct fq_sched_data *q, u64 now) { + unsigned long sample; struct rb_node *p; if (q->time_next_delayed_flow > now) return; + /* Update unthrottle latency EWMA. + * This is cheap and can help diagnosing timer/latency problems. + */ + sample = (unsigned long)(now - q->time_next_delayed_flow); + q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; + q->unthrottle_latency_ns += sample >> 3; + q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { struct fq_flow *f = container_of(p, struct fq_flow, rate_node); @@ -515,7 +524,12 @@ begin: len = NSEC_PER_SEC; q->stat_pkts_too_long++; } - + /* Account for schedule/timers drifts. + * f->time_next_packet was set when prior packet was sent, + * and current time (@now) can be too late by tens of us.
+ */ + if (f->time_next_packet) + len -= min(len/2, now - f->time_next_packet); f->time_next_packet = now + len; } out: @@ -787,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; + q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; q->new_flows.first = NULL; q->old_flows.first = NULL; @@ -854,8 +869,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.flows = q->flows; st.inactive_flows = q->inactive_flows; st.throttled_flows = q->throttled_flows; - st.pad = 0; - + st.unthrottle_latency_ns = min_t(unsigned long, + q->unthrottle_latency_ns, ~0U); sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); From 53e89941ba2a969c483aa29b907de9a823179297 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:41 +0300 Subject: [PATCH 0714/1050] net_sched: act_vlan: add helper inlines to access tcf_vlan info Needed e.g for offloading drivers to pick the relevant attributes. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 6b835889ea30..48cca321ee6c 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -11,6 +11,7 @@ #define __NET_TC_VLAN_H #include +#include #define VLAN_F_POP 0x1 #define VLAN_F_PUSH 0x2 @@ -24,4 +25,28 @@ struct tcf_vlan { }; #define to_vlan(a) ((struct tcf_vlan *)a) +static inline bool is_tcf_vlan(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_VLAN) + return true; +#endif + return false; +} + +static inline u32 tcf_vlan_action(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_action; +} + +static inline u16 tcf_vlan_push_vid(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_vid; +} + +static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_proto; +} + #endif /* __NET_TC_VLAN_H */ From 9deb2241f19f26800e3b4c6bf49c4db992192bf0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:42 +0300 Subject: [PATCH 0715/1050] net/mlx5: E-Switch, Set the vport when registering the uplink rep Set the vport value in the PF entry to be that of the uplink so we can use it blindly over the tc / eswitch offload code without translating it each time we deal with the uplink representor. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 6 +-- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 10 +---- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- .../mellanox/mlx5/core/eswitch_offloads.c | 41 +++++++++---------- 4 files changed, 27 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a9fc9d4c6af4..b309e7cbe1bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3726,9 +3726,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); rep.load = mlx5e_nic_rep_load; rep.unload = mlx5e_nic_rep_unload; - rep.vport = 0; + rep.vport = FDB_UPLINK_VPORT; rep.priv_data = priv; - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, 0, &rep); } } @@ -3867,7 +3867,7 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 22cfc4ac1837..783e1222e71e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -120,10 +120,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch_rep *rep = priv->ppriv; u32 src_vport; - if (rep->vport) /* set source vport for the flow */ - src_vport = rep->vport; - else - src_vport = FDB_UPLINK_VPORT; + src_vport = rep->vport; return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); } @@ -399,10 +396,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, out_priv = netdev_priv(out_dev); out_rep = out_priv->ppriv; - if (out_rep->vport == 0) - *dest_vport = FDB_UPLINK_VPORT; - else - *dest_vport = out_rep->vport; + *dest_vport = out_rep->vport; *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b96e8c93bf9d..6d8c5a2fbb63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -254,9 +254,10 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, struct mlx5_eswitch_rep *rep); void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport); + int vport_index); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7de40e6b0c25..516ac9920b75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -144,16 +144,12 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, { struct mlx5_flow_rule *flow_rule; struct mlx5_esw_sq *esw_sq; - int vport; int err; int i; if (esw->mode != SRIOV_OFFLOADS) return 0; - vport = rep->vport == 0 ? 
- FDB_UPLINK_VPORT : rep->vport; - for (i = 0; i < sqns_num; i++) { esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); if (!esw_sq) { @@ -163,7 +159,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, /* Add re-inject rule to the PF/representor sqs */ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - vport, + rep->vport, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -620,27 +616,30 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) } void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) -{ - struct mlx5_esw_offload *offloads = &esw->offloads; - - memcpy(&offloads->vport_reps[rep->vport], rep, - sizeof(struct mlx5_eswitch_rep)); - - INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); - offloads->vport_reps[rep->vport].valid = true; -} - -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport) + int vport_index, + struct mlx5_eswitch_rep *__rep) { struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[vport]; + rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + memcpy(rep, __rep, sizeof(struct mlx5_eswitch_rep)); + + INIT_LIST_HEAD(&rep->vport_sqs_list); + rep->valid = true; +} + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport_index) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport_index]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) rep->unload(esw, rep); - offloads->vport_reps[vport].valid = false; + rep->valid = false; } From bac9b6aa1df7d584d72558cdd12df186e91245b3 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:43 +0300 Subject: [PATCH 0716/1050] net/mlx5: E-Switch, Set vport representor fields explicitly on registration The structure we use for the eswitch vport representor (mlx5_eswitch_rep) has some fields which are set from upper layers in the driver when they register the rep. Use explicit setting on registration time for them and avoid global memcpy. This patch doesn't add new functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +++-- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6d8c5a2fbb63..ebfcde02a5db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -178,11 +178,12 @@ struct mlx5_eswitch_rep { void (*unload)(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); u16 vport; - struct mlx5_flow_rule *vport_rx_rule; + u8 hw_id[ETH_ALEN]; void *priv_data; + + struct mlx5_flow_rule *vport_rx_rule; struct list_head vport_sqs_list; bool valid; - u8 hw_id[ETH_ALEN]; }; struct mlx5_esw_offload { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 516ac9920b75..80c6f4f7487b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -624,7 +624,13 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep = &offloads->vport_reps[vport_index]; - memcpy(rep, __rep, sizeof(struct mlx5_eswitch_rep)); + memset(rep, 0, sizeof(*rep)); + + rep->load = __rep->load; + rep->unload = __rep->unload; + rep->vport = __rep->vport; + rep->priv_data = __rep->priv_data; + ether_addr_copy(rep->hw_id, __rep->hw_id); INIT_LIST_HEAD(&rep->vport_sqs_list); rep->valid = true; From e33dfe316cf3b408e63bf0c21be0842412eb7981 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:44 +0300 Subject: [PATCH 0717/1050] net/mlx5: E-Switch, Allow fine tuning of eswitch vport push/pop vlan The HW can be programmed to push vlan, pop vlan or both. A factorization step towards using the push/pop capabilities in the eswitch offloads mode. This patch doesn't add new functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 33 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f75f864fa490..abbf2c369923 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -127,7 +127,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, } static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 vlan, u8 qos, bool set) + u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -135,14 +135,18 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) return -ENOTSUPP; - esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", - vport, vlan, qos, set); - if (set) { + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); + + if (set_flags & SET_VLAN_STRIP) MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { /* insert only if no vlan in packet */ MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -1778,25 +1782,21 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, return 0; } -int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos) +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport; int err = 0; - int set = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) return -EINVAL; - if (vlan || qos) - set = 1; - mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) goto unlock; @@ -1814,6 +1814,17 @@ unlock: return err; } +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); +} + int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ebfcde02a5db..4f5391a7965a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -246,6 +246,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); From 776b12b674db53012a7ce8c379a0bbdec0a5ffa5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:45 +0300 Subject: [PATCH 0718/1050] net/mlx5: Put elements related to offloaded TC rule in one struct Put the representors related to the source and dest vports and the action in 
struct mlx5_esw_flow_attr which is used while setting the FDB rule. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 51 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 10 +++- .../mellanox/mlx5/core/eswitch_offloads.c | 9 ++-- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 783e1222e71e..3eb319b12663 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -47,6 +48,7 @@ struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; struct mlx5_flow_rule *rule; + struct mlx5_esw_flow_attr *attr; }; #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 @@ -114,15 +116,11 @@ err_create_ft: static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - u32 action, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - u32 src_vport; - src_vport = rep->vport; - - return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, @@ -358,7 +356,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *dest_vport) + struct mlx5_esw_flow_attr *attr) { const struct tc_action *a; LIST_HEAD(actions); @@ -366,17 +364,18 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tc_no_actions(exts)) return -EINVAL; - *action = 0; + memset(attr, 0, sizeof(*attr)); + attr->in_rep = priv->ppriv; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ - if (*action) + if (attr->action) return -EINVAL; if (is_tcf_gact_shot(a)) { - *action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -384,7 +383,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; struct mlx5e_priv *out_priv; - struct mlx5_eswitch_rep *out_rep; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -394,10 +392,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); - out_rep = out_priv->ppriv; - *dest_vport = out_rep->vport; - *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->out_rep = out_priv->ppriv; continue; } @@ -411,18 +408,27 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, { struct mlx5e_tc_table *tc = &priv->fs.tc; int err = 0; - u32 flow_tag, action, dest_vport = 0; + bool fdb_flow = false; + u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + if (esw && esw->mode == SRIOV_OFFLOADS) + fdb_flow = true; + flow = 
rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); - if (flow) + if (flow) { old = flow->rule; - else - flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } else { + if (fdb_flow) + flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), + GFP_KERNEL); + else + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } spec = mlx5_vzalloc(sizeof(*spec)); if (!spec || !flow) { @@ -436,11 +442,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - if (esw && esw->mode == SRIOV_OFFLOADS) { - err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (fdb_flow) { + flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); + err = parse_tc_fdb_actions(priv, f->exts, flow->attr); if (err < 0) goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); } else { err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); if (err < 0) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4f5391a7965a..eeeeadc51558 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -238,11 +238,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport); + struct mlx5_esw_flow_attr *attr); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); @@ -251,6 +252,13 @@ enum { SET_VLAN_INSERT = BIT(1) }; +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_eswitch_rep *out_rep; + + int action; +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 80c6f4f7487b..781debb1acf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,19 +46,22 @@ enum { struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest = { 0 }; struct mlx5_fc *counter = NULL; struct mlx5_flow_rule *rule; void *misc; + int action; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); + action = attr->action; + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = dst_vport; + dest.vport_num = attr->out_rep->vport; action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); @@ -69,7 +72,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); From 8515c581dfa574420559d8cef24c2ba24e8eb8dd Mon Sep 17 00:00:00 2001 
From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:46 +0300 Subject: [PATCH 0719/1050] net/mlx5e: Refactor retrieval of skb from rx completion element (cqe) Factor the relevant code into a static inline helper (skb_from_cqe) doing that. Move the call to napi_gro_receive to be carried out just after mlx5e_complete_rx_cqe returns. Both changes are to be used for the VF representor as well in the next commit. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0a81bd34abbe..e836e477f8b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -629,7 +629,6 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, rq->stats.packets++; rq->stats.bytes += cqe_bcnt; mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); - napi_gro_receive(rq->cq.napi, skb); } static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) @@ -733,20 +732,15 @@ static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, } } -void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +static inline +struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + u16 wqe_counter, u32 cqe_bcnt) { struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); struct mlx5e_dma_info *di; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_counter_be; struct sk_buff *skb; - u16 wqe_counter; void *va, *data; - u32 cqe_bcnt; - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); di = &rq->dma_info[wqe_counter]; va = page_address(di->page); data = va + MLX5_RX_HEADROOM; @@ -757,22 +751,21 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(data); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; mlx5e_page_release(rq, di, true); - goto wq_ll_pop; + return NULL; } if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) - goto wq_ll_pop; /* page/packet was consumed by XDP */ + return NULL; /* page/packet was consumed by XDP */ skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; mlx5e_page_release(rq, di, true); - goto wq_ll_pop; + return NULL; } /* queue up for recycling ..*/ @@ -782,7 +775,28 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); + return skb; +} + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, @@ -861,6 +875,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); +
napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) From f5f82476090fd2c6fc4fde03ba61aef984900009 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:47 +0300 Subject: [PATCH 0720/1050] net/mlx5: E-Switch, Support VLAN actions in the offloads mode Many virtualization systems use a policy under which a vlan tag is pushed to packets sent by guests, and popped before the packet is forwarded to the VM. The current generation of the mlx5 HW doesn't fully support that on a per flow level. As such, we are addressing the above common use case with the SRIOV e-Switch abilities to push vlan into packets sent by VFs and pop vlan from packets forwarded to VFs. The HW can match on the correct vlan being present in packets forwarded to VFs (eSwitch steering is done before stripping the tag), so this part is offloaded as is. A common practice for vlans is to avoid both push vlan and pop vlan for inter-host VM/VM (east-west) communication because in this case, push on egress cancels out with pop on ingress. For supporting that, we use a global eswitch vlan pop policy, hence allowing guest A to communicate with both remote VM B and local VM C. This works since the HW pops the vlan only if it exists (e.g. for C --> A packets but not for B --> A packets). On the slow path, when a VF vport has an offloaded flow which involves pushing vlans, whereas another flow is not currently offloaded, the packets from the 2nd flow seen by the VF representor on the host have a vlan. The VF rep driver removes such a vlan before calling into the host networking stack. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 21 +- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 33 ++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 15 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 180 ++++++++++++++++++ 5 files changed, 249 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 346015407b70..460363b66cb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -869,6 +869,7 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b309e7cbe1bf..c12792314be7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -446,6 +446,16 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) kfree(rq->mpwqe.info); } +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + + if (rep && rep->vport != FDB_UPLINK_VPORT) + return true; + + return false; +} + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -487,6 +497,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, switch
(priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + if (mlx5e_is_vf_vport_rep(priv)) { + err = -EINVAL; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -512,7 +527,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + if (mlx5e_is_vf_vport_rep(priv)) + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; + else + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e836e477f8b7..c6de6fba5843 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include #include "en.h" #include "en_tc.h" +#include "eswitch.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -803,6 +804,38 @@ wq_ll_pop: &wqe->next.next_wqe_index); } +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rx_wqe *wqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + napi_gro_receive(rq->cq.napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_mpw_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index eeeeadc51558..2e2938e08cda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,6 +157,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_rule *miss_rule; + int vlan_push_pop_refcount; } offloads; }; }; @@ -183,6 +184,8 @@ struct mlx5_eswitch_rep { struct mlx5_flow_rule *vport_rx_rule; struct list_head vport_sqs_list; + u16 vlan; + u32 vlan_refcount; bool valid; }; @@ -252,11 +255,16 @@ enum { SET_VLAN_INSERT = BIT(1) }; +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; int action; + u16 vlan; + bool vlan_handled; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, @@ -273,6 +281,13 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, int vport_index); +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags); + #define 
MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 781debb1acf8..c55ad8d00c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -89,6 +89,186 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; } +static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) +{ + struct mlx5_eswitch_rep *rep; + int vf_vport, err = 0; + + esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); + for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { + rep = &esw->offloads.vport_reps[vf_vport]; + if (!rep->valid) + continue; + + err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); + if (err) + goto out; + } + +out: + return err; +} + +static struct mlx5_eswitch_rep * +esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push) + vport = in_rep; + else if (pop) + vport = out_rep; + else + vport = in_rep; + + return vport; +} + +static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, + bool push, bool pop, bool fwd) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep; + + if ((push || pop) && !fwd) + goto out_notsupp; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push && in_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + if (pop && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ + if (!push && !pop && fwd) + if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* protects against (1) setting rules with different vlans to push and + * (2) setting rules w.o vlans (attr->vlan = 0) && w. 
vlans to push (!= 0) + */ + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan)) + goto out_notsupp; + + return 0; + +out_notsupp: + return -ENOTSUPP; +} + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + err = esw_add_vlan_action_check(attr, push, pop, fwd); + if (err) + return err; + + attr->vlan_handled = false; + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) { + vport->vlan_refcount++; + attr->vlan_handled = true; + } + + return 0; + } + + if (!push && !pop) + return 0; + + if (!(offloads->vlan_push_pop_refcount)) { + /* it's the 1st vlan rule, apply global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); + if (err) + goto out; + } + offloads->vlan_push_pop_refcount++; + + if (push) { + if (vport->vlan_refcount) + goto skip_set_push; + + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0, + SET_VLAN_INSERT | SET_VLAN_STRIP); + if (err) + goto out; + vport->vlan = attr->vlan; +skip_set_push: + vport->vlan_refcount++; + } +out: + if (!err) + attr->vlan_handled = true; + return err; +} + +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + if (!attr->vlan_handled) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) + vport->vlan_refcount--; + + return 0; + } + + if (push) { + vport->vlan_refcount--; + if (vport->vlan_refcount) + goto skip_unset_push; + + vport->vlan = 0; + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, + 0, 0, SET_VLAN_STRIP); + if (err) + goto out; + } + +skip_unset_push: + offloads->vlan_push_pop_refcount--; + if (offloads->vlan_push_pop_refcount) + return 0; + + /* no more vlan rules, stop global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, 0); + +out: + return err; +} + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { From 8b32580df1cb4dc9cccb2d369d20317f7f74d9ce Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:48 +0300 Subject: [PATCH 0721/1050] net/mlx5e: Add TC vlan action for SRIOV offloads Parse TC vlan actions and set the required elements to allow offloading. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3eb319b12663..e61bd525f60e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -119,17 +119,27 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) + return ERR_PTR(err); return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule) + struct mlx5_flow_rule *rule, + struct mlx5_esw_flow_attr *attr) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_fc *counter = NULL; counter = mlx5_flow_rule_counter(rule); + if (esw && esw->mode == SRIOV_OFFLOADS) + mlx5_eswitch_del_vlan_action(esw, attr); + mlx5_del_flow_rule(rule); mlx5_fc_destroy(priv->mdev, counter); @@ -369,13 +379,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - /* Only support a single action per rule */ - if (attr->action) - return -EINVAL; - if (is_tcf_gact_shot(a)) { - attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -392,12 +398,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); attr->out_rep = out_priv->ppriv; continue; } + if (is_tcf_vlan(a)) { + if (tcf_vlan_action(a) == VLAN_F_POP) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } else if (tcf_vlan_action(a) == VLAN_F_PUSH) { + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + return -EOPNOTSUPP; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + attr->vlan = tcf_vlan_push_vid(a); + } + continue; + } + return -EINVAL; } return 0; @@ -413,6 +432,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_esw_flow_attr *old_attr; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw && esw->mode == SRIOV_OFFLOADS) @@ -422,6 +442,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, tc->ht_params); if (flow) { old = flow->rule; + old_attr = flow->attr; } else { if (fdb_flow) flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), @@ -466,7 +487,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto err_del_rule; if (old) - mlx5e_tc_del_flow(priv, old); + mlx5e_tc_del_flow(priv, old, old_attr); goto out; @@ -494,7 +515,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); @@ -551,7 +572,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) struct mlx5e_tc_flow *flow = ptr; struct mlx5e_priv *priv = arg; - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); 
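/* Editor's aside, not part of the patch above: a sketch of how the new
 * action parsing composes, with hypothetical device names. Per the
 * parse_tc_fdb_actions() hunks above, a vlan-push rule that also
 * redirects ends up with both action bits set:
 *
 *	attr->action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
 *		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 *	attr->vlan   = tcf_vlan_push_vid(a);
 *
 * which from user space would correspond to something like:
 *
 *	tc filter add dev vf0_rep parent ffff: protocol ip flower \
 *		action vlan push id 100 \
 *		action mirred egress redirect dev uplink_rep
 */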
kfree(flow); } From 095b6cfd69cedc8050b69535af8bf718ce0e9aad Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:49 +0300 Subject: [PATCH 0722/1050] net/mlx5e: Add TC vlan match parsing Enhance the parsing of offloaded TC rule matches to handle vlans. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e61bd525f60e..a350b7171e3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -164,6 +164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS))) { @@ -227,6 +228,24 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec key->src); } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + } + } + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_dissector_key_ipv4_addrs *key = skb_flow_dissector_target(f->dissector, From e12934d9806e61d2727069cd56757987f3da76aa Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Sep 2016 18:48:58 +0100 Subject: [PATCH 0723/1050] cxgb4: fix signed wrap around when decrementing index idx Change the pre-decrement comparison to a post-decrement comparison to avoid an unsigned integer wrap-around when decrementing idx in the while loop. For example, when idx is zero, the current code will pre-decrement idx in the while loop, wrapping idx to the maximum signed integer and causing out-of-bounds reads on rxq_info->msix_tbl[idx]. Signed-off-by: Colin Ian King Signed-off-by: David S.
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index d12a73e03565..f13b593d7c8b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -367,7 +367,7 @@ int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) } return 0; unwind: - while (--idx >= 0) { + while (idx-- > 0) { bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap, bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, From 2fe664f1fcf7c4da6891f95708a7a56d3c024354 Mon Sep 17 00:00:00 2001 From: Douglas Caetano dos Santos Date: Thu, 22 Sep 2016 15:52:04 -0300 Subject: [PATCH 0724/1050] tcp: fix wrong checksum calculation on MTU probing With TCP MTU probing enabled and offload TX checksumming disabled, tcp_mtu_probe() calculated the wrong checksum when a fragment being copied into the probe's SKB had an odd length. This was caused by the direct use of skb_copy_and_csum_bits() to calculate the checksum, as it pads the fragment being copied, if needed. When this fragment was not the last, a subsequent call used the previous checksum without considering this padding. The effect was a stale connection in one way, as even retransmissions wouldn't solve the problem, because the checksum was never recalculated for the full SKB length. Signed-off-by: Douglas Caetano dos Santos Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5288cec4a2b2..d48d5571e62a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1966,12 +1966,14 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) + if (nskb->ip_summed) { skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - else - nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, nskb->csum); + } else { + __wsum csum = skb_copy_and_csum_bits(skb, 0, + skb_put(nskb, copy), + copy, 0); + nskb->csum = csum_block_add(nskb->csum, csum, len); + } if (skb->len <= copy) { /* We've eaten all the data from this skb. From b24d2891cfb0a7975b0039743439c98fe7b7dea7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:17:33 +0100 Subject: [PATCH 0725/1050] rxrpc: Preset timestamp on Tx sk_buffs Set the timestamp on sk_buffs holding packets to be transmitted before queueing them because the moment the packet is on the queue it can be seen by the retransmission algorithm - which may see a completely random timestamp. If the retransmission algorithm sees such a timestamp, it may retransmit the packet and, in future, tell the congestion management algorithm that the retransmit timer expired. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index ca7c3be60ad2..ca3811bfbd17 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -99,6 +99,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ASSERTCMP(seq, ==, call->tx_top + 1); + /* We have to set the timestamp before queueing as the retransmit + * algorithm can see the packet as soon as we queue it. 
+ */ + skb->tstamp = ktime_get_real(); + ix = seq & RXRPC_RXTX_BUFF_MASK; rxrpc_get_skb(skb, rxrpc_skb_tx_got); call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; From 9aff212bd677829189fae2e2e408cefc196ae5ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 0726/1050] rxrpc: Don't send an ACK at the end of service call response transmission Don't send an IDLE ACK at the end of the transmission of the response to a service call. The service end resends DATA packets until the client sends an ACK that hard-acks all the send data. At that point, the call is complete. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6ba4af5a8d95..99e4c0ae30f1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,8 +143,6 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); - } else { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false); } write_lock_bh(&call->state_lock); From c0d058c21c69b3685c3f1bb008aa11f1a5eaee7e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 0727/1050] rxrpc: Make sure sendmsg() is woken on call completion Make sure that sendmsg() gets woken up if the call it is waiting for completes abnormally. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b13754a6dd7a..808ab750dc6b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -758,6 +758,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, call->error = error; call->completion = compl, call->state = RXRPC_CALL_COMPLETE; + wake_up(&call->waitq); return true; } return false; From 90bd684ded900673d86f64f4b4197704a38f04bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 0728/1050] rxrpc: Should be using ktime_add_ms() not ktime_add_ns() ktime_add_ms() should be used to add the resend time (in ms) rather than ktime_add_ns(). Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6e2ea8f4ae75..a2909da5d581 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -187,7 +187,7 @@ static void rxrpc_resend(struct rxrpc_call *call) call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } - resend_at = ktime_sub(ktime_add_ns(oldest, rxrpc_resend_timeout), now); + resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); /* Now go through the Tx window and perform the retransmissions. We From 019b1c9fe32a2a32c1153e31375f87ec3e591273 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Sep 2016 17:54:00 -0700 Subject: [PATCH 0729/1050] tcp: fix a compile error in DBGUNDO() If DBGUNDO() is enabled (FASTRETRANS_DEBUG > 1), a compile error will happen, since inet6_sk(sk)->daddr became sk->sk_v6_daddr Fixes: efe4208f47f9 ("ipv6: make lookups simpler and faster") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 08323bd95f2a..a756b8749a26 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2329,10 +2329,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->inet_dport), + &sk->sk_v6_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); From e3978673f514fa4999f04dfad9bbd5bb70d0edc6 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Thu, 22 Sep 2016 15:47:33 -0700 Subject: [PATCH 0730/1050] drivers: net: xgene: Fix MSS programming The current driver programs a static MSS value into the hardware register that the TSO offload engine uses to segment the TCP payload, regardless of the MSS value provided by the network stack. This patch fixes this by programming the hardware registers with the stack-provided MSS value. Since the hardware is limited to only 4 MSS registers, this patch keeps a reference count of the MSS values in use. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- .../net/ethernet/apm/xgene/xgene_enet_hw.h | 7 ++ .../net/ethernet/apm/xgene/xgene_enet_main.c | 88 +++++++++++++++---- .../net/ethernet/apm/xgene/xgene_enet_main.h | 8 +- .../net/ethernet/apm/xgene/xgene_enet_xgmac.c | 18 +++- 4 files changed, 99 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 8a8d05500894..8456337a237d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -237,6 +237,8 @@ enum xgene_enet_rm { #define TCPHDR_LEN 6 #define IPHDR_POS 6 #define IPHDR_LEN 6 +#define MSS_POS 20 +#define MSS_LEN 2 #define EC_POS 22 /* Enable checksum */ #define EC_LEN 1 #define ET_POS 23 /* Enable TSO */ @@ -253,6 +255,11 @@ enum xgene_enet_rm { #define LAST_BUFFER (0x7800ULL << BUFDATALEN_POS) +#define TSO_MSS0_POS 0 +#define TSO_MSS0_LEN 14 +#define TSO_MSS1_POS 16 +#define TSO_MSS1_LEN 14 + struct xgene_enet_raw_desc { __le64 m0; __le64 m1; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 522ba920bfc1..429f18fc5503 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -137,6 +137,7 @@ static irqreturn_t xgene_enet_rx_irq(const int irq, void *data) static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, struct xgene_enet_raw_desc *raw_desc) { + struct xgene_enet_pdata *pdata = netdev_priv(cp_ring->ndev); struct sk_buff *skb; struct device *dev; skb_frag_t *frag; @@ -144,6 +145,7 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, u16 skb_index; u8 status; int i, ret = 0; + u8 mss_index; skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0)); skb = cp_ring->cp_skb[skb_index]; @@ -160,6 +162,13 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, DMA_TO_DEVICE); } + if (GET_BIT(ET, le64_to_cpu(raw_desc->m3))) { + mss_index = GET_VAL(MSS, le64_to_cpu(raw_desc->m3)); + spin_lock(&pdata->mss_lock); + pdata->mss_refcnt[mss_index]--; + spin_unlock(&pdata->mss_lock); + } + /* Checking for error */ status = GET_VAL(LERR,
le64_to_cpu(raw_desc->m0)); if (unlikely(status > 2)) { @@ -178,15 +187,53 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, return ret; } -static u64 xgene_enet_work_msg(struct sk_buff *skb) +static int xgene_enet_setup_mss(struct net_device *ndev, u32 mss) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + bool mss_index_found = false; + int mss_index; + int i; + + spin_lock(&pdata->mss_lock); + + /* Reuse the slot if MSS matches */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (pdata->mss[i] == mss) { + pdata->mss_refcnt[i]++; + mss_index = i; + mss_index_found = true; + } + } + + /* Overwrite the slot with ref_count = 0 */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (!pdata->mss_refcnt[i]) { + pdata->mss_refcnt[i]++; + pdata->mac_ops->set_mss(pdata, mss, i); + pdata->mss[i] = mss; + mss_index = i; + mss_index_found = true; + } + } + + spin_unlock(&pdata->mss_lock); + + /* No slots with ref_count = 0 available, return busy */ + if (!mss_index_found) + return -EBUSY; + + return mss_index; +} + +static int xgene_enet_work_msg(struct sk_buff *skb, u64 *hopinfo) { struct net_device *ndev = skb->dev; struct iphdr *iph; u8 l3hlen = 0, l4hlen = 0; u8 ethhdr, proto = 0, csum_enable = 0; - u64 hopinfo = 0; u32 hdr_len, mss = 0; u32 i, len, nr_frags; + int mss_index; ethhdr = xgene_enet_hdr_len(skb->data); @@ -226,7 +273,11 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) if (!mss || ((skb->len - hdr_len) <= mss)) goto out; - hopinfo |= SET_BIT(ET); + mss_index = xgene_enet_setup_mss(ndev, mss); + if (unlikely(mss_index < 0)) + return -EBUSY; + + *hopinfo |= SET_BIT(ET) | SET_VAL(MSS, mss_index); } } else if (iph->protocol == IPPROTO_UDP) { l4hlen = UDP_HDR_SIZE; @@ -234,15 +285,15 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) } out: l3hlen = ip_hdrlen(skb) >> 2; - hopinfo |= SET_VAL(TCPHDR, l4hlen) | - SET_VAL(IPHDR, l3hlen) | - SET_VAL(ETHHDR, ethhdr) | - SET_VAL(EC, csum_enable) | - SET_VAL(IS, proto) | - SET_BIT(IC) | - SET_BIT(TYPE_ETH_WORK_MESSAGE); + *hopinfo |= SET_VAL(TCPHDR, l4hlen) | + SET_VAL(IPHDR, l3hlen) | + SET_VAL(ETHHDR, ethhdr) | + SET_VAL(EC, csum_enable) | + SET_VAL(IS, proto) | + SET_BIT(IC) | + SET_BIT(TYPE_ETH_WORK_MESSAGE); - return hopinfo; + return 0; } static u16 xgene_enet_encode_len(u16 len) @@ -282,20 +333,22 @@ static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring, dma_addr_t dma_addr, pbuf_addr, *frag_dma_addr; skb_frag_t *frag; u16 tail = tx_ring->tail; - u64 hopinfo; + u64 hopinfo = 0; u32 len, hw_len; u8 ll = 0, nv = 0, idx = 0; bool split = false; u32 size, offset, ell_bytes = 0; u32 i, fidx, nr_frags, count = 1; + int ret; raw_desc = &tx_ring->raw_desc[tail]; tail = (tail + 1) & (tx_ring->slots - 1); memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc)); - hopinfo = xgene_enet_work_msg(skb); - if (!hopinfo) - return -EINVAL; + ret = xgene_enet_work_msg(skb, &hopinfo); + if (ret) + return ret; + raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) | hopinfo); @@ -435,6 +488,9 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; count = xgene_enet_setup_tx_desc(tx_ring, skb); + if (count == -EBUSY) + return NETDEV_TX_BUSY; + if (count <= 0) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -1669,7 +1725,7 @@ static int xgene_enet_probe(struct platform_device *pdev) if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { ndev->features |= NETIF_F_TSO; - pdata->mss = XGENE_ENET_MSS; + 
spin_lock_init(&pdata->mss_lock); } ndev->hw_features = ndev->features; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 773537161171..0cda58f5a840 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -47,7 +47,7 @@ #define NUM_PKT_BUF 64 #define NUM_BUFPOOL 32 #define MAX_EXP_BUFFS 256 -#define XGENE_ENET_MSS 1448 +#define NUM_MSS_REG 4 #define XGENE_MIN_ENET_FRAME_SIZE 60 #define XGENE_MAX_ENET_IRQ 16 @@ -143,7 +143,7 @@ struct xgene_mac_ops { void (*rx_disable)(struct xgene_enet_pdata *pdata); void (*set_speed)(struct xgene_enet_pdata *pdata); void (*set_mac_addr)(struct xgene_enet_pdata *pdata); - void (*set_mss)(struct xgene_enet_pdata *pdata); + void (*set_mss)(struct xgene_enet_pdata *pdata, u16 mss, u8 index); void (*link_state)(struct work_struct *work); }; @@ -212,7 +212,9 @@ struct xgene_enet_pdata { u8 eth_bufnum; u8 bp_bufnum; u16 ring_num; - u32 mss; + u32 mss[NUM_MSS_REG]; + u32 mss_refcnt[NUM_MSS_REG]; + spinlock_t mss_lock; /* mss lock */ u8 tx_delay; u8 rx_delay; bool mdio_driver; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 279ee27004f7..6475f383ba83 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -232,9 +232,22 @@ static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, HSTMACADR_MSW_ADDR, addr1); } -static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata) +static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata, + u16 mss, u8 index) { - xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR, pdata->mss); + u8 offset; + u32 data; + + offset = (index < 2) ? 0 : 4; + xgene_enet_rd_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, &data); + + if (!(index & 0x1)) + data = SET_VAL(TSO_MSS1, data >> TSO_MSS1_POS) | + SET_VAL(TSO_MSS0, mss); + else + data = SET_VAL(TSO_MSS1, mss) | SET_VAL(TSO_MSS0, data); + + xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, data); } static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata) @@ -258,7 +271,6 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data); xgene_xgmac_set_mac_addr(pdata); - xgene_xgmac_set_mss(pdata); xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, &data); data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; From 4acfee8143b33efa8bda6f03fe1462d545ff8170 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:21 -0400 Subject: [PATCH 0731/1050] net: dsa: add port STP state helper Add a void helper to set the STP state of a port, checking first if the required routine is provided by the driver. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9ecbe787f102..fd78d4c9b197 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -69,6 +69,12 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) return !!p->bridge_dev; } +static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) +{ + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, port, state); +} + static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -104,8 +110,7 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, stp_state); + dsa_port_set_stp_state(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -147,8 +152,7 @@ static int dsa_slave_close(struct net_device *dev) if (ds->ops->port_disable) ds->ops->port_disable(ds, p->port, p->phy); - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + dsa_port_set_stp_state(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -354,7 +358,7 @@ static int dsa_slave_stp_state_set(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state); + dsa_port_set_stp_state(ds, p->port, attr->u.stp_state); return 0; } @@ -556,8 +560,7 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + dsa_port_set_stp_state(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, From 732f794c1baf58e1eb2be4431635829c3da655bd Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:22 -0400 Subject: [PATCH 0732/1050] net: dsa: add port fast ageing Today the DSA drivers are in charge of flushing the MAC addresses associated to a port when its STP state changes from Learning or Forwarding, to Disabled or Blocking or Listening. This makes the drivers more complex and hides the generic switch logic. Introduce a new optional port_fast_age operation to dsa_switch_ops, to move this logic to the DSA layer and keep drivers simple. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 2 ++ net/dsa/slave.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 7556646db2d3..b122196d5a1f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -143,6 +143,7 @@ struct dsa_port { struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; + u8 stp_state; }; struct dsa_switch { @@ -339,6 +340,7 @@ struct dsa_switch_ops { void (*port_bridge_leave)(struct dsa_switch *ds, int port); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + void (*port_fast_age)(struct dsa_switch *ds, int port); /* * VLAN support diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fd78d4c9b197..6b1282c006b1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -71,8 +71,26 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = &ds->ports[port]; + if (ds->ops->port_stp_state_set) ds->ops->port_stp_state_set(ds, port, state); + + if (ds->ops->port_fast_age) { + /* Fast age FDB entries or flush appropriate forwarding database + * for the given port, if we are moving it from Learning or + * Forwarding state, to Disabled or Blocking or Listening state. + */ + + if ((dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING) && + (state == BR_STATE_DISABLED || + state == BR_STATE_BLOCKING || + state == BR_STATE_LISTENING)) + ds->ops->port_fast_age(ds, port); + } + + dp->stp_state = state; } static int dsa_slave_open(struct net_device *dev) From 597698f1e00d37d40f83770ea166f3ca0dc1d68c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:23 -0400 Subject: [PATCH 0733/1050] net: dsa: b53: implement DSA port fast ageing Remove the fast ageing logic from b53_br_set_stp_state and implement the new DSA switch port_fast_age operation instead. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 1a492c053e27..7717b19dc806 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1402,16 +1402,12 @@ static void b53_br_leave(struct dsa_switch *ds, int port) } } -static void b53_br_set_stp_state(struct dsa_switch *ds, int port, - u8 state) +static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state) { struct b53_device *dev = ds->priv; - u8 hw_state, cur_hw_state; + u8 hw_state; u8 reg; - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), ®); - cur_hw_state = reg & PORT_CTRL_STP_STATE_MASK; - switch (state) { case BR_STATE_DISABLED: hw_state = PORT_CTRL_DIS_STATE; @@ -1433,26 +1429,20 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port, return; } - /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening - * state (hw_state) - */ - if (cur_hw_state != hw_state) { - if (cur_hw_state >= PORT_CTRL_LEARN_STATE && - hw_state <= PORT_CTRL_LISTEN_STATE) { - if (b53_fast_age_port(dev, port)) { - dev_err(ds->dev, "fast ageing failed\n"); - return; - } - } - } - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), ®); reg &= ~PORT_CTRL_STP_STATE_MASK; reg |= hw_state; b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +static void b53_br_fast_age(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + if (b53_fast_age_port(dev, port)) + dev_err(ds->dev, "fast ageing failed\n"); +} + static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) { return DSA_TAG_PROTO_NONE; @@ -1472,6 +1462,7 @@ static struct dsa_switch_ops b53_switch_ops = { .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, + .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_prepare = b53_vlan_prepare, .port_vlan_add = b53_vlan_add, From 749efcb8140e608dc2a63b6d61063b7cd3e556a5 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:24 -0400 Subject: [PATCH 0734/1050] net: dsa: mv88e6xxx: implement DSA port fast ageing Now that the DSA layer handles port fast ageing on correct STP change, simplify _mv88e6xxx_port_state and implement mv88e6xxx_port_fast_age. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 45 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 25bd3fa744d9..122876cb926c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1133,31 +1133,18 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, oldstate = reg & PORT_CONTROL_STATE_MASK; - if (oldstate != state) { - /* Flush forwarding database if we're moving a port - * from Learning or Forwarding state to Disabled or - * Blocking or Listening state. 
- */ - if ((oldstate == PORT_CONTROL_STATE_LEARNING || - oldstate == PORT_CONTROL_STATE_FORWARDING) && - (state == PORT_CONTROL_STATE_DISABLED || - state == PORT_CONTROL_STATE_BLOCKING)) { - err = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (err) - return err; - } + reg &= ~PORT_CONTROL_STATE_MASK; + reg |= state; - reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); - if (err) - return err; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; - netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", - mv88e6xxx_port_state_names[state], - mv88e6xxx_port_state_names[oldstate]); - } + netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", + mv88e6xxx_port_state_names[state], + mv88e6xxx_port_state_names[oldstate]); - return err; + return 0; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) @@ -1232,6 +1219,19 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, mv88e6xxx_port_state_names[stp_state]); } +static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + mutex_unlock(&chip->reg_lock); + + if (err) + netdev_err(ds->ports[port].netdev, "failed to flush ATU\n"); +} + static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { @@ -3684,6 +3684,7 @@ static struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_bridge_join = mv88e6xxx_port_bridge_join, .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_stp_state_set = mv88e6xxx_port_stp_state_set, + .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, .port_vlan_add = mv88e6xxx_port_vlan_add, From 17db4bcef3c3c45b95b3b3d8577f725df1b2c0a0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:29 -0700 Subject: [PATCH 0735/1050] hv_netvsc: use consume_skb Packets that are transmitted in normal path should use consume_skb instead of kfree_skb. This allows for better tracing of packet drops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ff05b9b0837f..720b5fa9e625 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -635,7 +635,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, q_idx = nvsc_packet->q_idx; channel = incoming_channel; - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); } num_outstanding_sends = @@ -944,7 +944,7 @@ int netvsc_send(struct hv_device *device, } if (msdp->skb) - dev_kfree_skb_any(msdp->skb); + dev_consume_skb_any(msdp->skb); if (xmit_more && !packet->cp_partial) { msdp->skb = skb; From 07d0f0008c783d2a2fce8497000938db15fd7aa1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:30 -0700 Subject: [PATCH 0736/1050] hv_netvsc: dev hold/put reference to VF The netvsc driver holds a pointer to the virtual function network device if managing SR-IOV association. In order to ensure that the VF network device does not disappear, it should be using dev_hold/dev_put to get a reference count. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/hyperv/netvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2360e704e271..e74dbcc2916d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1262,6 +1262,8 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * Take a reference on the module. */ try_module_get(THIS_MODULE); + + dev_hold(vf_netdev); net_device_ctx->vf_netdev = vf_netdev; return NOTIFY_OK; } @@ -1376,6 +1378,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; + dev_put(vf_netdev); module_put(THIS_MODULE); return NOTIFY_OK; } From ee837a137304290a1ae26980c73a367f7afef54f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:31 -0700 Subject: [PATCH 0737/1050] hv_netvsc: simplify callback event code The callback handler for netlink events can be simplified: * Consolidate check for netlink callback events about this driver itself. * Ignore non-Ethernet devices. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e74dbcc2916d..849b566ad7bf 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1238,10 +1238,6 @@ static int netvsc_register_vf(struct net_device *vf_netdev) struct net_device *ndev; struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - - if (eth_ops == NULL || eth_ops == ðtool_ops) - return NOTIFY_DONE; /* * We will use the MAC address to locate the synthetic interface to @@ -1286,12 +1282,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) return NOTIFY_DONE; @@ -1329,10 +1321,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) struct net_device *ndev; struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) @@ -1361,12 +1349,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) return NOTIFY_DONE; @@ -1542,13 +1526,21 @@ static int netvsc_netdev_event(struct notifier_block *this, { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + /* Skip our own events */ + if (event_dev->netdev_ops == &device_ops) + return NOTIFY_DONE; + + /* Avoid non-Ethernet type devices */ + if (event_dev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + /* Avoid Vlan dev with same MAC registering as VF */ if (event_dev->priv_flags & IFF_802_1Q_VLAN) return NOTIFY_DONE; /* Avoid Bonding master dev with same 
MAC registering as VF */ - if (event_dev->priv_flags & IFF_BONDING && - event_dev->flags & IFF_MASTER) + if ((event_dev->priv_flags & IFF_BONDING) && + (event_dev->flags & IFF_MASTER)) return NOTIFY_DONE; switch (event) { From e8ff40d4bff1f3b6a588e29ed1fbdfd943642856 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:32 -0700 Subject: [PATCH 0738/1050] hv_netvsc: improve VF device matching The code to associate netvsc and VF devices can be made less error-prone by using better matching algorithms. On registration, use the permanent address, which avoids issues caused by the device MAC address being changed. For all other callbacks, search by the netdevice pointer value to ensure getting the correct network device. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 60 +++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 849b566ad7bf..87682198ff73 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1215,22 +1215,44 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } -static struct net_device *get_netvsc_net_device(char *mac) +static struct net_device *get_netvsc_bymac(const u8 *mac) { - struct net_device *dev, *found = NULL; + struct net_device *dev; ASSERT_RTNL(); for_each_netdev(&init_net, dev) { - if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) { - if (dev->netdev_ops != &device_ops) - continue; - found = dev; - break; - } + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ + + if (ether_addr_equal(mac, dev->perm_addr)) + return dev; } - return found; + return NULL; +} + +static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) +{ + struct net_device *dev; + + ASSERT_RTNL(); + + for_each_netdev(&init_net, dev) { + struct net_device_context *net_device_ctx; + + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ + + net_device_ctx = netdev_priv(dev); + if (net_device_ctx->nvdev == NULL) + continue; /* device is removed */ + + if (net_device_ctx->vf_netdev == vf_netdev) + return dev; /* a match */ + } + + return NULL; } static int netvsc_register_vf(struct net_device *vf_netdev) @@ -1239,12 +1261,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev) struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + if (vf_netdev->addr_len != ETH_ALEN) + return NOTIFY_DONE; + /* * We will use the MAC address to locate the synthetic interface to * associate with the VF interface. If we don't find a matching * synthetic interface, move on.
*/ - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_bymac(vf_netdev->perm_addr); if (!ndev) return NOTIFY_DONE; @@ -1284,16 +1309,13 @@ static int netvsc_vf_up(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF up: %s\n", vf_netdev->name); netvsc_inject_enable(net_device_ctx); @@ -1322,16 +1344,13 @@ static int netvsc_vf_down(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF down: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); @@ -1351,14 +1370,13 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; + netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; From f207c10d982388fa42710922ad1c0c9d3ba9a87b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:33 -0700 Subject: [PATCH 0739/1050] hv_netvsc: use RCU to protect vf_netdev The vf_netdev pointer in the netvsc device context can simply be protected by RCU because network device destruction is already RCU synchronized. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc_drv.c | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 284b97b6b258..6b7948764443 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -695,7 +695,7 @@ struct net_device_context { bool start_remove; /* State to manage the associated VF interface. */ - struct net_device *vf_netdev; + struct net_device __rcu *vf_netdev; bool vf_inject; atomic_t vf_use_cnt; /* 1: allocated, serial number is valid. 
0: not allocated */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 87682198ff73..dde17c0faf9f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -667,8 +667,8 @@ int netvsc_recv_callback(struct hv_device *device_obj, { struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); + struct net_device *vf_netdev; struct sk_buff *skb; - struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; u32 bytes_recvd = packet->total_data_buflen; int ret = 0; @@ -676,9 +676,12 @@ int netvsc_recv_callback(struct hv_device *device_obj, if (!net || net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - if (READ_ONCE(net_device_ctx->vf_inject)) { + vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); + if (vf_netdev) { + struct sk_buff *vf_skb; + atomic_inc(&net_device_ctx->vf_use_cnt); - if (!READ_ONCE(net_device_ctx->vf_inject)) { + if (!net_device_ctx->vf_inject) { /* * We raced; just move on. */ @@ -694,13 +697,12 @@ int netvsc_recv_callback(struct hv_device *device_obj, * the host). Deliver these via the VF interface * in the guest. */ - vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev, + vf_skb = netvsc_alloc_recv_skb(vf_netdev, packet, csum_info, *data, vlan_tci); if (vf_skb != NULL) { - ++net_device_ctx->vf_netdev->stats.rx_packets; - net_device_ctx->vf_netdev->stats.rx_bytes += - bytes_recvd; + ++vf_netdev->stats.rx_packets; + vf_netdev->stats.rx_bytes += bytes_recvd; netif_receive_skb(vf_skb); } else { ++net->stats.rx_dropped; @@ -1232,7 +1234,7 @@ static struct net_device *get_netvsc_bymac(const u8 *mac) return NULL; } -static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) +static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) { struct net_device *dev; @@ -1248,7 +1250,7 @@ static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) if (net_device_ctx->nvdev == NULL) continue; /* device is removed */ - if (net_device_ctx->vf_netdev == vf_netdev) + if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) return dev; /* a match */ } @@ -1275,7 +1277,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || net_device_ctx->vf_netdev) + if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); @@ -1285,7 +1287,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) try_module_get(THIS_MODULE); dev_hold(vf_netdev); - net_device_ctx->vf_netdev = vf_netdev; + rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); return NOTIFY_OK; } @@ -1379,7 +1381,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); - net_device_ctx->vf_netdev = NULL; + + RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); module_put(THIS_MODULE); return NOTIFY_OK; @@ -1433,8 +1436,6 @@ static int netvsc_probe(struct hv_device *dev, INIT_LIST_HEAD(&net_device_ctx->reconfig_events); atomic_set(&net_device_ctx->vf_use_cnt, 0); - net_device_ctx->vf_netdev = NULL; - net_device_ctx->vf_inject = false; net->netdev_ops = &device_ops; From 9cbcc4280645f0e7e19e6a0da443ec7e69cecf40 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:34 -0700 Subject: [PATCH 0740/1050] hv_netvsc: remove 
VF in flight counters Since VF reference is now protected by RCU, no longer need the VF usage counter and can use device flags to see whether to inject or not. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 +- drivers/net/hyperv/netvsc_drv.c | 81 ++++++++------------------------- 2 files changed, 21 insertions(+), 63 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 6b7948764443..1d4974026eff 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -696,8 +696,7 @@ struct net_device_context { /* State to manage the associated VF interface. */ struct net_device __rcu *vf_netdev; - bool vf_inject; - atomic_t vf_use_cnt; + /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; /* Serial number of the VF to team with */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index dde17c0faf9f..9375d82702ce 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -670,50 +670,20 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; - u32 bytes_recvd = packet->total_data_buflen; - int ret = 0; - if (!net || net->reg_state != NETREG_REGISTERED) + if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; + /* + * If necessary, inject this packet into the VF interface. + * On Hyper-V, multicast and brodcast packets are only delivered + * to the synthetic interface (after subjecting these to + * policy filters on the host). Deliver these via the VF + * interface in the guest. + */ vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); - if (vf_netdev) { - struct sk_buff *vf_skb; - - atomic_inc(&net_device_ctx->vf_use_cnt); - if (!net_device_ctx->vf_inject) { - /* - * We raced; just move on. - */ - atomic_dec(&net_device_ctx->vf_use_cnt); - goto vf_injection_done; - } - - /* - * Inject this packet into the VF inerface. - * On Hyper-V, multicast and brodcast packets - * are only delivered on the synthetic interface - * (after subjecting these to policy filters on - * the host). Deliver these via the VF interface - * in the guest. - */ - vf_skb = netvsc_alloc_recv_skb(vf_netdev, - packet, csum_info, *data, - vlan_tci); - if (vf_skb != NULL) { - ++vf_netdev->stats.rx_packets; - vf_netdev->stats.rx_bytes += bytes_recvd; - netif_receive_skb(vf_skb); - } else { - ++net->stats.rx_dropped; - ret = NVSP_STAT_FAIL; - } - atomic_dec(&net_device_ctx->vf_use_cnt); - return ret; - } - -vf_injection_done: - rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); + if (vf_netdev && (vf_netdev->flags & IFF_UP)) + net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); @@ -721,9 +691,17 @@ vf_injection_done: ++net->stats.rx_dropped; return NVSP_STAT_FAIL; } - skb_record_rx_queue(skb, channel-> - offermsg.offer.sub_channel_index); + if (net != vf_netdev) + skb_record_rx_queue(skb, + channel->offermsg.offer.sub_channel_index); + + /* + * Even if injecting the packet, record the statistics + * on the synthetic device because modifying the VF device + * statistics will not work correctly. 
+ */ + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; @@ -1291,20 +1269,6 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_OK; } -static void netvsc_inject_enable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = true; -} - -static void netvsc_inject_disable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = false; - - /* Wait for currently active users to drain out. */ - while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) - udelay(50); -} - static int netvsc_vf_up(struct net_device *vf_netdev) { struct net_device *ndev; @@ -1319,7 +1283,6 @@ static int netvsc_vf_up(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - netvsc_inject_enable(net_device_ctx); /* * Open the device before switching data path. @@ -1354,7 +1317,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); @@ -1380,7 +1342,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); @@ -1435,8 +1396,6 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); - atomic_set(&net_device_ctx->vf_use_cnt, 0); - net->netdev_ops = &device_ops; net->hw_features = NETVSC_HW_FEATURES; From f7ad75b753f386454f50044fd69edad767b69ce8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:35 -0700 Subject: [PATCH 0741/1050] hv_netvsc: count multicast packets received Useful for debugging issues with multicast and SR-IOV to keep track of number of received multicast packets. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc_drv.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 1d4974026eff..7130bf910f52 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -649,6 +649,8 @@ struct multi_recv_comp { struct netvsc_stats { u64 packets; u64 bytes; + u64 broadcast; + u64 multicast; struct u64_stats_sync syncp; }; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9375d82702ce..52eeb2f67276 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -705,6 +705,11 @@ int netvsc_recv_callback(struct hv_device *device_obj, u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; + + if (skb->pkt_type == PACKET_BROADCAST) + ++rx_stats->broadcast; + else if (skb->pkt_type == PACKET_MULTICAST) + ++rx_stats->multicast; u64_stats_update_end(&rx_stats->syncp); /* @@ -947,7 +952,7 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, cpu); struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats, cpu); - u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast; unsigned int start; do { @@ -960,12 +965,14 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, start = u64_stats_fetch_begin_irq(&rx_stats->syncp); rx_packets = rx_stats->packets; rx_bytes = rx_stats->bytes; + rx_multicast = rx_stats->multicast + rx_stats->broadcast; } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); t->tx_bytes += tx_bytes; t->tx_packets += tx_packets; t->rx_bytes += rx_bytes; t->rx_packets += rx_packets; + t->multicast += rx_multicast; } t->tx_dropped = net->stats.tx_dropped; From 2d48c5f9335e48ddac7a52db10bf3bfd01986b9c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:35 +0200 Subject: [PATCH 0742/1050] bpf: use skb_to_full_sk helper in bpf_skb_under_cgroup We need to use skb_to_full_sk() helper introduced in commit bd5eb35f16a9 ("xfrm: take care of request sockets") as otherwise we miss tcp synack messages, since ownership is on request socket and therefore it would miss the sk_fullsock() check. Use skb_to_full_sk() as also done similarly in the bpf_get_cgroup_classid() helper via 2309236c13fe ("cls_cgroup: get sk_classid only from full sockets") fix to not let this fall through. Fixes: 4a482f34afcc ("cgroup: bpf: Add bpf_skb_in_cgroup_proto") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 0920c2ac1d00..e5d997759d5e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2408,7 +2408,7 @@ BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, struct cgroup *cgrp; struct sock *sk; - sk = skb->sk; + sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) return -ENOENT; if (unlikely(idx >= array->map.max_entries)) From 669dc4d76d0ecc2d795df735839f43cfddf9f617 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:36 +0200 Subject: [PATCH 0743/1050] bpf: use bpf_get_smp_processor_id_proto instead of raw one Same motivation as in commit 80b48c445797 ("bpf: don't use raw processor id in generic helper"), but this time for XDP typed programs. 
Thus, allow for preemption checks when we have DEBUG_PREEMPT enabled, and otherwise use the raw variant. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index e5d997759d5e..acf84fbfb043 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2551,6 +2551,8 @@ xdp_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_xdp_event_output_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return sk_filter_func_proto(func_id); } From 7a4b28c6cc9ffac50f791b99cc7e46106436e5d8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:37 +0200 Subject: [PATCH 0744/1050] bpf: add helper to invalidate hash Add a small helper that complements 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs") for invalidating the current skb->hash after mangling on headers via direct packet write. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ net/core/filter.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e07432b9f8b8..f09c70b97eca 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -419,6 +419,13 @@ enum bpf_func_id { */ BPF_FUNC_csum_update, + /** + * bpf_set_hash_invalid(skb) + * Invalidate current skb>hash. + * @skb: pointer to skb + */ + BPF_FUNC_set_hash_invalid, + __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index acf84fbfb043..00351cdf7d0c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1777,6 +1777,22 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) +{ + /* After all direct packet write, this can be used once for + * triggering a lazy recalc on next skb_get_hash() invocation. + */ + skb_clear_hash(skb); + return 0; +} + +static const struct bpf_func_proto bpf_set_hash_invalid_proto = { + .func = bpf_set_hash_invalid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, u16, vlan_tci) { @@ -2534,6 +2550,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: return &bpf_get_hash_recalc_proto; + case BPF_FUNC_set_hash_invalid: + return &bpf_set_hash_invalid_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: From 98dafac5697fbe1fb4bef9e3204baf9051641b00 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 14:04:38 +0100 Subject: [PATCH 0745/1050] rxrpc: Use before_eq() and friends to compare serial numbers before_eq() and friends should be used to compare serial numbers (when not checking for (non)equality) rather than casting to int, subtracting and checking the result. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cbb5d53f09d7..06027b6d9c19 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -578,7 +578,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } /* Discard any out-of-order or duplicate ACKs. 
*/ - if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) { + if (before_eq(sp->hdr.serial, call->acks_latest)) { _debug("discard ACK %d <= %d", sp->hdr.serial, call->acks_latest); return; From dfc3da4404ad1ec42a0a649a4ffa2b0f37e80352 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 0746/1050] rxrpc: Need to start the resend timer on initial transmission When a DATA packet has its initial transmission, we may need to start or adjust the resend timer. Without this we end up relying on being sent a NACK to initiate the resend. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 2 +- net/rxrpc/sendmsg.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 808ab750dc6b..9e3ba4dc9578 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -704,6 +704,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ +void rxrpc_set_timer(struct rxrpc_call *); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index a2909da5d581..3a7f90a2659c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,7 +24,7 @@ /* * Set the timer */ -static void rxrpc_set_timer(struct rxrpc_call *call) +void rxrpc_set_timer(struct rxrpc_call *call) { unsigned long t, now = jiffies; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index ca3811bfbd17..7cb34b2dfba9 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -146,6 +146,15 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); + } else { + unsigned long resend_at; + + resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout); + + if (time_before(resend_at, call->resend_at)) { + call->resend_at = resend_at; + rxrpc_set_timer(call); + } } rxrpc_free_skb(skb, rxrpc_skb_tx_freed); From fb2a3d5c7c85cb6e8bc88192be919b4ef8d6e630 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 23 Sep 2016 09:09:31 -0400 Subject: [PATCH 0747/1050] Revert "xen-netback: create a debugfs node for hash information" This reverts commit c0c64c152389ad73306b9b0796357210ec6d32ee. There is no vif->ctrl_task member, so this change broke the build. Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/common.h | 4 -- drivers/net/xen-netback/hash.c | 68 -------------------------------- drivers/net/xen-netback/xenbus.c | 37 +---------------- 3 files changed, 2 insertions(+), 107 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index ff94c513fe20..b38fb2cf3364 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -410,8 +410,4 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb); -#ifdef CONFIG_DEBUG_FS -void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m); -#endif - #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index e8c5dddc54ba..613bac057650 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -360,74 +360,6 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, return XEN_NETIF_CTRL_STATUS_SUCCESS; } -#ifdef CONFIG_DEBUG_FS -void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) -{ - unsigned int i; - - switch (vif->hash.alg) { - case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ: - seq_puts(m, "Hash Algorithm: TOEPLITZ\n"); - break; - - case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE: - seq_puts(m, "Hash Algorithm: NONE\n"); - /* FALLTHRU */ - default: - return; - } - - if (vif->hash.flags) { - seq_puts(m, "\nHash Flags:\n"); - - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4) - seq_puts(m, "- IPv4\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) - seq_puts(m, "- IPv4 + TCP\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6) - seq_puts(m, "- IPv6\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) - seq_puts(m, "- IPv6 + TCP\n"); - } - - seq_puts(m, "\nHash Key:\n"); - - for (i = 0; i < XEN_NETBK_MAX_HASH_KEY_SIZE; ) { - unsigned int j, n; - - n = 8; - if (i + n >= XEN_NETBK_MAX_HASH_KEY_SIZE) - n = XEN_NETBK_MAX_HASH_KEY_SIZE - i; - - seq_printf(m, "[%2u - %2u]: ", i, i + n - 1); - - for (j = 0; j < n; j++, i++) - seq_printf(m, "%02x ", vif->hash.key[i]); - - seq_puts(m, "\n"); - } - - if (vif->hash.size != 0) { - seq_puts(m, "\nHash Mapping:\n"); - - for (i = 0; i < vif->hash.size; ) { - unsigned int j, n; - - n = 8; - if (i + n >= vif->hash.size) - n = vif->hash.size - i; - - seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); - - for (j = 0; j < n; j++, i++) - seq_printf(m, "%4u ", vif->hash.mapping[i]); - - seq_puts(m, "\n"); - } - } -} -#endif /* CONFIG_DEBUG_FS */ - void xenvif_init_hash(struct xenvif *vif) { if (xenvif_hash_cache_size == 0) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 9911b4e61c0f..daf4c7867102 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -165,7 +165,7 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, return count; } -static int xenvif_io_ring_open(struct inode *inode, struct file *filp) +static int xenvif_dump_open(struct inode *inode, struct file *filp) { int ret; void *queue = NULL; @@ -179,35 +179,13 @@ static int xenvif_io_ring_open(struct inode *inode, struct file *filp) static const struct file_operations xenvif_dbg_io_ring_ops_fops = { .owner = THIS_MODULE, - .open = xenvif_io_ring_open, + .open = xenvif_dump_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = xenvif_write_io_ring, }; -static int xenvif_read_ctrl(struct seq_file *m, void *v) -{ - struct 
xenvif *vif = m->private; - - xenvif_dump_hash_info(vif, m); - - return 0; -} - -static int xenvif_ctrl_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, xenvif_read_ctrl, inode->i_private); -} - -static const struct file_operations xenvif_dbg_ctrl_ops_fops = { - .owner = THIS_MODULE, - .open = xenvif_ctrl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void xenvif_debugfs_addif(struct xenvif *vif) { struct dentry *pfile; @@ -232,17 +210,6 @@ static void xenvif_debugfs_addif(struct xenvif *vif) pr_warn("Creation of io_ring file returned %ld!\n", PTR_ERR(pfile)); } - - if (vif->ctrl_task) { - pfile = debugfs_create_file("ctrl", - S_IRUSR, - vif->xenvif_dbg_root, - vif, - &xenvif_dbg_ctrl_ops_fops); - if (IS_ERR_OR_NULL(pfile)) - pr_warn("Creation of ctrl file returned %ld!\n", - PTR_ERR(pfile)); - } } else netdev_warn(vif->dev, "Creation of vif debugfs dir returned %ld!\n", From be8aa3380678183821bd7d7b5dec845f10d776ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: [PATCH 0748/1050] rxrpc: Fix accidental cancellation of scheduled resend by ACK parser When rxrpc_input_soft_acks() is parsing the soft-ACKs from an ACK packet, it updates the Tx packet annotations in the annotation buffer. If a soft-ACK is an ACK, then we overwrite unack'd, nak'd or to-be-retransmitted states and that is fine; but if the soft-ACK is an NACK, we overwrite the to-be-retransmitted with a nak - which isn't. Instead, we need to let any scheduled retransmission stand if the packet was NAK'd. Note that we don't reissue a resend if the annotation is in the to-be-retransmitted state because someone else must've scheduled the resend already. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 06027b6d9c19..d3d69ab1f0a1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -479,6 +479,8 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, case RXRPC_ACK_TYPE_NACK: if (anno_type == RXRPC_TX_ANNO_NAK) continue; + if (anno_type == RXRPC_TX_ANNO_RETRANS) + continue; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK | annotation; resend = true; From 01a88f7f6bd4514de9551c3fc9a6fd9e65cbf79d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 0749/1050] rxrpc: Fix call timer Fix the call timer in the following ways: (1) If call->resend_at or call->ack_at are before or equal to the current time, then ignore that timeout. (2) If call->expire_at is before or equal to the current time, then don't set the timer at all (possibly we should queue the call). (3) Don't skip modifying the timer if timer_pending() is true. This indicates that the timer is working, not that it has expired and is running/waiting to run its expiry handler. Also call rxrpc_set_timer() to start the call timer going rather than calling add_timer(). 
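For illustration only, the fixed selection logic amounts to the simplified sketch below (names as in the rxrpc code, but this is a summary of points (1)-(3), not the literal hunk, which follows):

	unsigned long t, now = jiffies;

	t = call->expire_at;
	if (time_before_eq(t, now))
		return;				/* (2) hard expiry already due; don't set the timer */

	if (time_after(call->resend_at, now) &&	/* (1) ignore a resend time in the past */
	    time_before(call->resend_at, t))
		t = call->resend_at;

	if (time_after(call->ack_at, now) &&	/* (1) likewise for the ACK time */
	    time_before(call->ack_at, t))
		t = call->ack_at;

	/* (3) update even if the timer is already pending */
	if (call->timer.expires != t || !timer_pending(&call->timer))
		mod_timer(&call->timer, t);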
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 25 ++++++++++++++----------- net/rxrpc/call_object.c | 4 ++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3a7f90a2659c..8bc5c8e37ab4 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -28,24 +28,27 @@ void rxrpc_set_timer(struct rxrpc_call *call) { unsigned long t, now = jiffies; - _enter("{%ld,%ld,%ld:%ld}", - call->ack_at - now, call->resend_at - now, call->expire_at - now, - call->timer.expires - now); - read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { - t = call->ack_at; - if (time_before(call->resend_at, t)) + t = call->expire_at; + if (time_before_eq(t, now)) + goto out; + + if (time_after(call->resend_at, now) && + time_before(call->resend_at, t)) t = call->resend_at; - if (time_before(call->expire_at, t)) - t = call->expire_at; - if (!timer_pending(&call->timer) || - time_before(t, call->timer.expires)) { - _debug("set timer %ld", t - now); + + if (time_after(call->ack_at, now) && + time_before(call->ack_at, t)) + t = call->ack_at; + + if (call->timer.expires != t || !timer_pending(&call->timer)) { mod_timer(&call->timer, t); } } + +out: read_unlock_bh(&call->state_lock); } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f50a6094e198..f2fadf667e19 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -199,8 +199,8 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) call->expire_at = expire_at; call->ack_at = expire_at; call->resend_at = expire_at; - call->timer.expires = expire_at; - add_timer(&call->timer); + call->timer.expires = expire_at + 1; + rxrpc_set_timer(call); } /* From 70790dbe3f6651fb66ad38da0a1e24368778bc16 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 0750/1050] rxrpc: Pass the last Tx packet marker in the annotation buffer When the last packet of data to be transmitted on a call is queued, tx_top is set and then the RXRPC_CALL_TX_LAST flag is set. Unfortunately, this leaves a race in the ACK processing side of things because the flag affects the interpretation of tx_top and also allows us to start receiving reply data before we've finished transmitting. To fix this, make the following changes: (1) rxrpc_queue_packet() now sets a marker in the annotation buffer instead of setting the RXRPC_CALL_TX_LAST flag. (2) rxrpc_rotate_tx_window() detects the marker and sets the flag in the same context as the routines that use it. (3) rxrpc_end_tx_phase() is simplified to just shift the call state. The Tx window must have been rotated before calling to discard the last packet. (4) rxrpc_receiving_reply() is added to handle the arrival of the first DATA packet of a reply to a client call (which is an implicit ACK of the Tx phase). (5) The last part of rxrpc_input_ack() is reordered to perform Tx rotation, then soft-ACK application and then to end the phase if we've rotated the last packet. In the event of a terminal ACK, the soft-ACK application will be skipped as nAcks should be 0. (6) rxrpc_input_ackall() now has to rotate as well as ending the phase. In addition: (7) Alter the transmit tracepoint to log the rotation of the last packet. (8) Remove the no-longer relevant queue_reqack tracepoint note. The ACK-REQUESTED packet header flag is now set as needed when we actually transmit the packet and may vary by retransmission. 
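In outline, the marker flows as below (a simplified sketch of the two sides; the complete changes are in the diff that follows):

	/* rxrpc_queue_packet(): record "last" in the annotation buffer... */
	u8 annotation = RXRPC_TX_ANNO_UNACK;

	if (last)
		annotation |= RXRPC_TX_ANNO_LAST;
	call->rxtx_annotations[ix] = annotation;

	/* ...rxrpc_rotate_tx_window(): set the flag in the same context that
	 * interprets it.
	 */
	if (annotation & RXRPC_TX_ANNO_LAST)
		set_bit(RXRPC_CALL_TX_LAST, &call->flags);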
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 ++- net/rxrpc/input.c | 104 ++++++++++++++++++++++++++-------------- net/rxrpc/misc.c | 3 +- net/rxrpc/sendmsg.c | 14 +++--- 4 files changed, 82 insertions(+), 46 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9e3ba4dc9578..a494d56eb236 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -508,7 +508,9 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_NAK 2 #define RXRPC_TX_ANNO_RETRANS 3 #define RXRPC_TX_ANNO_MASK 0x03 -#define RXRPC_TX_ANNO_RESENT 0x04 +#define RXRPC_TX_ANNO_LAST 0x04 +#define RXRPC_TX_ANNO_RESENT 0x08 + #define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ #define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ @@ -621,9 +623,10 @@ extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; enum rxrpc_transmit_trace { rxrpc_transmit_wait, rxrpc_transmit_queue, - rxrpc_transmit_queue_reqack, rxrpc_transmit_queue_last, rxrpc_transmit_rotate, + rxrpc_transmit_rotate_last, + rxrpc_transmit_await_reply, rxrpc_transmit_end, rxrpc_transmit__nr_trace }; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d3d69ab1f0a1..fb3e2f6afa3b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -59,6 +59,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) { struct sk_buff *skb, *list = NULL; int ix; + u8 annotation; spin_lock(&call->lock); @@ -66,16 +67,22 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) call->tx_hard_ack++; ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; + annotation = call->rxtx_annotations[ix]; rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; list = skb; + + if (annotation & RXRPC_TX_ANNO_LAST) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, rxrpc_transmit_rotate); + trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + rxrpc_transmit_rotate_last : + rxrpc_transmit_rotate)); wake_up(&call->waitq); while (list) { @@ -92,42 +99,65 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) * This occurs when we get an ACKALL packet, the first DATA packet of a reply, * or a final ACK packet. 
*/ -static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + const char *abort_why) { - _enter(""); - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - return true; - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_SERVER_AWAIT_ACK: - break; - default: - rxrpc_proto_abort(abort_why, call, call->tx_top); - return false; - } - - rxrpc_rotate_tx_window(call, call->tx_top); + ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); switch (call->state) { - default: - break; + case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->tx_phase = false; - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + if (reply_begun) + call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + else + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; + case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); break; + + default: + goto bad_state; } write_unlock(&call->state_lock); - trace_rxrpc_transmit(call, rxrpc_transmit_end); + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_end); + } _leave(" = ok"); return true; + +bad_state: + write_unlock(&call->state_lock); + kdebug("end_tx %s", rxrpc_call_states[call->state]); + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; +} + +/* + * Begin the reply reception phase of a call. + */ +static bool rxrpc_receiving_reply(struct rxrpc_call *call) +{ + rxrpc_seq_t top = READ_ONCE(call->tx_top); + + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_rotate_tx_window(call, top); + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } + if (!rxrpc_end_tx_phase(call, true, "ETD")) + return false; + call->tx_phase = false; + return true; } /* @@ -226,8 +256,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. 
*/ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY && - !rxrpc_end_tx_phase(call, "ETD")) + if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST || + call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + !rxrpc_receiving_reply(call)) return; call->ackr_prev_seq = seq; @@ -587,27 +618,26 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } call->acks_latest = sp->hdr.serial; - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && - hard_ack == call->tx_top) { - rxrpc_end_tx_phase(call, "ETA"); - return; - } - if (before(hard_ack, call->tx_hard_ack) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort("AKW", call, 0); + if (nr_acks > call->tx_top - hard_ack) + return rxrpc_proto_abort("AKN", call, 0); if (after(hard_ack, call->tx_hard_ack)) rxrpc_rotate_tx_window(call, hard_ack); - if (after(first_soft_ack, call->tx_top)) - return; + if (nr_acks > 0) { + if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); + } + + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_end_tx_phase(call, false, "ETA"); + return; + } - if (nr_acks > call->tx_top - first_soft_ack + 1) - nr_acks = first_soft_ack - call->tx_top + 1; - if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); - rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); } /* @@ -619,7 +649,9 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_end_tx_phase(call, "ETL"); + rxrpc_rotate_tx_window(call, call->tx_top); + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_end_tx_phase(call, false, "ETL"); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 0d425e707f22..fe648711c2f7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -155,9 +155,10 @@ const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { [rxrpc_transmit_wait] = "WAI", [rxrpc_transmit_queue] = "QUE", - [rxrpc_transmit_queue_reqack] = "QRA", [rxrpc_transmit_queue_last] = "QLS", [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_rotate_last] = "RLS", + [rxrpc_transmit_await_reply] = "AWR", [rxrpc_transmit_end] = "END", }; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7cb34b2dfba9..93e6584cd751 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -94,11 +94,15 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_seq_t seq = sp->hdr.seq; int ret, ix; + u8 annotation = RXRPC_TX_ANNO_UNACK; _net("queue skb %p [%d]", skb, seq); ASSERTCMP(seq, ==, call->tx_top + 1); + if (last) + annotation |= RXRPC_TX_ANNO_LAST; + /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. 
*/ @@ -106,18 +110,14 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ix = seq & RXRPC_RXTX_BUFF_MASK; rxrpc_get_skb(skb, rxrpc_skb_tx_got); - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; call->tx_top = seq; - if (last) { - set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if (last) trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); - } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - trace_rxrpc_transmit(call, rxrpc_transmit_queue_reqack); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_queue); - } if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); From b86e218e0d422488e0febb07620fa97ae9713779 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 15:08:48 +0100 Subject: [PATCH 0751/1050] rxrpc: Don't call the tx_ack tracepoint if don't generate an ACK rxrpc_send_call_packet() is invoking the tx_ack tracepoint before it checks whether there's an ACK to transmit (another thread may jump in and transmit it). Fix this by only invoking the tracepoint if we get a valid ACK to transmit. Further, only allocate a serial number if we're going to actually transmit something. Signed-off-by: David Howells --- net/rxrpc/output.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 282cb1e36d06..5c1e008a5323 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -80,9 +80,6 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); - trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason, - top - hard_ack); - *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; @@ -119,8 +116,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) return -ENOMEM; } - serial = atomic_inc_return(&conn->serial); - msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -131,7 +126,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.cid = htonl(call->cid); pkt->whdr.callNumber = htonl(call->call_id); pkt->whdr.seq = 0; - pkt->whdr.serial = htonl(serial); pkt->whdr.type = type; pkt->whdr.flags = conn->out_clientflag; pkt->whdr.userStatus = 0; @@ -157,14 +151,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) spin_unlock_bh(&call->lock); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(pkt->ack.maxSkew), - ntohl(pkt->ack.firstPacket), - ntohl(pkt->ack.previousPacket), - ntohl(pkt->ack.serial), - rxrpc_acks(pkt->ack.reason), - pkt->ack.nAcks); iov[0].iov_len += sizeof(pkt->ack) + n; iov[1].iov_base = &pkt->ackinfo; @@ -176,7 +162,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) case RXRPC_PACKET_TYPE_ABORT: abort_code = call->abort_code; pkt->abort_code = htonl(abort_code); - _proto("Tx ABORT %%%u { %d }", serial, abort_code); iov[0].iov_len += sizeof(pkt->abort_code); len += sizeof(pkt->abort_code); ioc = 1; @@ -188,6 +173,17 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } + serial = atomic_inc_return(&conn->serial); + pkt->whdr.serial = htonl(serial); + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(call, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); + break; + } + if (ping) { call->ackr_ping = serial; 
smp_wmb(); From fc7ab6d29a3af0b7f6df7c095509378c8caf85b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 15:22:36 +0100 Subject: [PATCH 0752/1050] rxrpc: Add a tracepoint for the call timer Add a tracepoint to log call timer initiation, setting and expiry. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 36 ++++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 13 ++++++++++++- net/rxrpc/call_event.c | 7 ++++--- net/rxrpc/call_object.c | 6 ++++-- net/rxrpc/misc.c | 8 ++++++++ net/rxrpc/sendmsg.c | 2 +- 6 files changed, 65 insertions(+), 7 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e8f2afbbe0bf..57322897d745 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -414,6 +414,42 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->avg) ); +TRACE_EVENT(rxrpc_timer, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, + unsigned long now), + + TP_ARGS(call, why, now), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_timer_trace, why ) + __field(unsigned long, now ) + __field(unsigned long, expire_at ) + __field(unsigned long, ack_at ) + __field(unsigned long, resend_at ) + __field(unsigned long, timer ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->now = now; + __entry->expire_at = call->expire_at; + __entry->ack_at = call->ack_at; + __entry->resend_at = call->resend_at; + __entry->timer = call->timer.expires; + ), + + TP_printk("c=%p %s now=%lx x=%ld a=%ld r=%ld t=%ld", + __entry->call, + rxrpc_timer_traces[__entry->why], + __entry->now, + __entry->expire_at - __entry->now, + __entry->ack_at - __entry->now, + __entry->resend_at - __entry->now, + __entry->timer - __entry->now) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a494d56eb236..e564eca75985 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -678,6 +678,17 @@ enum rxrpc_rtt_rx_trace { extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; +enum rxrpc_timer_trace { + rxrpc_timer_begin, + rxrpc_timer_expired, + rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_resend, + rxrpc_timer_set_for_send, + rxrpc_timer__nr_trace +}; + +extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); @@ -707,7 +718,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_set_timer(struct rxrpc_call *); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8bc5c8e37ab4..90e970ba048a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,7 +24,7 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call) +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why) { unsigned long t, now = jiffies; @@ -45,6 +45,7 @@ void rxrpc_set_timer(struct rxrpc_call *call) if (call->timer.expires != t || !timer_pending(&call->timer)) { mod_timer(&call->timer, t); + trace_rxrpc_timer(call, why, now); } } @@ -120,7 +121,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); if (time_before(ack_at, call->ack_at)) { call->ack_at = ack_at; - 
rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_ack); } } } @@ -293,7 +294,7 @@ recheck_state: goto recheck_state; } - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f2fadf667e19..a53f4c2c0025 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -76,8 +76,10 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) + if (call->state < RXRPC_CALL_COMPLETE) { + trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); rxrpc_queue_call(call); + } } /* @@ -200,7 +202,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) call->ack_at = expire_at; call->resend_at = expire_at; call->timer.expires = expire_at + 1; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_begin); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fe648711c2f7..fa9942fabdf2 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -194,3 +194,11 @@ const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { [rxrpc_rtt_rx_ping_response] = "PONG", [rxrpc_rtt_rx_requested_ack] = "RACK", }; + +const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { + [rxrpc_timer_begin] = "Begin ", + [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_send] = "SetTx ", + [rxrpc_timer_set_for_resend] = "SetRTx", +}; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 93e6584cd751..99939372b5a4 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -153,7 +153,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (time_before(resend_at, call->resend_at)) { call->resend_at = resend_at; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_send); } } From be832aecc5ba811728e15a10f675f4a2187f25dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 0753/1050] rxrpc: Add data Tx tracepoint and adjust Tx ACK tracepoint Add a tracepoint to log transmission of DATA packets (including loss injection). Adjust the ACK transmission tracepoint to include the packet serial number and to line this up with the DATA transmission display. 
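With both tracepoints in place, the two events render side by side in the trace buffer, roughly as below, per the TP_printk format strings in the diff that follows (an illustrative mock-up with made-up values, not captured trace output):

	c=ffff88003b24a000 DATA 000004e2 q=00000001 fl=22
	 c=ffff88003b24a000 ACK 000004e3 IDL f=00000002 r=000004e2 n=0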
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 52 +++++++++++++++++++++++++++++------- net/rxrpc/conn_event.c | 5 ++-- net/rxrpc/output.c | 5 +++- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 57322897d745..6001bf93dc79 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -256,33 +256,67 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks) ); -TRACE_EVENT(rxrpc_tx_ack, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, - rxrpc_serial_t serial, u8 reason, u8 n_acks), +TRACE_EVENT(rxrpc_tx_data, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, + rxrpc_serial_t serial, u8 flags, bool lose), - TP_ARGS(call, first, serial, reason, n_acks), + TP_ARGS(call, seq, serial, flags, lose), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) - __field(rxrpc_seq_t, first ) + __field(rxrpc_seq_t, seq ) __field(rxrpc_serial_t, serial ) + __field(u8, flags ) + __field(bool, lose ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->serial = serial; + __entry->flags = flags; + __entry->lose = lose; + ), + + TP_printk("c=%p DATA %08x q=%08x fl=%02x%s", + __entry->call, + __entry->serial, + __entry->seq, + __entry->flags, + __entry->lose ? " *LOSE*" : "") + ); + +TRACE_EVENT(rxrpc_tx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial, + u8 reason, u8 n_acks), + + TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, ack_first ) + __field(rxrpc_serial_t, ack_serial ) __field(u8, reason ) __field(u8, n_acks ) ), TP_fast_assign( __entry->call = call; - __entry->first = first; __entry->serial = serial; + __entry->ack_first = ack_first; + __entry->ack_serial = ack_serial; __entry->reason = reason; __entry->n_acks = n_acks; ), - TP_printk("c=%p %s f=%08x r=%08x n=%u", + TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", __entry->call, - rxrpc_acks(__entry->reason), - __entry->first, __entry->serial, + rxrpc_acks(__entry->reason), + __entry->ack_first, + __entry->ack_serial, __entry->n_acks) ); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 75a15a4c74c3..a1cf1ec5f29e 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -98,9 +98,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); len += sizeof(pkt.ack) + sizeof(pkt.info); - - trace_rxrpc_tx_ack(NULL, chan->last_seq, 0, - RXRPC_ACK_DUPLICATE, 0); break; } @@ -122,6 +119,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); break; case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); _proto("Tx ACK %%%u [re]", serial); break; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5c1e008a5323..e47fbd1c836d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -177,7 +177,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.serial = htonl(serial); switch (type) { case RXRPC_PACKET_TYPE_ACK: - trace_rxrpc_tx_ack(call, + trace_rxrpc_tx_ack(call, serial, ntohl(pkt->ack.firstPacket), ntohl(pkt->ack.serial), pkt->ack.reason, pkt->ack.nAcks); @@ -275,6 +275,8 @@ int rxrpc_send_data_packet(struct rxrpc_call 
*call, struct sk_buff *skb) if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, + whdr.flags, true); rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); _leave(" = 0 [lose]"); return 0; @@ -302,6 +304,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false); if (ret >= 0) { ktime_t now = ktime_get_real(); skb->tstamp = now; From 89b475abdb107a74f57497b65becaf837a0e5b6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: [PATCH 0754/1050] rxrpc: Add a tracepoint to log injected Rx packet loss Add a tracepoint to log received packets that get discarded due to Rx packet loss. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 21 +++++++++++++++++++++ net/rxrpc/input.c | 11 +++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 6001bf93dc79..9413b17ba04b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -484,6 +484,27 @@ TRACE_EVENT(rxrpc_timer, __entry->timer - __entry->now) ); +TRACE_EVENT(rxrpc_rx_lose, + TP_PROTO(struct rxrpc_skb_priv *sp), + + TP_ARGS(sp), + + TP_STRUCT__entry( + __field_struct(struct rxrpc_host_header, hdr ) + ), + + TP_fast_assign( + memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); + ), + + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s *LOSE*", + __entry->hdr.epoch, __entry->hdr.cid, + __entry->hdr.callNumber, __entry->hdr.serviceId, + __entry->hdr.serial, __entry->hdr.seq, + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index fb3e2f6afa3b..19b1e189f5dc 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,20 +837,19 @@ void rxrpc_data_ready(struct sock *udp_sk) skb_orphan(skb); sp = rxrpc_skb(skb); + /* dig out the RxRPC connection details */ + if (rxrpc_extract_header(sp, skb) < 0) + goto bad_message; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); return; } } - _net("Rx UDP packet from %08x:%04hu", - ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); - - /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; trace_rxrpc_rx_packet(sp); _net("Rx RxRPC %s ep=%x call=%x:%x", From 9c7ad434441da6b5d4ac878cac368fbdaec99b56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:50:40 +0100 Subject: [PATCH 0755/1050] rxrpc: Add tracepoint for ACK proposal Add a tracepoint to log proposed ACKs, including whether the proposal is used to update a pending ACK or is discarded in favour of an earlier, higher priority ACK. Whilst we're at it, get rid of the rxrpc_acks() function and access the name array directly. We do, however, need to validate the ACK reason number given to trace_rxrpc_rx_ack() to make sure we don't overrun the array.
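The validation is a simple clamp before the name array is indexed, along these lines (sketch of the rxrpc_input_ack() hunk in the diff below):

	u8 ack_reason;

	/* Never index rxrpc_ack_names[] with an unchecked wire value;
	 * out-of-range reasons map to the final "-?-" entry.
	 */
	ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
		      buf.ack.reason : RXRPC_ACK__INVALID);
	trace_rxrpc_rx_ack(call, first_soft_ack, ack_reason, nr_acks);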
Signed-off-by: David Howells --- include/rxrpc/packet.h | 1 + include/trace/events/rxrpc.h | 42 ++++++++++++++++++++++++++++++++++-- net/rxrpc/ar-internal.h | 25 +++++++++++++++++++-- net/rxrpc/call_event.c | 21 ++++++++++++------ net/rxrpc/input.c | 19 ++++++++++------ net/rxrpc/misc.c | 30 ++++++++++++++++---------- net/rxrpc/output.c | 3 ++- net/rxrpc/recvmsg.c | 3 ++- 8 files changed, 114 insertions(+), 30 deletions(-) diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index fd6eb3a60a8c..703a64b4681a 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -123,6 +123,7 @@ struct rxrpc_ackpacket { #define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ #define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ #define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ +#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ uint8_t nAcks; /* number of ACKs */ #define RXRPC_MAXACKS 255 diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9413b17ba04b..d67a8c6b085a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -251,7 +251,7 @@ TRACE_EVENT(rxrpc_rx_ack, TP_printk("c=%p %s f=%08x n=%u", __entry->call, - rxrpc_acks(__entry->reason), + rxrpc_ack_names[__entry->reason], __entry->first, __entry->n_acks) ); @@ -314,7 +314,7 @@ TRACE_EVENT(rxrpc_tx_ack, TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", __entry->call, __entry->serial, - rxrpc_acks(__entry->reason), + rxrpc_ack_names[__entry->reason], __entry->ack_first, __entry->ack_serial, __entry->n_acks) @@ -505,6 +505,44 @@ TRACE_EVENT(rxrpc_rx_lose, __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") ); +TRACE_EVENT(rxrpc_propose_ack, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why, + u8 ack_reason, rxrpc_serial_t serial, bool immediate, + bool background, enum rxrpc_propose_ack_outcome outcome), + + TP_ARGS(call, why, ack_reason, serial, immediate, background, + outcome), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_propose_ack_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(u8, ack_reason ) + __field(bool, immediate ) + __field(bool, background ) + __field(enum rxrpc_propose_ack_outcome, outcome ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->ack_reason = ack_reason; + __entry->immediate = immediate; + __entry->background = background; + __entry->outcome = outcome; + ), + + TP_printk("c=%p %s %s r=%08x i=%u b=%u%s", + __entry->call, + rxrpc_propose_ack_traces[__entry->why], + rxrpc_ack_names[__entry->ack_reason], + __entry->serial, + __entry->immediate, + __entry->background, + rxrpc_propose_ack_outcomes[__entry->outcome]) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e564eca75985..042dbcc52654 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -689,8 +689,28 @@ enum rxrpc_timer_trace { extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; +enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_respond_to_ack, + rxrpc_propose_ack_respond_to_ping, + rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_terminal_ack, + rxrpc_propose_ack__nr_trace +}; + +enum rxrpc_propose_ack_outcome { + rxrpc_propose_ack_use, + rxrpc_propose_ack_update, + rxrpc_propose_ack_subsume, + 
rxrpc_propose_ack__nr_outcomes +}; + +extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; +extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; + extern const char *const rxrpc_pkts[]; -extern const char *rxrpc_acks(u8 reason); +extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; #include @@ -719,7 +739,8 @@ int rxrpc_reject_call(struct rxrpc_sock *); * call_event.c */ void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, + enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 90e970ba048a..fd5b11339ffb 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -58,14 +58,13 @@ out: */ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, u16 skew, u32 serial, bool immediate, - bool background) + bool background, + enum rxrpc_propose_ack_trace why) { + enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; - _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers, but we don't alter the timeout. */ @@ -74,15 +73,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); if (ack_reason == call->ackr_reason) { if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + outcome = rxrpc_propose_ack_update; call->ackr_serial = serial; call->ackr_skew = skew; } if (!immediate) - return; + goto trace; } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { call->ackr_reason = ack_reason; call->ackr_serial = serial; call->ackr_skew = skew; + } else { + outcome = rxrpc_propose_ack_subsume; } switch (ack_reason) { @@ -124,17 +126,22 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_set_timer(call, rxrpc_timer_set_for_ack); } } + +trace: + trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, + background, outcome); } /* * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate, bool background) + u16 skew, u32 serial, bool immediate, bool background, + enum rxrpc_propose_ack_trace why) { spin_lock_bh(&call->lock); __rxrpc_propose_ACK(call, ack_reason, skew, serial, - immediate, background); + immediate, background, why); spin_unlock_bh(&call->lock); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 19b1e189f5dc..349698d87ad1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -49,7 +49,8 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, if (call->peer->rtt_usage < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, - true, true); + true, true, + rxrpc_propose_ack_ping_for_params); } /* @@ -382,7 +383,8 @@ skip: ack: if (ack) rxrpc_propose_ACK(call, ack, skew, ack_serial, - immediate_ack, true); + immediate_ack, true, + rxrpc_propose_ack_input_data); if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) rxrpc_notify_socket(call); @@ -539,6 +541,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, 
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { + u8 ack_reason; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); union { struct rxrpc_ackpacket ack; @@ -561,8 +564,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; + ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? + buf.ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, first_soft_ack, buf.ack.reason, nr_acks); + trace_rxrpc_rx_ack(call, first_soft_ack, ack_reason, nr_acks); _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, @@ -570,7 +575,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack, ntohl(buf.ack.previousPacket), acked_serial, - rxrpc_acks(buf.ack.reason), + rxrpc_ack_names[ack_reason], buf.ack.nAcks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -583,10 +588,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skew, sp->hdr.serial, true, true); + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skew, sp->hdr.serial, true, true); + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ack); } offset = sp->offset + nr_acks + 3; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fa9942fabdf2..1ca14835d87f 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -91,17 +91,10 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_PING] = 9, }; -const char *rxrpc_acks(u8 reason) -{ - static const char *const str[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", - "IDL", "-?-" - }; - - if (reason >= ARRAY_SIZE(str)) - reason = ARRAY_SIZE(str) - 1; - return str[reason]; -} +const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" +}; const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { [rxrpc_skb_rx_cleaned] = "Rx CLN", @@ -202,3 +195,18 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", }; + +const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", + [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", + [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_terminal_ack] = "ClTerm ", +}; + +const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { + [rxrpc_propose_ack_use] = "", + [rxrpc_propose_ack_update] = " Update", + [rxrpc_propose_ack_subsume] = " Subsume", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index e47fbd1c836d..0c563e325c9d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -210,7 +210,8 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), - true, true); + true, true, + rxrpc_propose_ack_retry_tx); break; case RXRPC_PACKET_TYPE_ABORT: break; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 99e4c0ae30f1..8c7f3de45bac 100644 --- a/net/rxrpc/recvmsg.c +++ 
b/net/rxrpc/recvmsg.c @@ -141,7 +141,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false, + rxrpc_propose_ack_terminal_ack); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); } From c6672e3fe4a641bf302d6309ab4d5ee55648e758 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:58:55 +0100 Subject: [PATCH 0756/1050] rxrpc: Add a tracepoint to log which packets will be retransmitted Add a tracepoint to log in rxrpc_resend() which packets will be retransmitted. Note that if a positive ACK comes in whilst we have dropped the lock to retransmit another packet, the actual retransmission may not happen, though some of the effects will (such as altering the congestion management). Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 27 +++++++++++++++++++++++++++ net/rxrpc/call_event.c | 2 ++ 2 files changed, 29 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d67a8c6b085a..56475497043d 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -543,6 +543,33 @@ TRACE_EVENT(rxrpc_propose_ack, rxrpc_propose_ack_outcomes[__entry->outcome]) ); +TRACE_EVENT(rxrpc_retransmit, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation, + s64 expiry), + + TP_ARGS(call, seq, annotation, expiry), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, seq ) + __field(u8, annotation ) + __field(s64, expiry ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->annotation = annotation; + __entry->expiry = expiry; + ), + + TP_printk("c=%p q=%x a=%02x xp=%lld", + __entry->call, + __entry->seq, + __entry->annotation, + __entry->expiry) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fd5b11339ffb..a78a92fe5d77 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -196,6 +196,8 @@ static void rxrpc_resend(struct rxrpc_call *call) /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + trace_rxrpc_retransmit(call, seq, annotation | anno_type, + ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); From 7e956304eb8a285304a78582e4537e72c6365f20 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 23 Sep 2016 15:13:53 +0100 Subject: [PATCH 0757/1050] MIPS: Fix pre-r6 emulation FPU initialisation In the mipsr2_decoder() function, used to emulate pre-MIPSr6 instructions that were removed in MIPSr6, the init_fpu() function is called if a removed pre-MIPSr6 floating point instruction is the first floating point instruction used by the task. However, init_fpu() performs various actions that rely upon not being migrated. For example, in the most basic case it sets the coprocessor 0 Status.CU1 bit to enable the FPU & then loads FP register context into the FPU registers.
If the task were to migrate during this time, it may end up attempting to load FP register context on a different CPU where it hasn't set the CU1 bit, leading to errors such as: do_cpu invoked from kernel context![#2]: CPU: 2 PID: 7338 Comm: fp-prctl Tainted: G D 4.7.0-00424-g49b0c82 #2 task: 838e4000 ti: 88d38000 task.ti: 88d38000 $ 0 : 00000000 00000001 ffffffff 88d3fef8 $ 4 : 838e4000 88d38004 00000000 00000001 $ 8 : 3400fc01 801f8020 808e9100 24000000 $12 : dbffffff 807b69d8 807b0000 00000000 $16 : 00000000 80786150 00400fc4 809c0398 $20 : 809c0338 0040273c 88d3ff28 808e9d30 $24 : 808e9d30 00400fb4 $28 : 88d38000 88d3fe88 00000000 8011a2ac Hi : 0040273c Lo : 88d3ff28 epc : 80114178 _restore_fp+0x10/0xa0 ra : 8011a2ac mipsr2_decoder+0xd5c/0x1660 Status: 1400fc03 KERNEL EXL IE Cause : 1080002c (ExcCode 0b) PrId : 0001a920 (MIPS I6400) Modules linked in: Process fp-prctl (pid: 7338, threadinfo=88d38000, task=838e4000, tls=766527d0) Stack : 00000000 00000000 00000000 88d3fe98 00000000 00000000 809c0398 809c0338 808e9100 00000000 88d3ff28 00400fc4 00400fc4 0040273c 7fb69e18 004a0000 004a0000 004a0000 7664add0 8010de18 00000000 00000000 88d3fef8 88d3ff28 808e9100 00000000 766527d0 8010e534 000c0000 85755000 8181d580 00000000 00000000 00000000 004a0000 00000000 766527d0 7fb69e18 004a0000 80105c20 ... Call Trace: [<80114178>] _restore_fp+0x10/0xa0 [<8011a2ac>] mipsr2_decoder+0xd5c/0x1660 [<8010de18>] do_ri+0x90/0x6b8 [<80105c20>] ret_from_exception+0x0/0x10 Fix this by disabling preemption around the call to init_fpu(), ensuring that it starts & completes on one CPU. Signed-off-by: Paul Burton Fixes: b0a668fb2038 ("MIPS: kernel: mips-r2-to-r6-emul: Add R2 emulator for MIPS R6") Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.0+ Patchwork: https://patchwork.linux-mips.org/patch/14305/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/mips-r2-to-r6-emul.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index c3372cac6db2..0a7e10b5f9e3 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1164,7 +1164,9 @@ fpu_emul: regs->regs[31] = r31; regs->cp0_epc = epc; if (!used_math()) { /* First time FPU user. */ + preempt_disable(); err = init_fpu(); + preempt_enable(); set_used_math(); } lose_fpu(1); /* Save FPU state for the emulator. */ From c8712c6a674e3382fe4d26d108251ccfa55d08e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Sep 2016 10:25:48 -0600 Subject: [PATCH 0758/1050] blk-mq: skip unmapped queues in blk_mq_alloc_request_hctx This gives the caller feedback that a given hctx is not mapped and thus no command can be sent on it. Signed-off-by: Christoph Hellwig Tested-by: Steve Wise Signed-off-by: Jens Axboe --- block/blk-mq.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 13f5a6c1de76..c207fa9870eb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -296,17 +296,29 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, if (ret) return ERR_PTR(ret); + /* + * Check if the hardware context is actually mapped to anything. + * If not tell the caller that it should skip this queue.
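+	 * We return -EXDEV in this case so that the caller can tell it
+	 * apart from the -EWOULDBLOCK returned when request allocation
+	 * fails on a mapped queue.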
+ */ hctx = q->queue_hw_ctx[hctx_idx]; + if (!blk_mq_hw_queue_mapped(hctx)) { + ret = -EXDEV; + goto out_queue_exit; + } ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw, 0); if (!rq) { - blk_queue_exit(q); - return ERR_PTR(-EWOULDBLOCK); + ret = -EWOULDBLOCK; + goto out_queue_exit; } return rq; + +out_queue_exit: + blk_queue_exit(q); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); From 9157056da8f8c4a6305f15619e269f164b63a6de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 23 Sep 2016 16:55:49 -0400 Subject: [PATCH 0759/1050] cgroup: fix invalid controller enable rejections with cgroup namespace On the v2 hierarchy, "cgroup.subtree_control" rejects controller enables if the cgroup has processes in it. The enforcement of this logic assumes that the cgroup wouldn't have any css_sets associated with it if there are no tasks in the cgroup, which is no longer true since a79a908fd2b0 ("cgroup: introduce cgroup namespaces"). When a cgroup namespace is created, it pins the css_set of the creating task to use it as the root css_set of the namespace. This extra reference stays as long as the namespace is around and makes "cgroup.subtree_control" think that the namespace root cgroup is not empty even when it is and thus reject controller enables. Fix it by making cgroup_subtree_control() walk and test emptiness of each css_set instead of testing whether the list_head is empty. While at it, update the comment of cgroup_task_count() to indicate that the returned value may be higher than the number of tasks, which has always been true due to temporary references and doesn't break anything. Signed-off-by: Tejun Heo Reported-by: Evgeny Vereshchagin Cc: Serge E. Hallyn Cc: Aditya Kali Cc: Eric W. Biederman Cc: stable@vger.kernel.org # v4.6+ Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") Link: https://github.com/systemd/systemd/pull/3589#issuecomment-249089541 --- kernel/cgroup.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7f5221..0d4ee1ea5c31 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3446,9 +3446,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * Except for the root, subtree_control must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ - if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { - ret = -EBUSY; - goto out_unlock; + if (enable && cgroup_parent(cgrp)) { + struct cgrp_cset_link *link; + + /* + * Because namespaces pin csets too, @cgrp->cset_links + * might not be empty even when @cgrp is empty. Walk and + * verify each cset. + */ + spin_lock_irq(&css_set_lock); + + ret = 0; + list_for_each_entry(link, &cgrp->cset_links, cset_link) { + if (css_set_populated(link->cset)) { + ret = -EBUSY; + break; + } + } + + spin_unlock_irq(&css_set_lock); + + if (ret) + goto out_unlock; } /* save and update control masks and prepare csses */ @@ -3899,7 +3918,9 @@ void cgroup_file_notify(struct cgroup_file *cfile) * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question * - * Return the number of tasks in the cgroup. + * Return the number of tasks in the cgroup. The returned number can be + * higher than the actual number of tasks due to css_set references from + * namespace roots and temporary usages. 
*/ static int cgroup_task_count(const struct cgroup *cgrp) { From 8083ad1cf910dc22d4300213583d9d540853898a Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Fri, 23 Sep 2016 18:56:26 +0100 Subject: [PATCH 0760/1050] Bluetooth: Tidy-up coding style in hci_bcsp.c drivers/bluetooth/hci_bcsp.c contains some style issues as highlighted by ./scripts/checkpatch.pl --strict -f drivers/bluetooth/hci_bcsp.c a) comments - maintainer prefers network style comments b) positioning of lines in multi-line statements c) spaces after casts d) missing blank lines after declarations Therefore, tidy-up the above to make it easier to apply future code changes that have conforming style. Signed-off-by: Dean Jenkins Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcsp.c | 55 +++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index d7d23ceba4d1..2628b3683b81 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -90,7 +90,8 @@ struct bcsp_struct { /* ---- BCSP CRC calculation ---- */ /* Table for calculating CRC for polynomial 0x1021, LSB processed first, -initial value 0xffff, bits shifted in reverse order. */ + * initial value 0xffff, bits shifted in reverse order. + */ static const u16 crc_table[] = { 0x0000, 0x1081, 0x2102, 0x3183, @@ -174,7 +175,7 @@ static int bcsp_enqueue(struct hci_uart *hu, struct sk_buff *skb) } static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, - int len, int pkt_type) + int len, int pkt_type) { struct sk_buff *nskb; u8 hdr[4], chan; @@ -213,6 +214,7 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, /* Vendor specific commands */ if (hci_opcode_ogf(__le16_to_cpu(opcode)) == 0x3f) { u8 desc = *(data + HCI_COMMAND_HDR_SIZE); + if ((desc & 0xf0) == 0xc0) { data += HCI_COMMAND_HDR_SIZE + 1; len -= HCI_COMMAND_HDR_SIZE + 1; @@ -271,8 +273,8 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, /* Put CRC */ if (bcsp->use_crc) { bcsp_txmsg_crc = bitrev16(bcsp_txmsg_crc); - bcsp_slip_one_byte(nskb, (u8) ((bcsp_txmsg_crc >> 8) & 0x00ff)); - bcsp_slip_one_byte(nskb, (u8) (bcsp_txmsg_crc & 0x00ff)); + bcsp_slip_one_byte(nskb, (u8)((bcsp_txmsg_crc >> 8) & 0x00ff)); + bcsp_slip_one_byte(nskb, (u8)(bcsp_txmsg_crc & 0x00ff)); } bcsp_slip_msgdelim(nskb); @@ -287,7 +289,8 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) struct sk_buff *skb; /* First of all, check for unreliable messages in the queue, - since they have priority */ + * since they have priority + */ skb = skb_dequeue(&bcsp->unrel); if (skb != NULL) { @@ -414,7 +417,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) /* spot "conf" pkts and reply with a "conf rsp" pkt */ if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && - !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) { + !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) { struct sk_buff *nskb = alloc_skb(4, GFP_ATOMIC); BT_DBG("Found a LE conf pkt"); @@ -428,7 +431,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) } /* Spot "sync" pkts. If we find one...disaster! 
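 * (a sync pkt indicates that the remote card has reset, losing all link state)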
*/ else if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && - !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) { + !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) { BT_ERR("Found a LE sync pkt, card has reset"); } } @@ -446,7 +449,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char default: memcpy(skb_put(bcsp->rx_skb, 1), &byte, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, byte); bcsp->rx_count--; } @@ -457,7 +460,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char case 0xdc: memcpy(skb_put(bcsp->rx_skb, 1), &c0, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xc0); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; @@ -466,7 +469,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char case 0xdd: memcpy(skb_put(bcsp->rx_skb, 1), &db, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xdb); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; @@ -502,18 +505,18 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) bcsp_pkt_cull(bcsp); if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && - bcsp->rx_skb->data[0] & 0x80) { + bcsp->rx_skb->data[0] & 0x80) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && - bcsp->rx_skb->data[0] & 0x80) { + bcsp->rx_skb->data[0] & 0x80) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && - !(bcsp->rx_skb->data[0] & 0x80)) { + !(bcsp->rx_skb->data[0] & 0x80)) { bcsp_handle_le_pkt(hu); pass_up = 0; } else @@ -537,9 +540,9 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) hci_recv_frame(hu->hdev, bcsp->rx_skb); } else { BT_ERR("Packet for unknown channel (%u %s)", - bcsp->rx_skb->data[1] & 0x0f, - bcsp->rx_skb->data[0] & 0x80 ? - "reliable" : "unreliable"); + bcsp->rx_skb->data[1] & 0x0f, + bcsp->rx_skb->data[0] & 0x80 ? 
+ "reliable" : "unreliable"); kfree_skb(bcsp->rx_skb); } } else @@ -567,7 +570,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) const unsigned char *ptr; BT_DBG("hu %p count %d rx_state %d rx_count %ld", - hu, count, bcsp->rx_state, bcsp->rx_count); + hu, count, bcsp->rx_state, bcsp->rx_count); ptr = data; while (count) { @@ -586,18 +589,18 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) switch (bcsp->rx_state) { case BCSP_W4_BCSP_HDR: - if ((0xff & (u8) ~ (bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] + - bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { + if ((0xff & (u8)~(bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] + + bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { BT_ERR("Error in BCSP hdr checksum"); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; } - if (bcsp->rx_skb->data[0] & 0x80 /* reliable pkt */ - && (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) { + if (bcsp->rx_skb->data[0] & 0x80 && /* reliable pkt */ + (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) { BT_ERR("Out-of-order packet arrived, got %u expected %u", - bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); + bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -620,8 +623,8 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) case BCSP_W4_CRC: if (bitrev16(bcsp->message_crc) != bscp_get_crc(bcsp)) { BT_ERR("Checksum failed: computed %04x received %04x", - bitrev16(bcsp->message_crc), - bscp_get_crc(bcsp)); + bitrev16(bcsp->message_crc), + bscp_get_crc(bcsp)); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -679,7 +682,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) /* Arrange to retransmit all messages in the relq. */ static void bcsp_timed_event(unsigned long arg) { - struct hci_uart *hu = (struct hci_uart *) arg; + struct hci_uart *hu = (struct hci_uart *)arg; struct bcsp_struct *bcsp = hu->priv; struct sk_buff *skb; unsigned long flags; @@ -715,7 +718,7 @@ static int bcsp_open(struct hci_uart *hu) init_timer(&bcsp->tbcsp); bcsp->tbcsp.function = bcsp_timed_event; - bcsp->tbcsp.data = (u_long) hu; + bcsp->tbcsp.data = (u_long)hu; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; From 37332ddc1470b820bd9d1eb36a2ca6ce1efb209b Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Fri, 23 Sep 2016 18:56:27 +0100 Subject: [PATCH 0761/1050] Bluetooth: BCSP fails to ACK re-transmitted frames from the peer Send an ACK frame with the current txack value in response to every received reliable frame unless a TX reliable frame is being sent. This modification allows re-transmitted frames from the remote peer to be acknowledged rather than ignored. It means that the remote peer knows which frame number to start re-transmitting from. Without this modification, the recovery time to a missing frame from the remote peer was unnecessarily being extended because the headers of the out of order reliable frames were being discarded rather than being processed. The frame headers of received frames will indicate whether the local peer's transmissions have been acknowledged by the remote peer. Therefore, the local peer may unnecessarily re-transmit despite the remote peer already indicating that the frame had been acknowledged in out of order reliable frame. 
Signed-off-by: Dean Jenkins Signed-off-by: Jiada Wang Signed-off-by: Rajeev Kumar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcsp.c | 83 ++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 2628b3683b81..a2c921faaa12 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -488,13 +488,28 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char static void bcsp_complete_rx_pkt(struct hci_uart *hu) { struct bcsp_struct *bcsp = hu->priv; - int pass_up; + int pass_up = 0; if (bcsp->rx_skb->data[0] & 0x80) { /* reliable pkt */ BT_DBG("Received seqno %u from card", bcsp->rxseq_txack); - bcsp->rxseq_txack++; - bcsp->rxseq_txack %= 0x8; - bcsp->txack_req = 1; + + /* check the rx sequence number is as expected */ + if ((bcsp->rx_skb->data[0] & 0x07) == bcsp->rxseq_txack) { + bcsp->rxseq_txack++; + bcsp->rxseq_txack %= 0x8; + } else { + /* handle re-transmitted packet or + * when packet was missed + */ + BT_ERR("Out-of-order packet arrived, got %u expected %u", + bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); + + /* do not process out-of-order packet payload */ + pass_up = 2; + } + + /* send current txack value to all received reliable packets */ + bcsp->txack_req = 1; /* If needed, transmit an ack pkt */ hci_uart_tx_wakeup(hu); @@ -503,26 +518,33 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) bcsp->rxack = (bcsp->rx_skb->data[0] >> 3) & 0x07; BT_DBG("Request for pkt %u from card", bcsp->rxack); + /* handle received ACK indications, + * including those from out-of-order packets + */ bcsp_pkt_cull(bcsp); - if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && - bcsp->rx_skb->data[0] & 0x80) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && - bcsp->rx_skb->data[0] & 0x80) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && - !(bcsp->rx_skb->data[0] & 0x80)) { - bcsp_handle_le_pkt(hu); - pass_up = 0; - } else - pass_up = 0; - if (!pass_up) { + if (pass_up != 2) { + if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && + (bcsp->rx_skb->data[0] & 0x80)) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && + (bcsp->rx_skb->data[0] & 0x80)) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && + !(bcsp->rx_skb->data[0] & 0x80)) { + bcsp_handle_le_pkt(hu); + pass_up = 0; + } else { + pass_up = 0; + } + } + + if (pass_up == 0) { struct hci_event_hdr hdr; u8 desc = (bcsp->rx_skb->data[1] & 0x0f); @@ -547,11 +569,16 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) } } else kfree_skb(bcsp->rx_skb); - } else { + } else if (pass_up == 1) { /* Pull out BCSP hdr */ skb_pull(bcsp->rx_skb, 4); hci_recv_frame(hu->hdev, bcsp->rx_skb); + } else { + /* ignore packet payload of already ACKed re-transmitted + * packets or when a packet was missed in the BCSP window + */ + kfree_skb(bcsp->rx_skb); } bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -597,16 +624,6 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) bcsp->rx_count = 0; 
continue; } - if (bcsp->rx_skb->data[0] & 0x80 && /* reliable pkt */ - (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) { - BT_ERR("Out-of-order packet arrived, got %u expected %u", - bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); - - kfree_skb(bcsp->rx_skb); - bcsp->rx_state = BCSP_W4_PKT_DELIMITER; - bcsp->rx_count = 0; - continue; - } bcsp->rx_state = BCSP_W4_DATA; bcsp->rx_count = (bcsp->rx_skb->data[1] >> 4) + (bcsp->rx_skb->data[2] << 4); /* May be 0 */ From 056506944a58814e5767d55ef6389aa2ab06908c Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Fri, 23 Sep 2016 18:56:28 +0100 Subject: [PATCH 0762/1050] Bluetooth: Use single return in hci_uart_tty_ioctl() call Remove the multiple return statements in the hci_uart_tty_ioctl() call and add a single return statement. This code re-organisation allows subsequent locking to be easily added. Signed-off-by: Vignesh Raman Signed-off-by: Dean Jenkins Signed-off-by: Rajeev Kumar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_ldisc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 9a3aab67b6bb..9497c469efd2 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -697,34 +697,36 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, case HCIUARTSETPROTO: if (!test_and_set_bit(HCI_UART_PROTO_SET, &hu->flags)) { err = hci_uart_set_proto(hu, arg); - if (err) { + if (err) clear_bit(HCI_UART_PROTO_SET, &hu->flags); - return err; - } } else - return -EBUSY; + err = -EBUSY; break; case HCIUARTGETPROTO: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return hu->proto->id; - return -EUNATCH; + err = hu->proto->id; + else + err = -EUNATCH; + break; case HCIUARTGETDEVICE: if (test_bit(HCI_UART_REGISTERED, &hu->flags)) - return hu->hdev->id; - return -EUNATCH; + err = hu->hdev->id; + else + err = -EUNATCH; + break; case HCIUARTSETFLAGS: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return -EBUSY; - err = hci_uart_set_flags(hu, arg); - if (err) - return err; + err = -EBUSY; + else + err = hci_uart_set_flags(hu, arg); break; case HCIUARTGETFLAGS: - return hu->hdev_flags; + err = hu->hdev_flags; + break; default: err = n_tty_ioctl_helper(tty, file, cmd, arg); From c9cc599a96a6822c52cd72ed31dd7f813d792b4f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:12 +0300 Subject: [PATCH 0763/1050] net/mlx4_core: Fix QUERY FUNC CAP flags Separate QUERY_FUNC_CAP flags0 from QUERY_FUNC_CAP flags, as 'flags' is already used for another set of flags in FUNC CAP, while phv bit should be part of a different set of flags. Remove QUERY_FUNC_CAP port_flags field, as it is not in use. Fixes: 77fc29c4bbbb ('net/mlx4_core: Preparations for 802.1ad VLAN support') Fixes: 5cc914f10851 ('mlx4_core: Added FW commands and their wrappers for supporting SRIOV') Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 ++--- drivers/net/ethernet/mellanox/mlx4/fw.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d728704d0c7b..c7523307b41a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -612,8 +612,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); - func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); + MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: @@ -2914,7 +2913,7 @@ int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); if (!err) - *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; + *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; return err; } EXPORT_SYMBOL(get_phv_bit); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index cdbd76f10ced..f11614f12517 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -152,7 +152,7 @@ struct mlx4_func_cap { u32 qp1_proxy_qpn; u32 reserved_lkey; u8 physical_port; - u8 port_flags; + u8 flags0; u8 flags1; u64 phys_port_id; u32 extra_flags; From 7c3d21c8153c6bfb5690e35e086b0522c42442d9 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:13 +0300 Subject: [PATCH 0764/1050] net/mlx4_core: Preparation for VF vlan protocol 802.1ad Check device capability to support VF vlan protocol 802.1ad mode. Add vport attribute vlan protocol. Init vport vlan protocol by default to 802.1Q. Add update QP support for VF vlan protocol 802.1ad. Add func capability vlan_offload_disable to disable all vlan HW acceleration on VF while the VF is set to VF vlan protocol 802.1ad mode. No change in VF vlan protocol 802.1Q (VST) mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 7 ++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 37 ++++++++++++++--- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + .../ethernet/mellanox/mlx4/resource_tracker.c | 40 +++++++++++++++---- include/linux/mlx4/device.h | 3 ++ include/linux/mlx4/qp.h | 2 + 6 files changed, 78 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a58d96cf1ed1..09c969420eac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1851,6 +1851,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, if (vp_oper->state.default_vlan == vp_admin->default_vlan && vp_oper->state.default_qos == vp_admin->default_qos && + vp_oper->state.vlan_proto == vp_admin->vlan_proto && vp_oper->state.link_state == vp_admin->link_state && vp_oper->state.qos_vport == vp_admin->qos_vport) return 0; @@ -1909,6 +1910,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, vp_oper->state.default_vlan = vp_admin->default_vlan; vp_oper->state.default_qos = vp_admin->default_qos; + vp_oper->state.vlan_proto = vp_admin->vlan_proto; vp_oper->state.link_state = vp_admin->link_state; vp_oper->state.qos_vport = vp_admin->qos_vport; @@ -1922,6 +1924,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, work->qos_vport = vp_oper->state.qos_vport; work->vlan_id = vp_oper->state.default_vlan; work->vlan_ix = vp_oper->vlan_idx; + work->vlan_proto = vp_oper->state.vlan_proto; work->priv = priv; INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); queue_work(priv->mfunc.master.comm_wq, &work->work); @@ -2012,6 +2015,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) vp_admin->default_vlan, &(vp_oper->vlan_idx)); if (err) { vp_oper->vlan_idx = NO_INDX; + vp_oper->state.default_vlan = MLX4_VGT; + vp_oper->state.vlan_proto = htons(ETH_P_8021Q); mlx4_warn(&priv->dev, "No vlan resources slave %d, port %d\n", slave, port); @@ -2388,6 +2393,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) admin_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; + admin_vport->vlan_proto = htons(ETH_P_8021Q); + oper_vport->vlan_proto = htons(ETH_P_8021Q); vf_oper->vport[port].vlan_idx = NO_INDX; vf_oper->vport[port].mac_idx = NO_INDX; mlx4_set_random_admin_guid(dev, i, port); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c7523307b41a..7dc9d38a51f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -158,7 +158,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", [33] = "RoCEv2 support", - [34] = "DMFS Sniffer support (UC & MC)" + [34] = "DMFS Sniffer support (UC & MC)", + [35] = "QinQ VST mode support", }; int i; @@ -313,12 +314,15 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 #define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 +#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); + struct mlx4_vport_oper_state *vp_oper = + 
&priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier]; if (converted_port < 0) return -EINVAL; @@ -357,11 +361,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); - if (dev->caps.phv_bit[port]) { - field = QUERY_FUNC_CAP_PHV_BIT; - MLX4_PUT(outbox->buf, field, - QUERY_FUNC_CAP_FLAGS0_OFFSET); - } + field = 0; + if (dev->caps.phv_bit[port]) + field |= QUERY_FUNC_CAP_PHV_BIT; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE; + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = @@ -689,6 +694,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 @@ -856,6 +862,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); @@ -2937,6 +2946,22 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) } EXPORT_SYMBOL(set_phv_bit); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled) +{ + struct mlx4_func_cap func_cap; + int err; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *vlan_offload_disabled = + !!(func_cap.flags0 & + QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE); + return err; +} +EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled); + void mlx4_replace_zero_macs(struct mlx4_dev *dev) { int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c128ba3ef014..fdfe1ace07d7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -508,6 +508,7 @@ struct mlx4_vport_state { u64 mac; u16 default_vlan; u8 default_qos; + __be16 vlan_proto; u32 tx_rate; bool spoofchk; u32 link_state; @@ -657,6 +658,7 @@ struct mlx4_vf_immed_vlan_work { u8 qos_vport; u16 vlan_id; u16 orig_vlan_id; + __be16 vlan_proto; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 8b81114bdc72..84d7857ccc27 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -790,10 +790,22 @@ static int update_vport_qp_param(struct mlx4_dev *dev, MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } else if (0 != vp_oper->state.default_vlan) { - qpc->pri_path.vlan_control |= - MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) { + /* vst QinQ should block untagged on TX, + * but cvlan is in payload and phv is set so + * hw see it as untagged. 
Block tagged instead. + */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } else { /* vst 802.1Q */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } } else { /* priority tagged */ qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | @@ -802,7 +814,11 @@ static int update_vport_qp_param(struct mlx4_dev *dev, qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; qpc->pri_path.vlan_index = vp_oper->vlan_idx; - qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + qpc->pri_path.fl |= MLX4_FL_SV; + else + qpc->pri_path.fl |= MLX4_FL_CV; qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; @@ -5238,6 +5254,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) | (1ULL << MLX4_UPD_QP_PATH_MASK_CV) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SV) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) | (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) | @@ -5266,7 +5283,12 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) else if (!work->vlan_id) vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; - else + else if (work->vlan_proto == htons(ETH_P_8021AD)) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + else /* vst 802.1Q */ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; @@ -5311,7 +5333,11 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.fl = - qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN; + if (work->vlan_proto == htons(ETH_P_8021AD)) + upd_context->qp_context.pri_path.fl |= MLX4_FL_SV; + else + upd_context->qp_context.pri_path.fl |= MLX4_FL_CV; upd_context->qp_context.pri_path.feup = qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.sched_queue = diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da3552f7cb..59b50d3eedb4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -221,6 +221,7 @@ enum { MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, + MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, }; enum { @@ -1371,6 +1372,8 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 
port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index deaa2217214d..b4ee8f62ce8d 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -160,6 +160,7 @@ struct mlx4_qp_path { enum { /* fl */ MLX4_FL_CV = 1 << 6, + MLX4_FL_SV = 1 << 5, MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, @@ -267,6 +268,7 @@ enum { MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, + MLX4_UPD_QP_PATH_MASK_SV = 22 + 32, }; enum { /* param3 */ From 0815fe3a86a01cdf81361459c465761be7138665 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:14 +0300 Subject: [PATCH 0765/1050] net/mlx4_en: Disable vlan HW acceleration when in VF vlan protocol 802.1ad mode In Ethernet VF, disable vlan HW acceleration on VF while it is set to VF vlan protocol 802.1ad mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 62516f8369ba..a94f8a3f026c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3224,6 +3224,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } if (mlx4_is_slave(mdev->dev)) { + bool vlan_offload_disabled; int phv; err = get_phv_bit(mdev->dev, port, &phv); @@ -3231,6 +3232,18 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; } + err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port, + &vlan_offload_disabled); + if (!err && vlan_offload_disabled) { + dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + } } else { if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && !(mdev->dev->caps.flags2 & From 79aab093a0b5370d7fc4e99df75996f4744dc03f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:15 +0300 Subject: [PATCH 0766/1050] net: Update API for VF vlan protocol 802.1ad support Introduce new rtnl UAPI that exposes a list of vlans per VF, giving the ability for user-space application to specify it for the VF, as an option to support 802.1ad. We adjusted IP Link tool to support this option. For future use cases, the new UAPI supports multiple vlans. For now we limit the list size to a single vlan in kernel. Add IFLA_VF_VLAN_LIST in addition to IFLA_VF_VLAN to keep backward compatibility with older versions of IP Link tool. Add a vlan protocol parameter to the ndo_set_vf_vlan callback. We kept 802.1Q as the drivers' default vlan protocol. Suitable ip link tool command examples: Set vf vlan protocol 802.1ad: ip link set eth0 vf 1 vlan 100 proto 802.1ad Set vf to VST (802.1Q) mode: ip link set eth0 vf 1 vlan 100 proto 802.1Q Or by omitting the new parameter ip link set eth0 vf 1 vlan 100 Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 3 +- .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 9 ++- .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 6 +- .../net/ethernet/broadcom/bnxt/bnxt_sriov.h | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 6 +- drivers/net/ethernet/intel/fm10k/fm10k.h | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 6 +- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++- .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 4 +- drivers/net/ethernet/intel/igb/igb_main.c | 9 ++- .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 5 +- .../net/ethernet/intel/ixgbe/ixgbe_sriov.h | 2 +- .../net/ethernet/mellanox/mlx4/en_netdev.c | 6 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 6 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 6 +- .../net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 2 +- .../ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c | 5 +- drivers/net/ethernet/sfc/sriov.c | 5 +- drivers/net/ethernet/sfc/sriov.h | 2 +- include/linux/if_link.h | 1 + include/linux/netdevice.h | 6 +- include/uapi/linux/if_link.h | 19 ++++- net/core/rtnetlink.c | 80 +++++++++++++++---- 23 files changed, 161 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 0e68fadecfdb..243cb9748d35 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -492,7 +492,8 @@ int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac); -int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); +int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto); /* select_queue callback */ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 6c586b045d1d..3f77d0863543 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2521,7 +2521,8 @@ void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp) for_each_vf(bp, vfidx) { bulletin = BP_VF_BULLETIN(bp, vfidx); if (bulletin->valid_bitmap & (1 << VLAN_VALID)) - bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0); + bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0, + htons(ETH_P_8021Q)); } } @@ -2781,7 +2782,8 @@ static int bnx2x_set_vf_vlan_filter(struct bnx2x *bp, struct bnx2x_virtf *vf, return 0; } -int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) +int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos, + __be16 vlan_proto) { struct pf_vf_bulletin_content *bulletin = NULL; struct bnx2x *bp = netdev_priv(dev); @@ -2796,6 +2798,9 @@ int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n", vfidx, vlan, 0); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 8be718508600..ec6cd18842c3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -174,7 +174,8 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac) return hwrm_send_message(bp, &req, sizeof(req), 
HWRM_CMD_TIMEOUT); } -int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) +int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos, + __be16 vlan_proto) { struct hwrm_func_cfg_input req = {0}; struct bnxt *bp = netdev_priv(dev); @@ -185,6 +186,9 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) if (bp->hwrm_spec_code < 0x10201) return -ENOTSUPP; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + rc = bnxt_vf_ndo_prep(bp, vf_id); if (rc) return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index 0392670ab49c..1ab72e4820af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -12,7 +12,7 @@ int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *); int bnxt_set_vf_mac(struct net_device *, int, u8 *); -int bnxt_set_vf_vlan(struct net_device *, int, u16, u8); +int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int bnxt_set_vf_bw(struct net_device *, int, int, int); int bnxt_set_vf_link_state(struct net_device *, int, int); int bnxt_set_vf_spoofchk(struct net_device *, int, bool); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 9a94840c5757..ac513e6627d1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1895,7 +1895,8 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) return 0; } -static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; @@ -1907,6 +1908,9 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; status = be_set_vf_tvt(adapter, vf, vlan); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 67ff01aeb11a..4d19e46f7c55 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -507,7 +507,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs); s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, - int vf_idx, u16 vid, u8 qos); + int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, int unused); int fm10k_ndo_get_vf_config(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index d9dec81f6b6d..5f4dac0d36ef 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -445,7 +445,7 @@ int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac) } int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, - u8 qos) + u8 qos, __be16 vlan_proto) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -460,6 +460,10 @@ int 
fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, if (qos || (vid > (VLAN_VID_MASK - 1))) return -EINVAL; + /* VF VLAN Protocol part to default is unsupported */ + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + vf_info = &iov_data->vf_info[vf_idx]; /* exit if there is nothing to do */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index da3423561b3a..724d8740d4cc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2747,11 +2747,12 @@ error_param: * @vf_id: VF identifier * @vlan_id: mac address * @qos: priority setting + * @vlan_proto: vlan protocol * * program VF vlan id and/or qos **/ -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos) +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + u16 vlan_id, u8 qos, __be16 vlan_proto) { u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT); struct i40e_netdev_priv *np = netdev_priv(netdev); @@ -2774,6 +2775,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, goto error_pvid; } + if (vlan_proto != htons(ETH_P_8021Q)) { + dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n"); + ret = -EPROTONOSUPPORT; + goto error_pvid; + } + vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 875174141451..4012d069939a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -129,8 +129,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf); /* VF configuration related iplink handlers */ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos); +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + u16 vlan_id, u8 qos, __be16 vlan_proto); int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int max_tx_rate); int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index af75eac5fa16..a83aa13a5bf4 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -169,7 +169,7 @@ static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos); + int vf, u16 vlan, u8 qos, __be16 vlan_proto); static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); @@ -6222,14 +6222,17 @@ static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) return 0; } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) { struct igb_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return 
-EPROTONOSUPPORT; + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : igb_disable_port_vlan(adapter, vf); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8618599dfd6f..b18590a995db 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1354,13 +1354,16 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf) return err; } -int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, + u8 qos, __be16 vlan_proto) { int err = 0; struct ixgbe_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; if (vlan || qos) { /* Check if there is already a port VLAN set, if so * we have to delete the old one first before we diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 47e65e2f886a..0c7977d27b71 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -43,7 +43,7 @@ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int ixgbe_link_mbps(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a94f8a3f026c..132eeeafcdc4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2400,11 +2400,15 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); } -static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c12792314be7..b58cfe37dead 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2917,11 +2917,15 @@ static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); } -static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, vlan, qos); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index cd23a2946db7..0e198fe89d1a 100644 --- 
a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -100,7 +100,8 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, static void qede_link_update(void *dev, struct qed_link_output *link); #ifdef CONFIG_QED_SRIOV -static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) +static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct qede_dev *edev = netdev_priv(ndev); @@ -109,6 +110,9 @@ static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n", vlan, vf); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 24061b9b92e8..5f327659efa7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -238,7 +238,7 @@ int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *); int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int); int qlcnic_sriov_get_vf_config(struct net_device *, int , struct ifla_vf_info *); -int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8); +int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int qlcnic_sriov_set_vf_spoofchk(struct net_device *, int, bool); #else static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index afd687e5e779..50eaafa3eaba 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1915,7 +1915,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, } int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, - u16 vlan, u8 qos) + u16 vlan, u8 qos, __be16 vlan_proto) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_sriov *sriov = adapter->ahw->sriov; @@ -1928,6 +1928,9 @@ int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, if (vf >= sriov->num_vfs || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan > MAX_VLAN_ID) { netdev_err(netdev, "Invalid VLAN ID, allowed range is [0 - %d]\n", diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c index 816c44689e67..9abcf4aded30 100644 --- a/drivers/net/ethernet/sfc/sriov.c +++ b/drivers/net/ethernet/sfc/sriov.c @@ -22,7 +22,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) } int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, - u8 qos) + u8 qos, __be16 vlan_proto) { struct efx_nic *efx = netdev_priv(net_dev); @@ -31,6 +31,9 @@ int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos); } else { return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h index 400df526586d..ba1762e7f216 100644 --- a/drivers/net/ethernet/sfc/sriov.h +++ b/drivers/net/ethernet/sfc/sriov.h @@ -16,7 +16,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac); int efx_sriov_set_vf_vlan(struct net_device *net_dev, int 
vf_i, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, bool spoofchk); int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f923d15b432c..0b17c585b5cd 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -25,5 +25,6 @@ struct ifla_vf_info { __u32 max_tx_rate; __u32 rss_query_en; __u32 trusted; + __be16 vlan_proto; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69f242c71865..1e8a5c734d72 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -946,7 +946,8 @@ struct netdev_xdp { * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); - * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); + * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, + * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); @@ -1187,7 +1188,8 @@ struct net_device_ops { int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, - int queue, u16 vlan, u8 qos); + int queue, u16 vlan, + u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7ec9e99d5491..b4fba662cd32 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -619,7 +619,7 @@ enum { enum { IFLA_VF_UNSPEC, IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, + IFLA_VF_VLAN, /* VLAN ID and QoS */ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ @@ -631,6 +631,7 @@ enum { IFLA_VF_TRUST, /* Trust VF */ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ __IFLA_VF_MAX, }; @@ -647,6 +648,22 @@ struct ifla_vf_vlan { __u32 qos; }; +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + struct ifla_vf_tx_rate { __u32 vf; __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0dbae4244a89..3ac8946bf244 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -843,7 +843,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += nla_total_size(num_vfs * sizeof(struct nlattr)); size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + - nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct nlattr)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + @@ -1111,14 +1114,15 @@ static noinline_for_stack 
int rtnl_fill_vfinfo(struct sk_buff *skb, struct nlattr *vfinfo) { struct ifla_vf_rss_query_en vf_rss_query_en; + struct nlattr *vf, *vfstats, *vfvlanlist; struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_vlan_info vf_vlan_info; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_stats vf_stats; struct ifla_vf_trust vf_trust; struct ifla_vf_vlan vf_vlan; struct ifla_vf_rate vf_rate; - struct nlattr *vf, *vfstats; struct ifla_vf_mac vf_mac; struct ifla_vf_info ivi; @@ -1135,11 +1139,14 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, * IFLA_VF_LINK_STATE_AUTO which equals zero */ ivi.linkstate = 0; + /* VLAN Protocol by default is 802.1Q */ + ivi.vlan_proto = htons(ETH_P_8021Q); if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) return 0; vf_mac.vf = vf_vlan.vf = + vf_vlan_info.vf = vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = @@ -1150,6 +1157,9 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; + vf_vlan_info.vlan = ivi.vlan; + vf_vlan_info.qos = ivi.qos; + vf_vlan_info.vlan_proto = ivi.vlan_proto; vf_tx_rate.rate = ivi.max_tx_rate; vf_rate.min_tx_rate = ivi.min_tx_rate; vf_rate.max_tx_rate = ivi.max_tx_rate; @@ -1158,10 +1168,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); - if (!vf) { - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vf) + goto nla_put_vfinfo_failure; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), @@ -1177,17 +1185,23 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, &vf_rss_query_en) || nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) - return -EMSGSIZE; + goto nla_put_vf_failure; + vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST); + if (!vfvlanlist) + goto nla_put_vf_failure; + if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), + &vf_vlan_info)) { + nla_nest_cancel(skb, vfvlanlist); + goto nla_put_vf_failure; + } + nla_nest_end(skb, vfvlanlist); memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); vfstats = nla_nest_start(skb, IFLA_VF_STATS); - if (!vfstats) { - nla_nest_cancel(skb, vf); - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vfstats) + goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, vf_stats.rx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, @@ -1199,11 +1213,19 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, vf_stats.broadcast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD)) - return -EMSGSIZE; + vf_stats.multicast, IFLA_VF_STATS_PAD)) { + nla_nest_cancel(skb, vfstats); + goto nla_put_vf_failure; + } nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); return 0; + +nla_put_vf_failure: + nla_nest_cancel(skb, vf); +nla_put_vfinfo_failure: + nla_nest_cancel(skb, vfinfo); + return -EMSGSIZE; } static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) @@ -1448,6 +1470,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { static const struct 
nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED }, [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, @@ -1704,7 +1727,34 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, - ivv->qos); + ivv->qos, + htons(ETH_P_8021Q)); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN_LIST]) { + struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN]; + struct nlattr *attr; + int rem, len = 0; + + err = -EOPNOTSUPP; + if (!ops->ndo_set_vf_vlan) + return err; + + nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { + if (nla_type(attr) != IFLA_VF_VLAN_INFO || + nla_len(attr) < NLA_HDRLEN) { + return -EINVAL; + } + if (len >= MAX_VLAN_LIST_LEN) + return -EOPNOTSUPP; + ivvl[len] = nla_data(attr); + + len++; + } + err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, + ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) return err; } From b42959dc35a533a531dd698b581193a65a5da831 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:16 +0300 Subject: [PATCH 0767/1050] net/mlx4: Add VF vlan protocol 802.1ad support Move the vf to VST 802.1ad mode (mlx4 VST QinQ mode) by setting the vf vlan protocol to 802.1ad. VST 802.1ad mode in mlx4 is used for STAG strip/insertion by the PF, while the CTAG is set by the VF. Read the current vlan protocol as part of the vf configuration state. Upon setting the vf vlan protocol to 802.1ad, we use a handshake mechanism to verify that both the vf and the pf driver version support it. The handshake uses the command QUERY_FUNC_CAP: - The vf sets a pre-defined support bit in the input modifier. - A pf that supports the feature sends the request to the vf through a pre-defined field in the output mailbox. - If the vf does not support the feature, the pf will fail the control command (in this case, the ip link command that sets the vf vlan protocol to 802.1ad). No change in VST 802.1Q mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 51 ++++++++++- .../net/ethernet/mellanox/mlx4/en_netdev.c | 6 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 89 ++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/cmd.h | 3 +- 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 09c969420eac..b1cef7a0f7ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1995,6 +1995,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; struct mlx4_active_ports actv_ports = mlx4_get_active_ports( &priv->dev, slave); int min_port = find_first_bit(actv_ports.ports, @@ -2009,7 +2011,19 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) priv->mfunc.master.vf_admin[slave].enable_smi[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; - vp_oper->state = *vp_admin; + if (vp_admin->vlan_proto != htons(ETH_P_8021AD) || + slave_state->vst_qinq_supported) { + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + } + vp_oper->state.link_state = vp_admin->link_state; + vp_oper->state.mac = vp_admin->mac; + vp_oper->state.spoofchk = vp_admin->spoofchk; + vp_oper->state.tx_rate = vp_admin->tx_rate; + vp_oper->state.qos_vport = vp_admin->qos_vport; + vp_oper->state.guid = vp_admin->guid; + if (MLX4_VGT != vp_admin->default_vlan) { err = __mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &(vp_oper->vlan_idx)); @@ -2097,6 +2111,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; slave_state[slave].old_vlan_api = false; + slave_state[slave].vst_qinq_supported = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; @@ -2364,6 +2379,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) vf_oper = &priv->mfunc.master.vf_oper[i]; s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; + s_state->vst_qinq_supported = false; mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; @@ -2955,10 +2971,13 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos, + __be16 proto) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *vf_admin; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_oper_state *vf_oper; int slave; if ((!mlx4_is_master(dev)) || @@ -2968,12 +2987,31 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) if ((vlan > 4095) || (qos > 7)) return -EINVAL; + if (proto == htons(ETH_P_8021AD) && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP)) + return -EPROTONOSUPPORT; + + if (proto != htons(ETH_P_8021Q) && + proto != htons(ETH_P_8021AD)) + return 
-EINVAL; + + if ((proto == htons(ETH_P_8021AD)) && + ((vlan == 0) || (vlan == MLX4_VGT))) + return -EINVAL; + slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; + slave_state = &priv->mfunc.master.slave_state[slave]; + if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) && + (!slave_state->vst_qinq_supported)) { + mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf); + return -EPROTONOSUPPORT; + } port = mlx4_slaves_closest_port(dev, slave, port); vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos)) return -EPERM; @@ -2983,6 +3021,7 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) else vf_admin->default_vlan = vlan; vf_admin->default_qos = qos; + vf_admin->vlan_proto = proto; /* If rate was configured prior to VST, we saved the configured rate * in vf_admin->rate and now, if priority supported we enforce the QoS @@ -2991,7 +3030,12 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) vf_admin->tx_rate) vf_admin->qos_vport = slave; - if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) + /* Try to activate new vf state without restart, + * this option is not supported while moving to VST QinQ mode. + */ + if ((proto == htons(ETH_P_8021AD) && + vf_oper->state.vlan_proto != proto) || + mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) mlx4_info(dev, "updating vf %d port %d config will take effect on next VF restart\n", vf, port); @@ -3135,6 +3179,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->vlan = s_info->default_vlan; ivf->qos = s_info->default_qos; + ivf->vlan_proto = s_info->vlan_proto; if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info)) ivf->max_tx_rate = s_info->tx_rate; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 132eeeafcdc4..7e703bed7b82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2406,10 +2406,8 @@ static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; - if (vlan_proto != htons(ETH_P_8021Q)) - return -EPROTONOSUPPORT; - - return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos, + vlan_proto); } static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 7dc9d38a51f8..090bf81076e8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -249,6 +249,72 @@ out: return err; } +static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_admin->default_vlan != vp_oper->state.default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &vp_oper->vlan_idx); + if (err) { + vp_oper->vlan_idx = NO_INDX; + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc vlan %d 
idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + return 0; +} + +static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + slave_state = &priv->mfunc.master.slave_state[slave]; + + if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) || + (!slave_state->active)) + return 0; + + if (vp_oper->state.vlan_proto == vp_admin->vlan_proto && + vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + if (!slave_state->vst_qinq_supported) { + /* Warn and revert the request to set vst QinQ mode */ + vp_admin->vlan_proto = vp_oper->state.vlan_proto; + vp_admin->default_vlan = vp_oper->state.default_vlan; + vp_admin->default_qos = vp_oper->state.default_qos; + + mlx4_warn(&priv->dev, + "Slave %d does not support VST QinQ mode\n", slave); + return 0; + } + + err = mlx4_activate_vst_qinq(priv, slave, port); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -312,17 +378,18 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 -#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 #define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 +#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30) +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31) + if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); - struct mlx4_vport_oper_state *vp_oper = - &priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier]; + struct mlx4_vport_oper_state *vp_oper; if (converted_port < 0) return -EINVAL; @@ -361,6 +428,11 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + err = mlx4_handle_vst_qinq(priv, slave, port); + if (err) + return err; + field = 0; if (dev->caps.phv_bit[port]) field |= QUERY_FUNC_CAP_PHV_BIT; @@ -371,6 +443,9 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; + /* enable rdma and ethernet interfaces, new quota locations, * and reserved lkey */ @@ -444,6 +519,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + + if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ) + slave_state->vst_qinq_supported = true; + } else err = -EINVAL; @@ -459,10 +538,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, u32 size, qkey; int err = 0, quotas = 0; u32 
in_modifier; + u32 slave_caps; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ - in_modifier = op_modifier ? gen_or_port : + slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ | QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; + in_modifier = op_modifier ? gen_or_port : slave_caps; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index fdfe1ace07d7..e4878f31e45d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -483,6 +483,7 @@ struct mlx4_slave_state { u8 init_port_mask; bool active; bool old_vlan_api; + bool vst_qinq_supported; u8 function; dma_addr_t vhcr_dma; u16 mtu[MLX4_MAX_PORTS + 1]; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 116b284bc4ce..1f3568694a57 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -309,7 +309,8 @@ int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, + u8 qos, __be16 proto); int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); From 4ce4862a815e3cee8040c9d91e2148aecbbf056e Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:04:09 +0800 Subject: [PATCH 0768/1050] Documentation: devicetree: revise ethernet device-tree binding about TRGMII add phy-mode "trgmii" to Documentation/devicetree/bindings/net/ethernet.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 5d88f37480b6..e1d76812419c 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -11,8 +11,8 @@ The following properties are common to the Ethernet controllers: the maximum frame size (there's contradiction in ePAPR). - phy-mode: string, operation mode of the PHY interface; supported values are "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id", - "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto - standard property; + "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii", "trgmii"; this is now a + de-facto standard property; - phy-connection-type: the same as "phy-mode" property but described in ePAPR; - phy-handle: phandle, specifies a reference to a node representing a PHY device; this property is described in ePAPR and so preferred; From 7f8c2865a94a73308386627cd7556c17f03efb63 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:09:32 +0800 Subject: [PATCH 0769/1050] Documentation: devicetree: fix typo in MediaTek ethernet device-tree binding fix typo in Documentation/devicetree/bindings/net/mediatek-net.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 7111278adc7e..f09525772369 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -34,7 +34,7 @@ Required properties: - phy-handle: see ethernet.txt file in the same directory and the phy-mode "trgmii" required being provided when reg is equal to 0 and the MAC uses fixed-link to connect - with inernal switch such as MT7530. + with internal switch such as MT7530. Example: From faac0ff0a544eed6b8c9375c1104d692e4979540 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 23 Sep 2016 12:02:45 +0100 Subject: [PATCH 0770/1050] mlxsw: spectrum: remove redundant check if err is zero There is an earlier check and return if err is non-zero, so the check that it is zero is redundant in every iteration of the loop and can be removed. Signed-off-by: Colin Ian King Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 80f27b504444..fd74d1064ff3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1664,7 +1664,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, return; mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); for (i = 0; i < len; i++) - data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; + data[data_index + i] = hw_stats[i].getter(ppcnt_pl); } static void mlxsw_sp_port_get_stats(struct net_device *dev, From b4e28c1fc9c7f3b7508b9a27d7c59a0da7b1f824 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Sep 2016 14:42:27 +0530 Subject: [PATCH 0771/1050] net: thunderx: Fix issue with IRQ naming This patch fixes a regression caused by the previous commit: the irq name could exceed the 20-byte array when the interface name is long. Fixes: e412621394fa ("net: thunderx: Use netdev's name for naming VF's interrupts") Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 18d12d35039a..30426109711c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -305,7 +305,7 @@ struct nicvf { bool msix_enabled; u8 num_vec; struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; - char irq_name[NIC_VF_MSIX_VECTORS][20]; + char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15]; bool irq_allocated[NIC_VF_MSIX_VECTORS]; cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS]; From 2c204c2b9fca36aa24f7abe2e8bfd83fe3a8db8d Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Sep 2016 14:42:28 +0530 Subject: [PATCH 0772/1050] net: thunderx: Support for byte queue limits This patch adds support for byte queue limits. Signed-off-by: Sunil Goutham Signed-off-by: David S.
Miller --- .../net/ethernet/cavium/thunder/nicvf_main.c | 14 +++++++-- .../ethernet/cavium/thunder/nicvf_queues.c | 30 ++++++++++++------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 7d00162a2f89..45a13f718863 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -516,7 +516,8 @@ static int nicvf_init_resources(struct nicvf *nic) static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cmp_queue *cq, struct cqe_send_t *cqe_tx, - int cqe_type, int budget) + int cqe_type, int budget, + unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); @@ -547,6 +548,8 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, } nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); + (*tx_pkts)++; + *tx_bytes += skb->len; napi_consume_skb(skb, budget); sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } else { @@ -662,6 +665,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; + unsigned int tx_pkts = 0, tx_bytes = 0; spin_lock_bh(&cq->lock); loop: @@ -701,7 +705,7 @@ loop: case CQE_TYPE_SEND: nicvf_snd_pkt_handler(netdev, cq, (void *)cq_desc, CQE_TYPE_SEND, - budget); + budget, &tx_pkts, &tx_bytes); tx_done++; break; case CQE_TYPE_INVALID: @@ -730,6 +734,9 @@ done: netdev = nic->pnicvf->netdev; txq = netdev_get_tx_queue(netdev, nicvf_netdev_qidx(nic, cq_idx)); + if (tx_pkts) + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + nic = nic->pnicvf; if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { netif_tx_start_queue(txq); @@ -1160,6 +1167,9 @@ int nicvf_stop(struct net_device *netdev) netif_tx_disable(netdev); + for (qidx = 0; qidx < netdev->num_tx_queues; qidx++) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); + /* Free resources */ nicvf_config_data_transfer(nic, false); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 178c5c7b0994..a4fc50155881 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1082,6 +1082,24 @@ static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry, imm->len = 1; } +static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb, + int sq_num, int desc_cnt) +{ + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(nic->pnicvf->netdev, + skb_get_queue_mapping(skb)); + + netdev_tx_sent_queue(txq, skb->len); + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, desc_cnt); +} + /* Segment a TSO packet into 'gso_size' segments and append * them to SQ for transfer */ @@ -1141,12 +1159,8 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, /* Save SKB in the last segment for freeing */ sq->skbuff[hdr_qentry] = (u64)skb; - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt); - /* Inform HW to xmit all TSO segments */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, desc_cnt); nic->drv_stats.tx_tso++; return 1; } @@ -1219,12 +1233,8 @@ doorbell: 
nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); } - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); - /* Inform HW to xmit new packet */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, subdesc_cnt); return 1; append_fail: From c6a77ff82fb849534748719f37f3f9086d78ed39 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 23 Sep 2016 17:08:17 -0700 Subject: [PATCH 0773/1050] hv_netvsc: fix comments Typos and spelling errors. Also remove an old comment from the staging era. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 7130bf910f52..f4fbcb5aa24a 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -433,7 +433,7 @@ struct nvsp_1_message_revoke_send_buffer { */ struct nvsp_1_message_send_rndis_packet { /* - * This field is specified by RNIDS. They assume there's two different + * This field is specified by RNDIS. They assume there's two different * channels of communication. However, the Network VSP only has one. * Therefore, the channel travels with the RNDIS packet. */ @@ -578,7 +578,7 @@ struct nvsp_5_send_indirect_table { /* The number of entries in the send indirection table */ u32 count; - /* The offset of the send indireciton table from top of this struct. + /* The offset of the send indirection table from top of this struct. * The send indirection table tells which channel to put the send * traffic on. Each entry is a channel number. */ @@ -734,7 +734,6 @@ struct netvsc_device { struct nvsp_message channel_init_pkt; struct nvsp_message revoke_packet; - /* unsigned char HwMacAddr[HW_MACADDR_LEN]; */ struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX]; u32 send_table[VRSS_SEND_TAB_SIZE]; @@ -1239,7 +1238,7 @@ struct rndis_message { u32 ndis_msg_type; /* Total length of this message, from the beginning */ - /* of the sruct rndis_message, in bytes. */ + /* of the struct rndis_message, in bytes. */ u32 msg_len; /* Actual message */ From db32e4e49ce2b0e5fcc17803d011a401c0a637f6 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Fri, 23 Sep 2016 15:50:29 -0400 Subject: [PATCH 0774/1050] ip6_gre: fix flowi6_proto value in ip6gre_xmit_other() Similar to commit 3be07244b733 ("ip6_gre: fix flowi6_proto value in xmit path"), set flowi6_proto to IPPROTO_GRE for output route lookup. Up until now, ip6gre_xmit_other() has set flowi6_proto to a bogus value. This affected output route lookup for packets sent on an ip6gretap device in cases where routing was dependent on the value of flowi6_proto. Since the correct proto is already set in the tunnel flowi6 template via commit 252f3f5a1189 ("ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path."), simply delete the line setting the incorrect flowi6_proto value. Suggested-by: Jiri Benc Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reviewed-by: Shmulik Ladkani Signed-off-by: Lance Richardson Signed-off-by: David S.
Miller --- net/ipv6/ip6_gre.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 704274cbd495..edc3daab354e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -648,7 +648,6 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = skb->protocol; err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) From 805b21b929e29192fb5de16154f616bfc1116e3e Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:26 +0100 Subject: [PATCH 0775/1050] rxrpc: Send an ACK after every few DATA packets we receive Send an ACK if we haven't sent one for the last two packets we've received. This keeps the other end apprised of where we've got to - which is important if they're doing slow-start. We do this in recvmsg so that we can dispatch a packet directly without the need to wake up the background thread. This should possibly be made configurable in future. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/misc.c | 1 + net/rxrpc/output.c | 25 +++++++++++++++++-------- net/rxrpc/recvmsg.c | 13 ++++++++++++- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 042dbcc52654..e3bf9c0e3ad1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -533,6 +533,8 @@ struct rxrpc_call { u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ + rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ + rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ rxrpc_serial_t ackr_ping; /* Last ping sent */ ktime_t ackr_ping_time; /* Time last ping sent */ @@ -695,6 +697,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_rotate_rx, rxrpc_propose_ack_terminal_ack, rxrpc_propose_ack__nr_trace }; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1ca14835d87f..a473fd7dabaa 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -202,6 +202,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_rotate_rx] = "RxAck ", [rxrpc_propose_ack_terminal_ack] = "ClTerm ", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0c563e325c9d..3eb01445e814 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -36,7 +36,9 @@ struct rxrpc_pkt_buffer { * Fill out an ACK packet. 
*/ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_pkt_buffer *pkt) + struct rxrpc_pkt_buffer *pkt, + rxrpc_seq_t *_hard_ack, + rxrpc_seq_t *_top) { rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; @@ -48,6 +50,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); top = smp_load_acquire(&call->rx_top); + *_hard_ack = hard_ack; + *_top = top; pkt->ack.bufferSpace = htons(8); pkt->ack.maxSkew = htons(call->ackr_skew); @@ -96,6 +100,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) struct msghdr msg; struct kvec iov[2]; rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; size_t len, n; bool ping = false; int ioc, ret; @@ -146,7 +151,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt); + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); call->ackr_reason = 0; spin_unlock_bh(&call->lock); @@ -203,18 +208,22 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) if (ping) call->ackr_ping_time = ktime_get_real(); - if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { - switch (type) { - case RXRPC_PACKET_TYPE_ACK: + if (type == RXRPC_PACKET_TYPE_ACK && + call->state < RXRPC_CALL_COMPLETE) { + if (ret < 0) { clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), true, true, rxrpc_propose_ack_retry_tx); - break; - case RXRPC_PACKET_TYPE_ABORT: - break; + } else { + spin_lock_bh(&call->lock); + if (after(hard_ack, call->ackr_consumed)) + call->ackr_consumed = hard_ack; + if (after(top, call->ackr_seen)) + call->ackr_seen = top; + spin_unlock_bh(&call->lock); } } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8c7f3de45bac..a7458c398b9e 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -201,8 +201,19 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); - if (flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) { rxrpc_end_rx_phase(call); + } else { + /* Check to see if there's an ACK that needs sending. */ + if (after_eq(hard_ack, call->ackr_consumed + 2) || + after_eq(top, call->ackr_seen + 2) || + (hard_ack == top && after(hard_ack, call->ackr_consumed))) + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, + true, false, + rxrpc_propose_ack_rotate_rx); + if (call->ackr_reason) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } } /* From 3089bf614c7e2fd441ee001e3ff3d18326f6f091 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 23 Sep 2016 20:21:56 -0700 Subject: [PATCH 0776/1050] shmem: fix tmpfs to handle the huge= option properly shmem_get_unmapped_area() checks SHMEM_SB(sb)->huge incorrectly, which leads to a reversed effect of the "huge=" mount option. Fix the check in shmem_get_unmapped_area(). Note, the default value of SHMEM_SB(sb)->huge remains as SHMEM_HUGE_NEVER. Users will need to specify the "huge=" option to enable huge page mappings. Reported-by: Hillf Danton Signed-off-by: Toshi Kani Acked-by: Kirill A.
Shutemov Reviewed-by: Aneesh Kumar K.V Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index fd8b2b5741b1..aec5b492d947 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1980,7 +1980,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, return addr; sb = shm_mnt->mnt_sb; } - if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER) + if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER) return addr; } From 71664665c3e3ca5ff61ef5fc65480f82cd575eb2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 23 Sep 2016 20:24:23 -0700 Subject: [PATCH 0777/1050] huge tmpfs: fix Committed_AS leak Under swapping load on huge tmpfs, /proc/meminfo's Committed_AS grows bigger and bigger: just a cosmetic issue for most users, but disabling for those who run without overcommit (/proc/sys/vm/overcommit_memory 2). shmem_uncharge() was forgetting to unaccount __vm_enough_memory's charge, and shmem_charge() was forgetting it on the filesystem-full error path. Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Signed-off-by: Linus Torvalds --- mm/shmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index aec5b492d947..971fc83e6402 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -270,7 +270,7 @@ bool shmem_charge(struct inode *inode, long pages) info->alloced -= pages; shmem_recalc_inode(inode); spin_unlock_irqrestore(&info->lock, flags); - + shmem_unacct_blocks(info->flags, pages); return false; } percpu_counter_add(&sbinfo->used_blocks, pages); @@ -291,6 +291,7 @@ void shmem_uncharge(struct inode *inode, long pages) if (sbinfo->max_blocks) percpu_counter_sub(&sbinfo->used_blocks, pages); + shmem_unacct_blocks(info->flags, pages); } /* From b385d21f27d86426472f6ae92a231095f7de2a8d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 23 Sep 2016 20:27:04 -0700 Subject: [PATCH 0778/1050] mm: delete unnecessary and unsafe init_tlb_ubc() init_tlb_ubc() looked unnecessary to me: tlb_ubc is statically initialized with zeroes in the init_task, and copied from parent to child while it is quiescent in arch_dup_task_struct(); so I went to delete it. But inserted temporary debug WARN_ONs in place of init_tlb_ubc() to check that it was always empty at that point, and found them firing: because memcg reclaim can recurse into global reclaim (when allocating biosets for swapout in my case), and arrive back at the init_tlb_ubc() in shrink_node_memcg(). Resetting tlb_ubc.flush_required at that point is wrong: if the upper level needs a deferred TLB flush, but the lower level turns out not to, we miss a TLB flush. But fortunately, that's the only part of the protocol that does not nest: with the initialization removed, cpumask collects bits from upper and lower levels, and flushes TLB when needed. Fixes: 72b252aed506 ("mm: send one IPI per CPU to TLB flush all entries after unmapping pages") Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Cc: stable@vger.kernel.org # 4.3+ Signed-off-by: Linus Torvalds --- mm/vmscan.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index b1e12a1ea9cf..0fe8b7113868 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2303,23 +2303,6 @@ out: } } -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH -static void init_tlb_ubc(void) -{ - /* - * This deliberately does not clear the cpumask as it's expensive - * and unnecessary. 
If there happens to be data in there then the - * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and - * then will be cleared. - */ - current->tlb_ubc.flush_required = false; -} -#else -static inline void init_tlb_ubc(void) -{ -} -#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ - /* * This is a basic per-node page freer. Used by both kswapd and direct reclaim. */ @@ -2355,8 +2338,6 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() && sc->priority == DEF_PRIORITY); - init_tlb_ubc(); - blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { From 595c73071e6641e59b83911fbb4026e767471000 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 23 Sep 2016 17:53:52 -0700 Subject: [PATCH 0779/1050] libnvdimm, region: fix flush hint table thinko The definition of the flush hint table as: void __iomem *flush_wpq[0][0]; ...passed the unit test, but is broken as flush_wpq[0][1] and flush_wpq[1][0] refer to the same entry. Fix this to use a helper that calculates a slot in the table based on the geometry of flush hints in the region. This is important to get right since virtualization solutions use this mechanism to trigger hypervisor flushes to platform persistence. Reported-by: Dave Jiang Tested-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/nvdimm/nd.h | 22 ++++++++++++++++++++-- drivers/nvdimm/region_devs.c | 20 ++++++++++++-------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 8024a0ef86d3..0b78a8211f4a 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -52,10 +52,28 @@ struct nvdimm_drvdata { struct nd_region_data { int ns_count; int ns_active; - unsigned int flush_mask; - void __iomem *flush_wpq[0][0]; + unsigned int hints_shift; + void __iomem *flush_wpq[0]; }; +static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd, + int dimm, int hint) +{ + unsigned int num = 1 << ndrd->hints_shift; + unsigned int mask = num - 1; + + return ndrd->flush_wpq[dimm * num + (hint & mask)]; +} + +static inline void ndrd_set_flush_wpq(struct nd_region_data *ndrd, int dimm, + int hint, void __iomem *flush) +{ + unsigned int num = 1 << ndrd->hints_shift; + unsigned int mask = num - 1; + + ndrd->flush_wpq[dimm * num + (hint & mask)] = flush; +} + static inline struct nd_namespace_index *to_namespace_index( struct nvdimm_drvdata *ndd, int i) { diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 4eef88eb5144..4c0ac4abb629 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -38,7 +38,7 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm), nvdimm->num_flush, nvdimm->num_flush == 1 ? 
"" : "es"); - for (i = 0; i < nvdimm->num_flush; i++) { + for (i = 0; i < (1 << ndrd->hints_shift); i++) { struct resource *res = &nvdimm->flush_wpq[i]; unsigned long pfn = PHYS_PFN(res->start); void __iomem *flush_page; @@ -54,14 +54,15 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, if (j < i) flush_page = (void __iomem *) ((unsigned long) - ndrd->flush_wpq[dimm][j] & PAGE_MASK); + ndrd_get_flush_wpq(ndrd, dimm, j) + & PAGE_MASK); else flush_page = devm_nvdimm_ioremap(dev, PFN_PHYS(pfn), PAGE_SIZE); if (!flush_page) return -ENXIO; - ndrd->flush_wpq[dimm][i] = flush_page - + (res->start & ~PAGE_MASK); + ndrd_set_flush_wpq(ndrd, dimm, i, flush_page + + (res->start & ~PAGE_MASK)); } return 0; @@ -93,7 +94,10 @@ int nd_region_activate(struct nd_region *nd_region) return -ENOMEM; dev_set_drvdata(dev, ndrd); - ndrd->flush_mask = (1 << ilog2(num_flush)) - 1; + if (!num_flush) + return 0; + + ndrd->hints_shift = ilog2(num_flush); for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm *nvdimm = nd_mapping->nvdimm; @@ -900,8 +904,8 @@ void nvdimm_flush(struct nd_region *nd_region) */ wmb(); for (i = 0; i < nd_region->ndr_mappings; i++) - if (ndrd->flush_wpq[i][0]) - writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]); + if (ndrd_get_flush_wpq(ndrd, i, 0)) + writeq(1, ndrd_get_flush_wpq(ndrd, i, idx)); wmb(); } EXPORT_SYMBOL_GPL(nvdimm_flush); @@ -925,7 +929,7 @@ int nvdimm_has_flush(struct nd_region *nd_region) for (i = 0; i < nd_region->ndr_mappings; i++) /* flush hints present, flushing required */ - if (ndrd->flush_wpq[i][0]) + if (ndrd_get_flush_wpq(ndrd, i, 0)) return 1; /* From 50f4c7b73f831a53fa9ddeb9bdf4cfb5b23d3aa7 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 7 Sep 2016 10:40:24 +0800 Subject: [PATCH 0780/1050] netfilter: xt_TCPMSS: Refactor the codes to decrease one condition check and more readable The origin codes perform two condition checks with dst_mtu(skb_dst(skb)) and in_mtu. And the last statement is "min(dst_mtu(skb_dst(skb)), in_mtu) - minlen". It may let reader think about how about the result. Would it be negative. Now assign the result of min(dst_mtu(skb_dst(skb)), in_mtu) to a new variable, then only perform one condition check, and it is more readable. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index e118397254af..872db2d0e2a9 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -110,18 +110,14 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (info->mss == XT_TCPMSS_CLAMP_PMTU) { struct net *net = par->net; unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); - if (dst_mtu(skb_dst(skb)) <= minlen) { + if (min_mtu <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", - dst_mtu(skb_dst(skb))); + min_mtu); return -1; } - if (in_mtu <= minlen) { - net_err_ratelimited("unknown or invalid path-MTU (%u)\n", - in_mtu); - return -1; - } - newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; + newmss = min_mtu - minlen; } else newmss = info->mss; From c5136b15ea364124299c8a9ba96b300e96061e3a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Sep 2016 11:35:01 -0400 Subject: [PATCH 0781/1050] netfilter: bridge: add and use br_nf_hook_thresh This replaces the last uses of NF_HOOK_THRESH(). 
A follow-up patch will remove it and rename nf_hook_thresh. The reason is that inet (non-bridge) netfilter no longer invokes the hooks from within other hooks, so we no longer need the thresh value to skip hooks with a lower priority. The bridge netfilter however may need to do this. br_nf_hook_thresh is a wrapper that is supposed to do this, i.e. only call hooks with a priority that exceeds NF_BR_PRI_BRNF. It's used only in the recursion cases of br_netfilter. It invokes nf_hook_slow while holding an rcu read-side critical section to make a future cleanup simpler. Signed-off-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/br_netfilter.h | 6 +++ net/bridge/br_netfilter_hooks.c | 60 +++++++++++++++++++++++----- net/bridge/br_netfilter_ipv6.c | 12 +++--- 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index e8d1448425a7..0b0c35c37125 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -15,6 +15,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) void nf_bridge_update_protocol(struct sk_buff *skb); +int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)); + static inline struct nf_bridge_info * nf_bridge_info_get(const struct sk_buff *skb) { diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 77e7f69bf80d..6029af47377d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -395,11 +396,10 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, - NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, + NULL, + br_nf_pre_routing_finish); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -417,10 +417,8 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, + br_handle_frame_finish); return 0; } @@ -992,6 +990,50 @@ static struct notifier_block brnf_notifier __read_mostly = { .notifier_call = brnf_device_event, }; +/* recursively invokes nf_hook_slow (again), skipping already-called + * hooks (< NF_BR_PRI_BRNF). + * + * Called with rcu read lock held.
+ */ +int br_nf_hook_thresh(unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)) +{ + struct nf_hook_ops *elem; + struct nf_hook_state state; + struct list_head *head; + int ret; + + head = &net->nf.hooks[NFPROTO_BRIDGE][hook]; + + list_for_each_entry_rcu(elem, head, list) { + struct nf_hook_ops *next; + + next = list_entry_rcu(list_next_rcu(&elem->list), + struct nf_hook_ops, list); + if (next->priority <= NF_BR_PRI_BRNF) + continue; + } + + if (&elem->list == head) + return okfn(net, sk, skb); + + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); + nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1, + NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); + + ret = nf_hook_slow(skb, &state); + rcu_read_unlock(); + if (ret == 1) + ret = okfn(net, sk, skb); + + return ret; +} + #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 5e59a8457e7b..5989661c659f 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -187,10 +187,9 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -207,9 +206,8 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, + skb->dev, NULL, br_handle_frame_finish); return 0; } From fe72926b792e52ab00abfa81a201805bfb2247d6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Sep 2016 11:35:02 -0400 Subject: [PATCH 0782/1050] netfilter: call nf_hook_state_init with rcu_read_lock held This makes things simpler because we can store the head of the list in the nf_state structure without worrying about concurrent add/delete of hook elements from the list. A future commit will make use of this to implement a simpler linked-list. 
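[Editor's note] For readers following the series, a minimal sketch of the RCU pattern this commit relies on may help. It is illustrative only, not code from the patch: the example_* names are hypothetical, while rcu_read_lock(), rcu_read_unlock() and list_for_each_entry_rcu() are the real kernel primitives. Entries unlinked by a concurrent writer remain valid until a grace period elapses, so a read-side critical section can safely snapshot the list head and walk it later without taking the writer's lock.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>

struct example_hook {
	struct list_head list;
	int (*fn)(struct sk_buff *skb);
};

static LIST_HEAD(example_hooks);

/* Walk the hook list under the RCU read lock; concurrent unregistration
 * of a hook cannot free an entry out from under this traversal. */
static int example_run_hooks(struct sk_buff *skb)
{
	struct example_hook *h;
	int verdict = 1;

	rcu_read_lock();
	list_for_each_entry_rcu(h, &example_hooks, list) {
		verdict = h->fn(skb);
		if (verdict <= 0)
			break;
	}
	rcu_read_unlock();
	return verdict;
}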
Signed-off-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 8 +++++++- include/linux/netfilter_ingress.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9aee896..ad444f0b4ed0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -174,10 +174,16 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (!list_empty(hook_list)) { struct nf_hook_state state; + int ret; + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); nf_hook_state_init(&state, hook_list, hook, thresh, pf, indev, outdev, sk, net, okfn); - return nf_hook_slow(skb, &state); + + ret = nf_hook_slow(skb, &state); + rcu_read_unlock(); + return ret; } return 1; } diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 5fcd375ef175..6965ba09eba7 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -14,6 +14,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) return !list_empty(&skb->dev->nf_hooks_ingress); } +/* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_state state; From 2c1e2703ff812ccaa42a4bc8a25803955e342b85 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:03 -0400 Subject: [PATCH 0783/1050] netfilter: call nf_hook_ingress with rcu_read_lock This commit ensures that the rcu read-side lock is held while the ingress hook is called. This ensures that a call to nf_hook_slow (and ultimately nf_ingress) will be read protected. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/core/dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 34b5322bc081..064919425b7d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4040,12 +4040,17 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, { #ifdef CONFIG_NETFILTER_INGRESS if (nf_hook_ingress_active(skb)) { + int ingress_retval; + if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } - return nf_hook_ingress(skb); + rcu_read_lock(); + ingress_retval = nf_hook_ingress(skb); + rcu_read_unlock(); + return ingress_retval; } #endif /* CONFIG_NETFILTER_INGRESS */ return 0; From e2361cb90a0327bdab34d01d1a7b9dbd67c31e60 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:04 -0400 Subject: [PATCH 0784/1050] netfilter: Remove explicit rcu_read_lock in nf_hook_slow All of the callers of nf_hook_slow already hold the rcu_read_lock, so this cleanup removes the recursive call. This is just a cleanup, as the locking code gracefully handles this situation. 
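[Editor's note] The nested acquisition being removed was harmless because RCU read-side critical sections nest; only the outermost rcu_read_unlock() ends the critical section. A hedged illustration of why the cleanup is safe (a sketch, not code from the patch):

	rcu_read_lock();	/* taken by the caller, e.g. nf_hook_thresh */
	rcu_read_lock();	/* the inner lock removed here: legal, it nests */
	/* ... iterate the hook list, collect a verdict ... */
	rcu_read_unlock();	/* does not end the critical section */
	rcu_read_unlock();	/* only this outermost unlock does */

Dropping the inner pair therefore changes no locking semantics; it simply makes the rule "callers hold the read lock" explicit at the call sites.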
Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_redirect.c | 2 +- net/bridge/netfilter/ebtables.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/core.c | 6 +----- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nfnetlink_cthelper.c | 2 +- net/netfilter/nfnetlink_log.c | 8 ++++++-- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/xt_helper.c | 2 +- 14 files changed, 19 insertions(+), 19 deletions(-) diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 203964997a51..2e7c4f974340 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -24,7 +24,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) return EBT_DROP; if (par->hooknum != NF_BR_BROUTING) - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ ether_addr_copy(eth_hdr(skb)->h_dest, br_port_get_rcu(par->in)->br->dev->dev_addr); else diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index cceac5bb658f..dd7133216c9c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -146,7 +146,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, return 1; if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) return 1; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ if (in && (p = br_port_get_rcu(in)) != NULL && NF_INVF(e, EBT_ILOGICALIN, ebt_dev_check(e->logical_in, p->br->dev))) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 870aebda2932..713c09a74b90 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -110,7 +110,7 @@ static unsigned int ipv4_helper(void *priv, if (!help) return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4b5904bc2614..d075b3cf2400 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -149,7 +149,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 1aa5848764a7..963ee3848675 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -115,7 +115,7 @@ static unsigned int ipv6_helper(void *priv, help = nfct_help(ct); if (!help) return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 
660bc10c7a9c..f5a61bc3ec2b 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -165,7 +165,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's diff --git a/net/netfilter/core.c b/net/netfilter/core.c index f39276d1c2d7..c8faf8102394 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -291,16 +291,13 @@ repeat: /* Returns 1 if okfn() needs to be executed by the caller, - * -EPERM for NF_DROP, 0 otherwise. */ + * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { struct nf_hook_ops *elem; unsigned int verdict; int ret = 0; - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); next_hook: verdict = nf_iterate(state->hook_list, skb, state, &elem); @@ -321,7 +318,6 @@ next_hook: kfree_skb(skb); } } - rcu_read_unlock(); return ret; } EXPORT_SYMBOL(nf_hook_slow); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8d1ddb9b63ed..c94ec197845c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1275,7 +1275,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, skb->nfct = NULL; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ l3proto = __nf_ct_l3proto_find(pf); ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 5c0db5c64734..f65d93639d12 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -736,7 +736,7 @@ static int callforward_do_filter(struct net *net, const struct nf_afinfo *afinfo; int ret = 0; - /* rcu_read_lock()ed by nf_hook_slow() */ + /* rcu_read_lock()ed by nf_hook_thresh */ afinfo = nf_get_afinfo(family); if (!afinfo) return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4ffe388a9a1e..336e21559e01 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -346,7 +346,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, /* Called from the helper function, this call never fails */ help = nfct_help(ct); - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index e924e95fcc7f..3b79f34b5095 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -43,7 +43,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, if (help == NULL) return NF_DROP; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (helper == NULL) return NF_DROP; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6577db524ef6..eb086a192c5a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -442,7 +442,9 @@ 
__build_packet_message(struct nfnl_log_net *log, if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)) || /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ + /* rcu_read_lock()ed by nf_hook_thresh or + * nf_log_packet. + */ nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; @@ -477,7 +479,9 @@ __build_packet_message(struct nfnl_log_net *log, if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)) || /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ + /* rcu_read_lock()ed by nf_hook_thresh or + * nf_log_packet. + */ nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 808da34f94cd..7caa8b082c41 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -740,7 +740,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) struct net *net = entry->state.net; struct nfnl_queue_net *q = nfnl_queue_pernet(net); - /* rcu_read_lock()ed by nf_hook_slow() */ + /* rcu_read_lock()ed by nf_hook_thresh */ queue = instance_lookup(q, queuenum); if (!queue) return -ESRCH; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 805c9f64a04c..f679dd4c272a 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -41,7 +41,7 @@ helper_mt(const struct sk_buff *skb, struct xt_action_param *par) if (!master_help) return ret; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(master_help->helper); if (!helper) return ret; From d4bb5caa9cc1a802ba25f605b24b5640c025806b Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:05 -0400 Subject: [PATCH 0785/1050] netfilter: Only allow sane values in nf_register_net_hook This commit adds an upfront check for sane values to be passed when registering a netfilter hook. This will be used in a future patch for a simplified hook list traversal. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index c8faf8102394..67b74287535d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -89,6 +89,11 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) struct nf_hook_entry *entry; struct nf_hook_ops *elem; + if (reg->pf == NFPROTO_NETDEV && + (reg->hooknum != NF_NETDEV_INGRESS || + !reg->dev || dev_net(reg->dev) != net)) + return -EINVAL; + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; From a7056c5ba67ee6a956b42cf9ff9ba3a6a0bd9794 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 0786/1050] rxrpc: Send an immediate ACK if we fill in a hole Send an immediate ACK if we fill in a hole in the buffer left by an out-of-sequence packet. This may allow the congestion management in the peer to avoid a retransmission if packets got reordered on the wire. 
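The hole detection in the diff below leans on rxrpc's wrapping sequence-number comparisons. A standalone sketch of that idiom, using hypothetical seq_before()/seq_after() helpers that mirror the kernel's before()/after() macros:

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        /* a is "before" b when the signed 32-bit difference a - b is
         * negative, i.e. b is ahead of a by less than half the sequence
         * space; this stays correct across 32-bit wraparound.
         */
        static bool seq_before(uint32_t a, uint32_t b)
        {
                return (int32_t)(a - b) < 0;
        }

        static bool seq_after(uint32_t a, uint32_t b)
        {
                return (int32_t)(b - a) < 0;
        }

        int main(void)
        {
                uint32_t rx_top = 5, seq = 3;

                /* Not after rx_top, so the window top does not advance;
                 * strictly before rx_top, so the packet must be filling a
                 * hole left by an out-of-sequence packet - the case that
                 * now triggers an immediate ACK.
                 */
                printf("fills hole: %d\n",
                       !seq_after(seq, rx_top) && seq_before(seq, rx_top));

                /* The signed trick survives wraparound: */
                printf("before across wrap: %d\n", seq_before(0xfffffffe, 2));
                return 0;
        }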
Signed-off-by: David Howells --- net/rxrpc/input.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 349698d87ad1..757c16f033a0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -331,8 +331,16 @@ next_subpacket: call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; - if (after(seq, call->rx_top)) + if (after(seq, call->rx_top)) { smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } if (flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); From b69d94d7991f83928d3ea18fe12ab011fa852bb0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 0787/1050] rxrpc: Include the last reply DATA serial number in the final ACK In a client call, include the serial number of the last DATA packet of the reply in the final ACK. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a7458c398b9e..038ae62ddb4d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -133,7 +133,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, /* * End the packet reception phase. */ -static void rxrpc_end_rx_phase(struct rxrpc_call *call) +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); @@ -141,7 +141,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); } @@ -202,7 +202,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); if (flags & RXRPC_LAST_PACKET) { - rxrpc_end_rx_phase(call); + rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ if (after_eq(hard_ack, call->ackr_consumed + 2) || From dd7c1ee59a90ca8a75bce72c721851d5550f3c59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 0788/1050] rxrpc: Reinitialise the call ACK and timer state for client reply phase Clear the ACK reason, ACK timer and resend timer when entering the client reply phase when the first DATA packet is received. New ACKs will be proposed once the data is queued. The resend timer is no longer relevant and we need to cancel ACKs scheduled to probe for a lost reply. 
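A distilled sketch of that reset, with hypothetical standalone types (in the real patch below this happens under spin_lock_bh(&call->lock) and is followed by a call to rxrpc_set_timer()):

        /* 0 in ackr_reason means "no ACK proposed"; collapsing both soft
         * deadlines onto the hard call expiry effectively cancels them.
         */
        struct call_timers {
                unsigned long   expire_at;      /* hard call expiry (jiffies) */
                unsigned long   resend_at;      /* Tx resend deadline */
                unsigned long   ack_at;         /* deferred ACK deadline */
                int             ackr_reason;    /* pending ACK reason, 0 = none */
        };

        static void enter_reply_phase(struct call_timers *t)
        {
                t->ackr_reason = 0;             /* drop the pending ACK proposal */
                t->resend_at = t->expire_at;    /* resend is moot once the reply starts */
                t->ack_at = t->expire_at;       /* likewise the probe ACK */
        }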
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 9 +++++++++ net/rxrpc/misc.c | 1 + 3 files changed, 11 insertions(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e3bf9c0e3ad1..cdd35e2b40ba 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -682,6 +682,7 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; enum rxrpc_timer_trace { rxrpc_timer_begin, + rxrpc_timer_init_for_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, rxrpc_timer_set_for_resend, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 757c16f033a0..bda11eb2ab2a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -149,6 +149,15 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) { rxrpc_seq_t top = READ_ONCE(call->tx_top); + if (call->ackr_reason) { + spin_lock_bh(&call->lock); + call->ackr_reason = 0; + call->resend_at = call->expire_at; + call->ack_at = call->expire_at; + spin_unlock_bh(&call->lock); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply); + } + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) rxrpc_rotate_tx_window(call, top); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index a473fd7dabaa..901c012a2700 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -191,6 +191,7 @@ const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_begin] = "Begin ", [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_init_for_reply] = "IniRpl", [rxrpc_timer_set_for_ack] = "SetAck", [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", From df0562a72dba13ab49c7dd7cb15170697b9848ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 23:00:54 +0100 Subject: [PATCH 0789/1050] rxrpc: Delay the resend timer to allow for nsec->jiffies conv error When determining the resend timer value, we have a value in nsec but the timer is in jiffies which may be a million or more times more coarse. nsecs_to_jiffies() rounds down - which means that the resend timeout expressed as jiffies is very likely earlier than the one expressed as nanoseconds from which it was derived. The problem is that rxrpc_resend() gets triggered by the timer, but can't then find anything to resend yet. It sets the timer again - but gets kicked off immediately again and again until the nanosecond-based expiry time is reached and we actually retransmit. Fix this by adding 1 to the jiffies-based resend_at value to counteract the rounding and make sure that the timer happens after the nanosecond-based expiry is passed. Alternatives would be to adjust the timestamp on the packets to align with the jiffie scale or to switch back to using jiffie-timestamps. 
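The rounding hazard is easy to reproduce in isolation. A sketch with a simplified, round-down model of nsecs_to_jiffies(), assuming HZ=100 so that one jiffy is 10 ms:

        #include <stdint.h>
        #include <stdio.h>

        #define HZ 100ULL

        /* Rounds down, like the kernel helper this models. */
        static uint64_t nsecs_to_jiffies(uint64_t ns)
        {
                return ns * HZ / 1000000000ULL;
        }

        int main(void)
        {
                uint64_t delta_ns = 19000000ULL;        /* resend due in 19 ms */
                uint64_t ms_per_jiffy = 1000 / HZ;

                /* Without the fix the timer fires at 10 ms, finds nothing
                 * old enough to retransmit, and re-arms itself over and over
                 * until the 19 ms nanosecond deadline finally passes.
                 */
                printf("timer at %llu ms (unfixed)\n",
                       (unsigned long long)(nsecs_to_jiffies(delta_ns) * ms_per_jiffy));
                printf("timer at %llu ms (+1 jiffy)\n",
                       (unsigned long long)((nsecs_to_jiffies(delta_ns) + 1) * ms_per_jiffy));
                return 0;
        }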
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index a78a92fe5d77..d5bf9ce7ec6f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -200,8 +200,14 @@ static void rxrpc_resend(struct rxrpc_call *call) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); - call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); + resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); + call->resend_at = jiffies + + nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) + + 1; /* We have to make sure that the calculated jiffies value + * falls at or after the nsec value, or we shall loop + * ceaselessly because the timer times out, but we haven't + * reached the nsec timeout yet. + */ /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. If an ACK comes in whilst the From 31a1b989508ce64e8ead504884ced01e61870852 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:26 +0100 Subject: [PATCH 0790/1050] rxrpc: Generate a summary of the ACK state for later use Generate a summary of the Tx buffer packet state when an ACK is received for use in a later patch that does congestion management. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 14 +++++++++++++ net/rxrpc/input.c | 45 +++++++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cdd35e2b40ba..1a700b6a998b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -540,6 +540,20 @@ struct rxrpc_call { /* transmission-phase ACK management */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ + rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ +}; + +/* + * Summary of a new ACK and the changes it made. + */ +struct rxrpc_ack_summary { + u8 ack_reason; + u8 nr_acks; /* Number of ACKs in packet */ + u8 nr_nacks; /* Number of NACKs in packet */ + u8 nr_new_acks; /* Number of new ACKs in packet */ + u8 nr_new_nacks; /* Number of new NACKs in packet */ + u8 nr_rot_new_acks; /* Number of rotated new ACKs */ + bool new_low_nack; /* T if new low NACK found */ }; enum rxrpc_skb_trace { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index bda11eb2ab2a..dd699667eeef 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -56,12 +56,20 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. 
*/ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; int ix; u8 annotation; + if (call->acks_lowest_nak == call->tx_hard_ack) { + call->acks_lowest_nak = to; + } else if (before_eq(call->acks_lowest_nak, to)) { + summary->new_low_nack = true; + call->acks_lowest_nak = to; + } + spin_lock(&call->lock); while (before(call->tx_hard_ack, to)) { @@ -77,6 +85,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) if (annotation & RXRPC_TX_ANNO_LAST) set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) + summary->nr_rot_new_acks++; } spin_unlock(&call->lock); @@ -147,6 +157,7 @@ bad_state: */ static bool rxrpc_receiving_reply(struct rxrpc_call *call) { + struct rxrpc_ack_summary summary = { 0 }; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { @@ -159,7 +170,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top); + rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { rxrpc_proto_abort("TXL", call, top); return false; @@ -508,7 +519,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, * the time the ACK was sent. */ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, - rxrpc_seq_t seq, int nr_acks) + rxrpc_seq_t seq, int nr_acks, + struct rxrpc_ack_summary *summary) { bool resend = false; int ix; @@ -521,14 +533,23 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, annotation &= ~RXRPC_TX_ANNO_MASK; switch (*acks++) { case RXRPC_ACK_TYPE_ACK: + summary->nr_acks++; if (anno_type == RXRPC_TX_ANNO_ACK) continue; + summary->nr_new_acks++; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK | annotation; break; case RXRPC_ACK_TYPE_NACK: + if (!summary->nr_nacks && + call->acks_lowest_nak != seq) { + call->acks_lowest_nak = seq; + summary->new_low_nack = true; + } + summary->nr_nacks++; if (anno_type == RXRPC_TX_ANNO_NAK) continue; + summary->nr_new_nacks++; if (anno_type == RXRPC_TX_ANNO_RETRANS) continue; call->rxtx_annotations[ix] = @@ -558,7 +579,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { - u8 ack_reason; + struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); union { struct rxrpc_ackpacket ack; @@ -581,10 +602,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; - ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? - buf.ack.reason : RXRPC_ACK__INVALID); + summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? 
+ buf.ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, first_soft_ack, ack_reason, nr_acks); + trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks); _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, @@ -592,7 +613,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack, ntohl(buf.ack.previousPacket), acked_serial, - rxrpc_ack_names[ack_reason], + rxrpc_ack_names[summary.ack_reason], buf.ack.nAcks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -649,12 +670,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_proto_abort("AKN", call, 0); if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack); + rxrpc_rotate_tx_window(call, hard_ack, &summary); if (nr_acks > 0) { if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) return rxrpc_proto_abort("XSA", call, 0); - rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, + &summary); } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { @@ -669,11 +691,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, */ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top); + rxrpc_rotate_tx_window(call, call->tx_top, &summary); if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) rxrpc_end_tx_phase(call, false, "ETL"); } From 0d967960d39ee89f9e0289692e9f7232f490e55c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 0791/1050] rxrpc: Schedule an ACK if the reply to a client call appears overdue If we've sent all the request data in a client call but haven't seen any sign of the reply data yet, schedule an ACK to be sent to the server to find out if the reply data got lost. If the server hasn't yet hard-ACK'd the request data, we send a PING ACK to demand a response to find out whether we need to retransmit. If the server says it has received all of the data, we send an IDLE ACK to tell the server that we haven't received anything in the receive phase as yet. To make this work, a non-immediate PING ACK must carry a delay. I've chosen the same as the IDLE ACK for the moment. 
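The probe choice described above reduces to a single predicate; a hypothetical distillation (the real code spreads this decision across the Tx-end and ACK-input paths in the diff below):

        enum probe_ack { PROBE_PING, PROBE_IDLE };

        /* If the server has not yet hard-ACK'd all of our request DATA, a
         * PING demands a response telling us whether to retransmit; once
         * everything is hard-ACK'd, an IDLE ACK signals that we have seen
         * none of the reply yet.
         */
        static enum probe_ack choose_probe(int request_fully_hard_acked)
        {
                return request_fully_hard_acked ? PROBE_IDLE : PROBE_PING;
        }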
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_event.c | 1 + net/rxrpc/input.c | 8 ++++++++ net/rxrpc/misc.c | 2 ++ 4 files changed, 13 insertions(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1a700b6a998b..b1e697fc9ffb 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -707,7 +707,9 @@ enum rxrpc_timer_trace { extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index d5bf9ce7ec6f..05b94d1acf52 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -100,6 +100,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, expiry = rxrpc_soft_ack_delay; break; + case RXRPC_ACK_PING: case RXRPC_ACK_IDLE: if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dd699667eeef..0344f4494eb7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -138,6 +138,8 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, write_unlock(&call->state_lock); if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, + rxrpc_propose_ack_client_tx_end); trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); } else { trace_rxrpc_transmit(call, rxrpc_transmit_end); @@ -684,6 +686,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return; } + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST && + summary.nr_acks == call->tx_top - hard_ack) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + false, true, + rxrpc_propose_ack_ping_for_lost_reply); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 901c012a2700..a608769343e6 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -198,7 +198,9 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { }; const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", From 57494343cb5d66962bb197878fb1cc576177db31 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: [PATCH 0792/1050] rxrpc: Implement slow-start Implement RxRPC slow-start, which is similar to RFC 5681 for TCP. A tracepoint is added to log the state of the congestion management algorithm and the decisions it makes. Notes: (1) Since we send fixed-size DATA packets (apart from the final packet in each phase), counters and calculations are in terms of packets rather than bytes. (2) The ACK packet carries the equivalent of TCP SACK. (3) The FLIGHT_SIZE calculation in RFC 5681 doesn't seem particularly suited to SACK of a small number of packets. It seems that, almost inevitably, by the time three 'duplicate' ACKs have been seen, we have narrowed the loss down to one or two missing packets, and the FLIGHT_SIZE calculation ends up as 2. 
(4) In rxrpc_resend(), if there was no data that apparently needed retransmission, we transmit a PING ACK to ask the peer to tell us what its Rx window state is. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 45 ++++++++++ net/rxrpc/ar-internal.h | 53 ++++++++++- net/rxrpc/call_event.c | 36 +++++++- net/rxrpc/call_object.c | 13 +++ net/rxrpc/conn_event.c | 1 + net/rxrpc/input.c | 169 +++++++++++++++++++++++++++++++++-- net/rxrpc/misc.c | 19 ++++ net/rxrpc/output.c | 9 +- net/rxrpc/sendmsg.c | 7 +- 9 files changed, 339 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 56475497043d..ada12d00118c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -570,6 +570,51 @@ TRACE_EVENT(rxrpc_retransmit, __entry->expiry) ); +TRACE_EVENT(rxrpc_congest, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + rxrpc_serial_t ack_serial, enum rxrpc_congest_change change), + + TP_ARGS(call, summary, ack_serial, change), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_congest_change, change ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + __field(rxrpc_seq_t, lowest_nak ) + __field(rxrpc_serial_t, ack_serial ) + __field_struct(struct rxrpc_ack_summary, sum ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->change = change; + __entry->hard_ack = call->tx_hard_ack; + __entry->top = call->tx_top; + __entry->lowest_nak = call->acks_lowest_nak; + __entry->ack_serial = ack_serial; + memcpy(&__entry->sum, summary, sizeof(__entry->sum)); + ), + + TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + __entry->call, + __entry->ack_serial, + rxrpc_ack_names[__entry->sum.ack_reason], + __entry->hard_ack, + rxrpc_congest_modes[__entry->sum.mode], + __entry->sum.cwnd, + __entry->sum.ssthresh, + __entry->sum.nr_acks, __entry->sum.nr_nacks, + __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks, + __entry->sum.nr_rot_new_acks, + __entry->top - __entry->hard_ack, + __entry->sum.cumulative_acks, + __entry->sum.dup_acks, + __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "", + rxrpc_congest_changes[__entry->change], + __entry->sum.retrans_timeo ? " rTxTo" : "") + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b1e697fc9ffb..ca96e547cb9a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -402,6 +402,7 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ RXRPC_CALL_PINGING, /* Ping in process */ + RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; /* @@ -446,6 +447,17 @@ enum rxrpc_call_completion { NR__RXRPC_CALL_COMPLETIONS }; +/* + * Call Tx congestion management modes. + */ +enum rxrpc_congest_mode { + RXRPC_CALL_SLOW_START, + RXRPC_CALL_CONGEST_AVOIDANCE, + RXRPC_CALL_PACKET_LOSS, + RXRPC_CALL_FAST_RETRANSMIT, + NR__RXRPC_CONGEST_MODES +}; + /* * RxRPC call definition * - matched by { connection, call_id } @@ -518,6 +530,20 @@ struct rxrpc_call { * not hard-ACK'd packet follows this. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + + /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS + * is fixed, we keep these numbers in terms of segments (ie. DATA + * packets) rather than bytes. 
+ */ +#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN + u8 cong_cwnd; /* Congestion window size */ + u8 cong_extra; /* Extra to send for congestion management */ + u8 cong_ssthresh; /* Slow-start threshold */ + enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ + u8 cong_dup_acks; /* Count of ACKs showing missing packets */ + u8 cong_cumul_acks; /* Cumulative ACK count */ + ktime_t cong_tstamp; /* Last time cwnd was changed */ + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not * consumed packet follows this. */ @@ -539,12 +565,13 @@ struct rxrpc_call { ktime_t ackr_ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ + ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ }; /* - * Summary of a new ACK and the changes it made. + * Summary of a new ACK and the changes it made to the Tx buffer packet states. */ struct rxrpc_ack_summary { u8 ack_reason; @@ -554,6 +581,14 @@ struct rxrpc_ack_summary { u8 nr_new_nacks; /* Number of new NACKs in packet */ u8 nr_rot_new_acks; /* Number of rotated new ACKs */ bool new_low_nack; /* T if new low NACK found */ + bool retrans_timeo; /* T if reTx due to timeout happened */ + u8 flight_size; /* Number of unreceived transmissions */ + /* Place to stash values for tracing */ + enum rxrpc_congest_mode mode:8; + u8 cwnd; + u8 ssthresh; + u8 dup_acks; + u8 cumulative_acks; }; enum rxrpc_skb_trace { @@ -709,6 +744,7 @@ extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; enum rxrpc_propose_ack_trace { rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, rxrpc_propose_ack_respond_to_ack, @@ -729,6 +765,21 @@ enum rxrpc_propose_ack_outcome { extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; +enum rxrpc_congest_change { + rxrpc_cong_begin_retransmission, + rxrpc_cong_cleared_nacks, + rxrpc_cong_new_low_nack, + rxrpc_cong_no_change, + rxrpc_cong_progress, + rxrpc_cong_retransmit_again, + rxrpc_cong_rtt_window_end, + rxrpc_cong_saw_nack, + rxrpc_congest__nr_change +}; + +extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; +extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; + extern const char *const rxrpc_pkts[]; extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 05b94d1acf52..0e8478012212 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -146,6 +146,14 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, spin_unlock_bh(&call->lock); } +/* + * Handle congestion being detected by the retransmit timeout. + */ +static void rxrpc_congestion_timeout(struct rxrpc_call *call) +{ + set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); +} + /* * Perform retransmission of NAK'd and unack'd packets. 
*/ @@ -154,9 +162,9 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts; int ix; - u8 annotation, anno_type; + u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); @@ -193,10 +201,13 @@ static void rxrpc_resend(struct rxrpc_call *call) oldest = skb->tstamp; continue; } + if (!(annotation & RXRPC_TX_ANNO_RESENT)) + unacked++; } /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + retrans++; trace_rxrpc_retransmit(call, seq, annotation | anno_type, ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } @@ -210,6 +221,25 @@ static void rxrpc_resend(struct rxrpc_call *call) * reached the nsec timeout yet. */ + if (unacked) + rxrpc_congestion_timeout(call); + + /* If there was nothing that needed retransmission then it's likely + * that an ACK got lost somewhere. Send a ping to find out instead of + * retransmitting data. + */ + if (!retrans) { + rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + spin_unlock_bh(&call->lock); + ack_ts = ktime_sub(now, call->acks_latest_ts); + if (ktime_to_ns(ack_ts) < call->peer->rtt) + goto out; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto out; + } + /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. If an ACK comes in whilst the * lock is dropped, it may clear some of the retransmission markers for @@ -260,6 +290,7 @@ static void rxrpc_resend(struct rxrpc_call *call) out_unlock: spin_unlock_bh(&call->lock); +out: _leave(""); } @@ -293,6 +324,7 @@ recheck_state: if (time_after_eq(now, call->expire_at)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); + goto recheck_state; } if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a53f4c2c0025..d4b3293b78fa 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -160,6 +160,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; call->rx_expect_next = 1; + + if (RXRPC_TX_SMSS > 2190) + call->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + call->cong_cwnd = 3; + else + call->cong_cwnd = 4; + call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; return call; nomem_2: @@ -176,6 +184,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; + ktime_t now; _enter(""); @@ -185,6 +194,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->service_id = srx->srx_service; call->tx_phase = true; + now = ktime_get_real(); + call->acks_latest_ts = now; + call->cong_tstamp = now; _leave(" = %p", call); return call; @@ -325,6 +337,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_ACCEPTING; if (sp->hdr.securityIndex > 0) call->state = RXRPC_CALL_SERVER_SECURING; + call->cong_tstamp = skb->tstamp; /* Set the channel for this call. 
We don't get channel_lock as we're * only defending against the data_ready handler (which we're called diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a1cf1ec5f29e..37609ce89f52 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -97,6 +97,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.maxMTU = htonl(mtu); pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + pkt.whdr.flags |= RXRPC_SLOW_START_OK; len += sizeof(pkt.ack) + sizeof(pkt.info); break; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 0344f4494eb7..094720dd1eaf 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -36,6 +36,166 @@ static void rxrpc_proto_abort(const char *why, } } +/* + * Do TCP-style congestion management [RFC 5681]. + */ +static void rxrpc_congestion_management(struct rxrpc_call *call, + struct sk_buff *skb, + struct rxrpc_ack_summary *summary) +{ + enum rxrpc_congest_change change = rxrpc_cong_no_change; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int cumulative_acks = call->cong_cumul_acks; + unsigned int cwnd = call->cong_cwnd; + bool resend = false; + + summary->flight_size = + (call->tx_top - call->tx_hard_ack) - summary->nr_acks; + + if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { + summary->retrans_timeo = true; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = 1; + if (cwnd > call->cong_ssthresh && + call->cong_mode == RXRPC_CALL_SLOW_START) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + cumulative_acks = 0; + } + } + + cumulative_acks += summary->nr_new_acks; + cumulative_acks += summary->nr_rot_new_acks; + if (cumulative_acks > 255) + cumulative_acks = 255; + + summary->mode = call->cong_mode; + summary->cwnd = call->cong_cwnd; + summary->ssthresh = call->cong_ssthresh; + summary->cumulative_acks = cumulative_acks; + summary->dup_acks = call->cong_dup_acks; + + switch (call->cong_mode) { + case RXRPC_CALL_SLOW_START: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + if (summary->cumulative_acks > 0) + cwnd += 1; + if (cwnd > call->cong_ssthresh) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + } + goto out; + + case RXRPC_CALL_CONGEST_AVOIDANCE: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + + /* We analyse the number of packets that get ACK'd per RTT + * period and increase the window if we managed to fill it. 
+ */ + if (call->peer->rtt_usage == 0) + goto out; + if (ktime_before(skb->tstamp, + ktime_add_ns(call->cong_tstamp, + call->peer->rtt))) + goto out_no_clear_ca; + change = rxrpc_cong_rtt_window_end; + call->cong_tstamp = skb->tstamp; + if (cumulative_acks >= cwnd) + cwnd++; + goto out; + + case RXRPC_CALL_PACKET_LOSS: + if (summary->nr_nacks == 0) + goto resume_normality; + + if (summary->new_low_nack) { + change = rxrpc_cong_new_low_nack; + call->cong_dup_acks = 1; + if (call->cong_extra > 1) + call->cong_extra = 1; + goto send_extra_data; + } + + call->cong_dup_acks++; + if (call->cong_dup_acks < 3) + goto send_extra_data; + + change = rxrpc_cong_begin_retransmission; + call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = call->cong_ssthresh + 3; + call->cong_extra = 0; + call->cong_dup_acks = 0; + resend = true; + goto out; + + case RXRPC_CALL_FAST_RETRANSMIT: + if (!summary->new_low_nack) { + if (summary->nr_new_acks == 0) + cwnd += 1; + call->cong_dup_acks++; + if (call->cong_dup_acks == 2) { + change = rxrpc_cong_retransmit_again; + call->cong_dup_acks = 0; + resend = true; + } + } else { + change = rxrpc_cong_progress; + cwnd = call->cong_ssthresh; + if (summary->nr_nacks == 0) + goto resume_normality; + } + goto out; + + default: + BUG(); + goto out; + } + +resume_normality: + change = rxrpc_cong_cleared_nacks; + call->cong_dup_acks = 0; + call->cong_extra = 0; + call->cong_tstamp = skb->tstamp; + if (cwnd <= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_SLOW_START; + else + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; +out: + cumulative_acks = 0; +out_no_clear_ca: + if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1) + cwnd = RXRPC_RXTX_BUFF_SIZE - 1; + call->cong_cwnd = cwnd; + call->cong_cumul_acks = cumulative_acks; + trace_rxrpc_congest(call, summary, sp->hdr.serial, change); + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + return; + +packet_loss_detected: + change = rxrpc_cong_saw_nack; + call->cong_mode = RXRPC_CALL_PACKET_LOSS; + call->cong_dup_acks = 0; + goto send_extra_data; + +send_extra_data: + /* Send some previously unsent DATA if we have some to advance the ACK + * state. + */ + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST || + summary->nr_acks != call->tx_top - call->tx_hard_ack) { + call->cong_extra++; + wake_up(&call->waitq); + } + goto out_no_clear_ca; +} + /* * Ping the other end to fill our RTT cache and to retrieve the rwind * and MTU parameters. 
@@ -524,7 +684,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, rxrpc_seq_t seq, int nr_acks, struct rxrpc_ack_summary *summary) { - bool resend = false; int ix; u8 annotation, anno_type; @@ -556,16 +715,11 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, continue; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK | annotation; - resend = true; break; default: return rxrpc_proto_abort("SFT", call, 0); } } - - if (resend && - !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); } /* @@ -663,6 +817,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, sp->hdr.serial, call->acks_latest); return; } + call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || @@ -692,6 +847,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); + + return rxrpc_congestion_management(call, skb, &summary); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index a608769343e6..aedb8978226d 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -200,6 +200,7 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", @@ -214,3 +215,21 @@ const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { [rxrpc_propose_ack_update] = " Update", [rxrpc_propose_ack_subsume] = " Subsume", }; + +const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = { + [RXRPC_CALL_SLOW_START] = "SlowStart", + [RXRPC_CALL_CONGEST_AVOIDANCE] = "CongAvoid", + [RXRPC_CALL_PACKET_LOSS] = "PktLoss ", + [RXRPC_CALL_FAST_RETRANSMIT] = "FastReTx ", +}; + +const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = { + [rxrpc_cong_begin_retransmission] = " Retrans", + [rxrpc_cong_cleared_nacks] = " Cleared", + [rxrpc_cong_new_low_nack] = " NewLowN", + [rxrpc_cong_no_change] = "", + [rxrpc_cong_progress] = " Progres", + [rxrpc_cong_retransmit_again] = " ReTxAgn", + [rxrpc_cong_rtt_window_end] = " RttWinE", + [rxrpc_cong_saw_nack] = " SawNack", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3eb01445e814..cf43a715685e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -157,6 +157,8 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) spin_unlock_bh(&call->lock); + pkt->whdr.flags |= RXRPC_SLOW_START_OK; + iov[0].iov_len += sizeof(pkt->ack) + n; iov[1].iov_base = &pkt->ackinfo; iov[1].iov_len = sizeof(pkt->ackinfo); @@ -276,8 +278,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; - /* If our RTT cache needs working on, request an ACK. */ - if ((call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + /* If our RTT cache needs working on, request an ACK. Also request + * ACKs if a DATA packet appears to have been lost. 
+ */ + if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) whdr.flags |= RXRPC_REQUEST_ACK; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 99939372b5a4..1f8040d82395 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -45,7 +45,9 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = 0; - if (call->tx_top - call->tx_hard_ack < call->tx_winsize) + if (call->tx_top - call->tx_hard_ack < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) break; if (call->state >= RXRPC_CALL_COMPLETE) { ret = -call->error; @@ -203,7 +205,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); if (call->tx_top - call->tx_hard_ack >= - call->tx_winsize) { + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) { ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; From 54f17bbc52f71e2d313721046627c383d6c5c7da Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:06 -0400 Subject: [PATCH 0793/1050] netfilter: nf_queue: whitespace cleanup A future patch will modify the hook drop and outfn functions. This will cause the line lengths to take up too much space. This is simply a readability change. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 903dca050fb6..8fe85b98b5c8 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -22,10 +22,10 @@ struct nf_queue_entry { /* Packet queuing */ struct nf_queue_handler { - int (*outfn)(struct nf_queue_entry *entry, - unsigned int queuenum); - void (*nf_hook_drop)(struct net *net, - struct nf_hook_ops *ops); + int (*outfn)(struct nf_queue_entry *entry, + unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + struct nf_hook_ops *ops); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); From 8f46cca1e6c06a058374816887059bcc017b382f Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Thu, 22 Sep 2016 17:15:47 +0100 Subject: [PATCH 0794/1050] MIPS: SMP: Fix possibility of deadlock when bringing CPUs online This patch fixes the possibility of a deadlock when bringing up secondary CPUs. The deadlock occurs because set_cpu_online() is called before synchronise_count_slave(). This can cause a deadlock if the boot CPU, having scheduled another thread, attempts to send an IPI to the secondary CPU, which it sees has been marked online. The secondary is blocked in synchronise_count_slave() waiting for the boot CPU to enter synchronise_count_master(), but the boot CPU is blocked in smp_call_function_many() waiting for the secondary to respond to its IPI request. Fix this by marking the CPU online in cpu_callin_map and synchronising counters before declaring the CPU online and calculating the maps for IPIs.
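The invariant the fix establishes - "marked online" implies "counter sync complete" - can be illustrated with a userspace analogy (a hypothetical pthread sketch, not kernel code; the kernel fix is simply the reordering in start_secondary() below):

        #include <pthread.h>
        #include <stdbool.h>
        #include <stdio.h>

        static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
        static bool handshake_done, online;

        static void *secondary_cpu(void *unused)
        {
                pthread_mutex_lock(&lock);
                handshake_done = true;  /* synchronise_count_slave() analogue */
                online = true;          /* set_cpu_online() analogue - only now */
                pthread_cond_broadcast(&cond);
                pthread_mutex_unlock(&lock);
                return NULL;
        }

        int main(void)
        {
                pthread_t t;

                pthread_create(&t, NULL, secondary_cpu, NULL);

                /* Boot CPU side: anything that observes online == true may
                 * safely expect the secondary to answer IPIs.  With the old
                 * order (online published before the handshake) that
                 * assumption fails and both sides wait on each other.
                 */
                pthread_mutex_lock(&lock);
                while (!online)
                        pthread_cond_wait(&cond, &lock);
                pthread_mutex_unlock(&lock);
                printf("online implies handshake_done: %d\n", handshake_done);
                pthread_join(t, NULL);
                return 0;
        }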
Signed-off-by: Matt Redfearn Reported-by: Justin Chen Tested-by: Justin Chen Cc: Florian Fainelli Cc: stable@vger.kernel.org # v4.1+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14302/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index f95f094f36e4..b0baf48951fa 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -322,6 +322,9 @@ asmlinkage void start_secondary(void) cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); + cpumask_set_cpu(cpu, &cpu_callin_map); + synchronise_count_slave(cpu); + set_cpu_online(cpu, true); set_cpu_sibling_map(cpu); @@ -329,10 +332,6 @@ asmlinkage void start_secondary(void) calculate_cpu_foreign_map(); - cpumask_set_cpu(cpu, &cpu_callin_map); - - synchronise_count_slave(cpu); - /* * irq will be enabled in ->smp_finish(), enabling it too early * is dangerous. From 116e7111c8e3cc65ceef9664741bd593483e9517 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 22 Sep 2016 15:47:40 +0100 Subject: [PATCH 0795/1050] MIPS: Fix delay slot emulation count in debugfs Commit 432c6bacbd0c ("MIPS: Use per-mm page to execute branch delay slot instructions") accidentally removed use of the MIPS_FPU_EMU_INC_STATS macro from do_dsemulret, leading to the ds_emul file in debugfs always returning zero even though we perform delay slot emulations. Fix this by re-adding the use of the MIPS_FPU_EMU_INC_STATS macro. Signed-off-by: Paul Burton Fixes: 432c6bacbd0c ("MIPS: Use per-mm page to execute branch delay slot instructions") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14301/ Signed-off-by: Ralf Baechle --- arch/mips/math-emu/dsemul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 72a4642eee2c..4a094f7acb3d 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -298,5 +298,6 @@ bool do_dsemulret(struct pt_regs *xcp) /* Set EPC to return to post-branch instruction */ xcp->cp0_epc = current->thread.bd_emu_cont_pc; pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc); + MIPS_FPU_EMU_INC_STATS(ds_emul); return true; } From c2675de447f8238e7e2e7eced78fa671d42a9a7e Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Sat, 24 Sep 2016 14:01:04 -0400 Subject: [PATCH 0796/1050] gre: use nla_get_be32() to extract flowinfo Eliminate a sparse endianness mismatch warning, use nla_get_be32() to extract a __be32 value instead of nla_get_u32(). Signed-off-by: Lance Richardson Signed-off-by: David S. 
Miller --- net/ipv6/ip6_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 397e1ed3daa3..4ce74f86291b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1239,7 +1239,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); if (data[IFLA_GRE_FLOWINFO]) - parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]); if (data[IFLA_GRE_FLAGS]) parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); From f2c7c1d09832ef0d6499a9e1a958c3ddc686f723 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Tue, 6 Sep 2016 18:05:06 -0700 Subject: [PATCH 0797/1050] i40e: Remove 100 Mbps SGMII support for X722 This patch fixes the problem where the driver shows 100 Mbps as a supported speed and allows it to be configured for advertising on X722 devices. It does so by not setting the 100 Mbps SGMII flag for X722 devices. Without this patch, the user incorrectly thinks that 100 Mbps is supported and hence might try to advertise it on X722 devices when it is actually not a supported speed. Change-ID: I8c3d7c4251a9402d98994ed29749b7b895a0f205 Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53cde5b44346..60f082e10c3e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8608,7 +8608,6 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | I40E_FLAG_NO_PCI_LINK_CHECK | - I40E_FLAG_100M_SGMII_CAPABLE | I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; } else if ((pf->hw.aq.api_maj_ver > 1) || From a6cb91464b5b09c0ff749e4b01048f93ce450275 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 6 Sep 2016 18:05:07 -0700 Subject: [PATCH 0798/1050] i40e: fix deleting mac filters There exists a bug in which deleting a mac filter does not actually take effect. The driver reports that the filter has been deleted with no error. The problem occurs because the wrong cmd_flag is passed to the firmware when deleting the filter. The firmware reports an error back to the driver but it is expressly ignored. This fixes the bug by using the correct flag when deleting a filter. Without this patch, deleted filters remain in firmware and function as if they had not been deleted. Change-ID: I5f22b874f3b83f457702f18f0d5602ca21ac40c3 Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 60f082e10c3e..0841379a7517 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1315,7 +1315,7 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr) element.vlan_tag = 0; /* ...and some firmware does it this way. 
*/ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | - I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); } @@ -1908,7 +1908,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) ether_addr_copy(del_list[num_del].mac_addr, f->macaddr); if (f->vlan == I40E_VLAN_ANY) { del_list[num_del].vlan_tag = 0; - cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; } else { del_list[num_del].vlan_tag = cpu_to_le16((u16)(f->vlan)); From 2199254cb50a650934ef2d1c531ec6fdc0a826c5 Mon Sep 17 00:00:00 2001 From: Preethi Banala Date: Tue, 6 Sep 2016 18:05:08 -0700 Subject: [PATCH 0799/1050] i40e: add encap csum VF offload flag Add ENCAP_CSUM offload negotiation flag. Currently VF assumes checksum offload for encapsulated packets is supported by default. Going forward, this feature needs to be negotiated with PF before advertising to the stack. Hence, we need a flag to control it. This is in regards to prepping up for VF base mode functionality support. Change-ID: Iaab1f25cc0abda5f2fbe3309092640f0e77d163e Signed-off-by: Preethi Banala Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl.h | 1 + drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index c92a3bdee229..f861d3109d1a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -163,6 +163,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index f04ce6cb70dc..bd691ad86673 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -160,6 +160,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; From 234dc4e67611c11bb3990abced26cb75b8ef262a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 6 Sep 2016 18:05:09 -0700 Subject: [PATCH 0800/1050] i40e: cleanup ATR auto_disable_flags use Some locations that disable ATR accidentally used the "full" disable by disabling the flag in the standard flags field. This incorrectly forces ATR off permanently instead of temporarily disabling it. In addition, some code locations accidentally set the ATR flag enabled when they only meant to clear the auto_disable_flags. This results in ignoring the user's ethtool private flag settings. Additionally, when disabling ATR via ethtool, we did not perform a flush of the FD table. This results in the previously assigned ATR rules still functioning which was not expected. Cleanup all these areas so that automatic disable uses only the auto_disable_flag. Fix the flush code so that we can trigger a flush even when we've disabled ATR and SB support, as otherwise the flush doesn't work. 
Fix ethtool setting to actually request a flush. Fix NETIF_F_NTUPLE flag to only clear the auto_disable setting and not enable the full feature. Change-ID: Ib2486111f8031bd16943e9308757b276305c03b5 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 3 +++ drivers/net/ethernet/intel/i40e/i40e_main.c | 20 +++++++------------ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 14 ++++++------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index af28a8c2166b..5cad80f157fb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3021,6 +3021,9 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } else { pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; + + /* flush current ATR settings */ + set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); } if ((flags & I40E_PRIV_FLAGS_VEB_STATS) && diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0841379a7517..e1a2c9a77a63 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5242,7 +5242,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = 0; if (pf->fd_tcp_rule > 0) { - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n"); pf->fd_tcp_rule = 0; @@ -5976,9 +5976,6 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) int fd_room; int reg; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (!time_after(jiffies, pf->fd_flush_timestamp + (I40E_MIN_FD_FLUSH_INTERVAL * HZ))) return; @@ -5998,7 +5995,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } pf->fd_flush_timestamp = jiffies; - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; /* flush all filters */ wr32(&pf->hw, I40E_PFQF_CTL_1, I40E_PFQF_CTL_1_CLEARFDTABLE_MASK); @@ -6018,7 +6015,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) /* replay sideband filters */ i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]); if (!disable_atr) - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n"); @@ -6052,9 +6049,6 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf) if (test_bit(__I40E_DOWN, &pf->state)) return; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) i40e_fdir_flush_and_replay(pf); @@ -8682,13 +8676,13 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0; pf->fdir_pf_active_filters = 0; - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); /* if ATR was auto disabled it can be re-enabled. 
*/ if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); + } } return need_reset; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index bf7bb7c3f227..7ada05e7776f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -282,18 +282,18 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, if (add) { pf->fd_tcp_rule++; - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; - } + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; } else { pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ? (pf->fd_tcp_rule - 1) : 0; if (pf->fd_tcp_rule == 0) { - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; } } From a3417d287fb02e7bd24c6b1068fe6f9b52a259a6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 6 Sep 2016 18:05:10 -0700 Subject: [PATCH 0801/1050] i40e: check conflicting ntuple/sideband rules when re-enabling ATR In i40e_fdir_check_and_reenable(), the driver performs some checks to determine whether it is safe to re-enable FD Sideband and FD ATR support. The current check will only determine if there is available space in the flow director table. However, this ignores the fact that ATR should be disabled when there are TCP/IPv4 sideband rules in effect. Add the missing check, and update the info message printed when I40E_DEBUG_FD is enabled. Change-ID: Ibb9c63e5be95d63c53a498fdd5dbf69f54a00e08 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e1a2c9a77a63..89b0418a6c2e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5939,13 +5939,17 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } } - /* Wait for some more space to be available to turn on ATR */ + + /* Wait for some more space to be available to turn on ATR. 
We also + * must check that no existing ntuple rules for TCP are in effect + */ if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) { if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) && + (pf->fd_tcp_rule == 0)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n"); + dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } } From ac9c5c6d8c17fa105878442ac663f0e9abe3cff5 Mon Sep 17 00:00:00 2001 From: Henry Tieman Date: Tue, 6 Sep 2016 18:05:11 -0700 Subject: [PATCH 0802/1050] i40e: removing unreachable code The return value from i40e_shutdown_adminq() is always 0 (I40E_SUCCESS). So, the test for non-0 will never be true. Clean up by removing the test and the debug print statement. Change-ID: Ie51e8e37515c3e3a6a9ff26fa951d0e5e24343c1 Signed-off-by: Henry Tieman Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 89b0418a6c2e..e626761b8c94 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11333,11 +11333,7 @@ static void i40e_remove(struct pci_dev *pdev) } /* shutdown the adminq */ - ret_code = i40e_shutdown_adminq(hw); - if (ret_code) - dev_warn(&pdev->dev, - "Failed to destroy the Admin Queue resources: %d\n", - ret_code); + i40e_shutdown_adminq(hw); /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); From fa90efa59dabbaac24f1ad2e6535e6daa2845257 Mon Sep 17 00:00:00 2001 From: Bimmy Pujari Date: Tue, 6 Sep 2016 18:05:12 -0700 Subject: [PATCH 0803/1050] i40e/i40evf: Changed version to 1.6.16 Signed-off-by: Bimmy Pujari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e626761b8c94..b434d07d98cd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 12 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 99833f3ea34e..064419e48f21 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 12 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "."
\ __stringify(DRV_VERSION_BUILD) \ From 7ac4b5c6fd351be8f849f687e290ca9724acfd33 Mon Sep 17 00:00:00 2001 From: Akeem Abodunrin Date: Mon, 12 Sep 2016 14:18:37 -0700 Subject: [PATCH 0804/1050] i40e: Increase minimum number of allocated VSI This patch increases the minimum number of allocated VSIs to resolve a failure adding a VSI for a VF when 64 VFs are assigned to a PF. The driver supports up to 128 VFs per device, and users can choose to enable up to 64 VFs on a single PF, especially on 2 x 40 devices. In that scenario, with VMDq co-existence, VSIs would be starved; with this patch, the supported features have enough VSIs for configuration. Change-ID: If084f4cd823667af8fe7fdc11489c705b32039d5 Signed-off-by: Akeem Abodunrin Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 30aaee42f648..6b22df62f018 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -74,7 +74,7 @@ #define I40E_MIN_NUM_DESCRIPTORS 64 #define I40E_MIN_MSIX 2 #define I40E_DEFAULT_NUM_VMDQ_VSI 8 /* max 256 VSIs */ -#define I40E_MIN_VSI_ALLOC 51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */ +#define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */ /* max 16 qps */ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1) From f19a973f46e85d4394cadb90fa7717f7ec98197a Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 12 Sep 2016 14:18:38 -0700 Subject: [PATCH 0805/1050] i40evf: enable adaptive interrupt throttling All of the code to support adaptive interrupt throttling is already in the interrupt handler; it just needs to be enabled. Fill out the data structures properly to make it happen. Single-flow traffic tests may show slightly lower throughput, but interrupts per second will drop by about 75%.
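To make the "fill out the data structures" step concrete: the driver keeps a per-ring ITR setting whose high bit marks adaptive (dynamic) mode and whose low bits hold the throttle interval, and the flag is stripped before the value is programmed into a register. The standalone C sketch below mirrors that encoding; the bit values and names here are illustrative assumptions, not the driver's real definitions (the actual macros, I40E_ITR_DYNAMIC and ITR_TO_REG, appear in the diff that follows).

#include <stdint.h>
#include <stdio.h>

#define ITR_DYNAMIC 0x8000u	/* assumed flag bit: adaptive mode */
#define ITR_RX_DEF  0x003eu	/* assumed default throttle interval */

/* strip the software-only flag before "programming" the register */
static uint16_t itr_to_reg(uint16_t setting)
{
	return setting & ~ITR_DYNAMIC;
}

int main(void)
{
	uint16_t rx_itr_setting = ITR_DYNAMIC | ITR_RX_DEF;

	if (rx_itr_setting & ITR_DYNAMIC)
		printf("adaptive throttling enabled\n");
	printf("register value: 0x%04x\n", itr_to_reg(rx_itr_setting));
	return 0;
}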
Change-ID: I9cd7d42c025b906bf1bb85c6aeb6112684aa6471 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 064419e48f21..302c9743ef44 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -370,6 +370,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; @@ -377,7 +379,10 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->ring_mask |= BIT(r_idx); q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); } /** @@ -391,6 +396,8 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -398,9 +405,10 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; - q_vector->ring_mask |= BIT(t_idx); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); } /** @@ -2269,10 +2277,8 @@ int i40evf_process_config(struct i40evf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); - adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_TX_DEF)); + adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); + adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { From 64bfd68eaecdce7b86e179fe39662340c8aed20d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:39 -0700 Subject: [PATCH 0806/1050] i40e: Fix Flow Director raw_buf cleanup The Tx cleanup flow was incorrectly assuming it could check for the flow director bits after it had unmapped the buffer. However, in this case it results in us trying to free a raw_buf as though it were an sk_buff. To fix this I am moving up the flag test for the FD_SB bit so that when we find a non-NULL skb or raw_buf value, we then check the flag and use the appropriate call to free the buffer.
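The ordering constraint behind this fix is easy to see in isolation: skb and raw_buf occupy the same storage, so a tag must pick the free routine before the pointer is interpreted. Below is a minimal standalone C sketch of the corrected pattern; the types and names are placeholders for illustration, not the driver's.

#include <stdlib.h>

#define FLAG_RAW 0x1u

struct fake_skb { int len; };	/* stand-in for a real sk_buff */

struct tx_buffer {
	union {			/* both names alias the same pointer */
		struct fake_skb *skb;
		void *raw_buf;
	};
	unsigned int flags;
};

static void free_tx_buffer(struct tx_buffer *b)
{
	if (!b->skb)
		return;
	/* decide by the flag first; never guess from the pointer itself */
	if (b->flags & FLAG_RAW)
		free(b->raw_buf);
	else
		free(b->skb);	/* dev_kfree_skb_any() in the driver */
	b->skb = NULL;
}

int main(void)
{
	struct tx_buffer b = { .raw_buf = malloc(64), .flags = FLAG_RAW };

	free_tx_buffer(&b);
	return 0;
}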
Change-ID: I6284034ba1ea87c9922e56f6eb3181f7f09bddde Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 ++++---- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 7ada05e7776f..a2077bedac93 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -532,7 +532,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -545,9 +548,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index e3427ebd5b84..cb6b13098699 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -51,7 +51,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -64,9 +67,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); From e486bdfd7c491e997f29fcdf6a4216861ab1d06a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:40 -0700 Subject: [PATCH 0807/1050] i40e/i40evf: Add txring_txq function to match fm10k and ixgbe This patch adds a txring_txq function which allows us to convert an i40e_ring/i40evf_ring to a netdev_tx_queue structure. This way we can avoid having to make a multi-line function call for all the spots that need access to this.
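The helper itself is a one-liner; the savings are at the call sites. As a rough standalone sketch of the same accessor pattern (stand-in types, not the kernel's own):

#include <stdio.h>

struct txq { int id; };

struct net_dev {
	struct txq queues[4];
};

struct ring {
	struct net_dev *netdev;
	int queue_index;
};

/* one-argument accessor wrapping the two-argument lookup */
static inline struct txq *ring_txq(const struct ring *ring)
{
	return &ring->netdev->queues[ring->queue_index];
}

int main(void)
{
	struct net_dev dev = { .queues = { {0}, {1}, {2}, {3} } };
	struct ring ring = { .netdev = &dev, .queue_index = 2 };

	printf("tx queue id: %d\n", ring_txq(&ring)->id);
	return 0;
}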
Change-ID: Ic063b71d8b92ea406d2c32e798c8e2b02809d65b Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 17 ++++++----------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 9 +++++++++ drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 17 ++++++----------- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 9 +++++++++ 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index a2077bedac93..6e0a7acf17a2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -584,8 +584,7 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -754,8 +753,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -2784,9 +2783,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2811,13 +2808,11 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. 
*/ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index b78c810d1835..508840585645 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -463,4 +463,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index cb6b13098699..d4c6a767be94 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -103,8 +103,7 @@ void i40evf_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -273,8 +272,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -2012,9 +2011,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2039,13 +2036,11 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. 
*/ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 0112277e5882..84e561cc0166 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -445,4 +445,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ From 5e02f2837349b399e48fd2a5e5149c9ee9c27cdd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:41 -0700 Subject: [PATCH 0808/1050] i40e: Split Flow Director descriptor config into separate function In an effort to improve code readability I am splitting the Flow Director filter configuration out into a separate function like we have done for the standard xmit path. The general idea is to provide a single block of code that translates the flow specification into a proper Flow Director descriptor. Change-ID: Id355ad8030c4e6c72c57504fa09de60c976a8ffe Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 115 +++++++++++--------- 1 file changed, 64 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6e0a7acf17a2..ef9b8d7f85d4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -40,6 +40,69 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, } #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) +/** + * i40e_fdir - Generate a Flow Director descriptor based on fdata + * @tx_ring: Tx ring to send buffer on + * @fdata: Flow director filter data + * @add: Indicate if we are adding a rule or deleting one + * + **/ +static void i40e_fdir(struct i40e_ring *tx_ring, + struct i40e_fdir_filter *fdata, bool add) +{ + struct i40e_filter_program_desc *fdir_desc; + struct i40e_pf *pf = tx_ring->vsi->back; + u32 flex_ptype, dtype_cmd; + u16 i; + + /* grab the next descriptor */ + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK & + (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK & + (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK & + (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); + + /* Use LAN VSI Id if not programmed by user */ + flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK & + ((u32)(fdata->dest_vsi ? 
: pf->vsi[pf->lan_vsi]->id) << + I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT); + + dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; + + dtype_cmd |= add ? + I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT : + I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT; + + dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK & + (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT); + + dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK & + (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT); + + if (fdata->cnt_index) { + dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; + dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK & + ((u32)fdata->cnt_index << + I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT); + } + + fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); + fdir_desc->rsvd = cpu_to_le32(0); + fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); + fdir_desc->fd_id = cpu_to_le32(fdata->fd_id); +} + #define I40E_FD_CLEAN_DELAY 10 /** * i40e_program_fdir_filter - Program a Flow Director filter @@ -51,11 +114,9 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, struct i40e_pf *pf, bool add) { - struct i40e_filter_program_desc *fdir_desc; struct i40e_tx_buffer *tx_buf, *first; struct i40e_tx_desc *tx_desc; struct i40e_ring *tx_ring; - unsigned int fpt, dcc; struct i40e_vsi *vsi; struct device *dev; dma_addr_t dma; @@ -92,56 +153,8 @@ int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, /* grab the next descriptor */ i = tx_ring->next_to_use; - fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); first = &tx_ring->tx_bi[i]; - memset(first, 0, sizeof(struct i40e_tx_buffer)); - - tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0; - - fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & - I40E_TXD_FLTR_QW0_QINDEX_MASK; - - fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) & - I40E_TXD_FLTR_QW0_FLEXOFF_MASK; - - fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) & - I40E_TXD_FLTR_QW0_PCTYPE_MASK; - - /* Use LAN VSI Id if not programmed by user */ - if (fdir_data->dest_vsi == 0) - fpt |= (pf->vsi[pf->lan_vsi]->id) << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; - else - fpt |= ((u32)fdir_data->dest_vsi << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) & - I40E_TXD_FLTR_QW0_DEST_VSI_MASK; - - dcc = I40E_TX_DESC_DTYPE_FILTER_PROG; - - if (add) - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - else - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - - dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) & - I40E_TXD_FLTR_QW1_DEST_MASK; - - dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) & - I40E_TXD_FLTR_QW1_FD_STATUS_MASK; - - if (fdir_data->cnt_index != 0) { - dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; - dcc |= ((u32)fdir_data->cnt_index << - I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - } - - fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt); - fdir_desc->rsvd = cpu_to_le32(0); - fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc); - fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id); + i40e_fdir(tx_ring, fdir_data, add); /* Now program a dummy descriptor */ i = tx_ring->next_to_use; From 1eb846ac90b956e52f4269d80f13cfbe1df6850b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:42 -0700 Subject: [PATCH 0809/1050] i40e: Strip out debugfs hook for Flow Director filter programming This interface was only ever meant 
for debugging. Since it is not supposed to be here, we are removing it. Change-ID: Id771a1e5e7d3e2b4b7f56591b61fb48c921e1d04 Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 - .../net/ethernet/intel/i40e/i40e_debugfs.c | 80 ------------------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 5 +- 3 files changed, 3 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 6b22df62f018..2030d7c1dc94 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -710,8 +710,6 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi); int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig); -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add); int i40e_add_del_fdir(struct i40e_vsi *vsi, struct i40e_fdir_filter *input, bool add); void i40e_fdir_check_and_reenable(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 8555f04002da..0c1875b5b16d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1430,84 +1430,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, buff = NULL; kfree(desc); desc = NULL; - } else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) || - (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) { - struct i40e_fdir_filter fd_data; - u16 packet_len, i, j = 0; - char *asc_packet; - u8 *raw_packet; - bool add = false; - int ret; - - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - if (strncmp(cmd_buf, "add", 3) == 0) - add = true; - - if (add && (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - if (!asc_packet) - goto command_write_done; - - raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - - if (!raw_packet) { - kfree(asc_packet); - asc_packet = NULL; - goto command_write_done; - } - - cnt = sscanf(&cmd_buf[13], - "%hx %2hhx %2hhx %hx %2hhx %2hhx %hx %x %hd %511s", - &fd_data.q_index, - &fd_data.flex_off, &fd_data.pctype, - &fd_data.dest_vsi, &fd_data.dest_ctl, - &fd_data.fd_status, &fd_data.cnt_index, - &fd_data.fd_id, &packet_len, asc_packet); - if (cnt != 10) { - dev_info(&pf->pdev->dev, - "program fd_filter: bad command string, cnt=%d\n", - cnt); - kfree(asc_packet); - asc_packet = NULL; - kfree(raw_packet); - goto command_write_done; - } - - /* fix packet length if user entered 0 */ - if (packet_len == 0) - packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE; - - /* make sure to check the max as well */ - packet_len = min_t(u16, - packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE); - - for (i = 0; i < packet_len; i++) { - cnt = sscanf(&asc_packet[j], "%2hhx ", &raw_packet[i]); - if (!cnt) - break; - j += 3; - } - dev_info(&pf->pdev->dev, "FD raw packet dump\n"); - print_hex_dump(KERN_INFO, "FD raw packet: ", - DUMP_PREFIX_OFFSET, 16, 1, - raw_packet, packet_len, true); - ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add); - if (!ret) { - dev_info(&pf->pdev->dev, "Filter command send Status : Success\n"); - } else { - dev_info(&pf->pdev->dev, - "Filter command send failed %d\n", ret); - } - kfree(raw_packet); - raw_packet = NULL; - kfree(asc_packet); - asc_packet = NULL; } else if (strncmp(cmd_buf, "fd current
cnt", 14) == 0) { dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n", i40e_get_current_fd_count(pf)); @@ -1732,8 +1654,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " globr\n"); dev_info(&pf->pdev->dev, " send aq_cmd \n"); dev_info(&pf->pdev->dev, " send indirect aq_cmd \n"); - dev_info(&pf->pdev->dev, " add fd_filter \n"); - dev_info(&pf->pdev->dev, " rem fd_filter \n"); dev_info(&pf->pdev->dev, " fd current cnt"); dev_info(&pf->pdev->dev, " lldp start\n"); dev_info(&pf->pdev->dev, " lldp stop\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ef9b8d7f85d4..5237c491af89 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -111,8 +111,9 @@ static void i40e_fdir(struct i40e_ring *tx_ring, * @pf: The PF pointer * @add: True for add/update, False for remove **/ -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add) +static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, + u8 *raw_packet, struct i40e_pf *pf, + bool add) { struct i40e_tx_buffer *tx_buf, *first; struct i40e_tx_desc *tx_desc; From a4fa59cc5bb028ebb8048e8dcb6f92b2a1ea07f6 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 12 Sep 2016 14:18:43 -0700 Subject: [PATCH 0810/1050] i40e: don't configure zero-size RSS table In some rare cases, we might get a VSI with no queues. In this case, we cannot configure RSS on this VSI as it will try to divide by zero when configuring the lookup table. Change-ID: I6ae173a7dd3481a081e079eb10eb80275de2adb0 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b434d07d98cd..8176596932be 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8282,6 +8282,8 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) if (!vsi->rss_size) vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) From 65e87c0398f542d5bd51cfd8a29b9dfd246b6a1c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 12 Sep 2016 14:18:44 -0700 Subject: [PATCH 0811/1050] i40evf: support queue-specific settings for interrupt moderation In commit a75e8005d506f3 ("i40e: queue-specific settings for interrupt moderation") the i40e driver gained support for setting interrupt moderation values per queue. This patch adds support for this feature to the i40evf driver as well. In addition, a few changes are made to the i40e implementation to add function header documentation comments, as well. This behaves in a similar fashion to the implementation in i40e. Thus, requesting the moderation value when no queue is provided will report queue 0 value, while setting the value without a queue will set all queues at once. 
Change-ID: I1f310a57c8e6c84a8524c178d44d1b7a6d3a848e Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 72 +++++- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 21 +- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 26 +- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 8 + drivers/net/ethernet/intel/i40evf/i40evf.h | 7 - .../ethernet/intel/i40evf/i40evf_ethtool.c | 224 +++++++++++++----- .../net/ethernet/intel/i40evf/i40evf_main.c | 10 +- 7 files changed, 289 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 5cad80f157fb..92bc8846f1ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1970,11 +1970,22 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ +/** + * __i40e_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. + **/ static int __i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_ring *rx_ring, *tx_ring; struct i40e_vsi *vsi = np->vsi; ec->tx_max_coalesced_frames_irq = vsi->work_limit; @@ -1989,14 +2000,18 @@ static int __i40e_get_coalesce(struct net_device *netdev, return -EINVAL; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) + rx_ring = vsi->rx_rings[queue]; + tx_ring = vsi->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2010,18 +2025,44 @@ static int __i40e_get_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_get_coalesce - get a netdev's coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * + * Gets the coalesce settings for a particular netdev. Note that if user has + * modified per-queue settings, this only guarantees to represent queue 0. See + * __i40e_get_coalesce for more details. 
+ **/ static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, -1); } +/** + * i40e_get_per_queue_coalesce - gets coalesce settings for particular queue + * @netdev: netdev structure + * @ec: ethtool's coalesce settings + * @queue: the particular queue to read + * + * Will read a specific queue's coalesce settings + **/ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, queue); } +/** + * i40e_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ + static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct ethtool_coalesce *ec, int queue) @@ -2060,6 +2101,14 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, i40e_flush(hw); } +/** + * __i40e_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. + **/ static int __i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) @@ -2120,12 +2169,27 @@ static int __i40e_set_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_set_coalesce - set coalesce settings for every queue on the netdev + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * + * This will set each queue to the same coalesce settings. + **/ static int i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_set_coalesce(netdev, ec, -1); } +/** + * i40e_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to change + * + * Sets the specified queue's coalesce settings. 
+ **/ static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5237c491af89..6287bf63c43c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1877,6 +1877,15 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->rx_rings[idx]->rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->tx_rings[idx]->tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1892,6 +1901,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, u32 rxval, txval; int vector; int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1900,18 +1910,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index d4c6a767be94..75f2a2cdd738 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1311,6 +1311,19 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->rx_rings[idx].rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->tx_rings[idx].tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1325,6 +1338,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1333,18 +1348,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if 
(ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 84e561cc0166..abcdecabbc56 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -287,6 +287,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessors routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_buf_len; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index dc00aaf94687..c5fd724313c7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -59,13 +59,6 @@ struct i40e_vsi { unsigned long state; int base_vector; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. - */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 qs_handle; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index e17a15456266..a9940154eead 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -295,6 +295,50 @@ static int i40evf_set_ringparam(struct net_device *netdev, return 0; } +/** + * __i40evf_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. + **/ +static int __i40evf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_ring *rx_ring, *tx_ring; + + ec->tx_max_coalesced_frames = vsi->work_limit; + ec->rx_max_coalesced_frames = vsi->work_limit; + + /* Rx and Tx usecs per queue value. If user doesn't specify the + * queue, return queue 0's value to represent. 
+ */ + if (queue < 0) + queue = 0; + else if (queue >= adapter->num_active_queues) + return -EINVAL; + + rx_ring = &adapter->rx_rings[queue]; + tx_ring = &adapter->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + ec->use_adaptive_rx_coalesce = 1; + + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + ec->use_adaptive_tx_coalesce = 1; + + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + + return 0; +} + /** * i40evf_get_coalesce - Get interrupt coalescing settings * @netdev: network interface device structure @@ -302,25 +346,124 @@ static int i40evf_set_ringparam(struct net_device *netdev, * * Returns current coalescing settings. This is referred to elsewhere in the * driver as Interrupt Throttle Rate, as this is how the hardware describes - * this functionality. + * this functionality. Note that if per-queue settings have been modified this + * only represents the settings of queue 0. **/ static int i40evf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) +{ + return __i40evf_get_coalesce(netdev, ec, -1); +} + +/** + * i40evf_get_per_queue_coalesce - get coalesce values for specific queue + * @netdev: netdev to read + * @ec: coalesce settings from ethtool + * @queue: the queue to read + * + * Read specific queue's coalesce settings. + **/ +static int i40evf_get_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_get_coalesce(netdev, ec, queue); +} + +/** + * i40evf_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ +static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; + struct i40e_q_vector *q_vector; + u16 vector; + + adapter->rx_rings[queue].rx_itr_setting = ec->rx_coalesce_usecs; + adapter->tx_rings[queue].tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + adapter->rx_rings[queue].rx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->rx_rings[queue].rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + adapter->tx_rings[queue].tx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->tx_rings[queue].tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = adapter->rx_rings[queue].q_vector; + q_vector->rx.itr = ITR_TO_REG(adapter->rx_rings[queue].rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = adapter->tx_rings[queue].q_vector; + q_vector->tx.itr = ITR_TO_REG(adapter->tx_rings[queue].tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + i40e_flush(hw); +} + +/** + * __i40evf_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. 
+ **/ +static int __i40evf_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_vsi *vsi = &adapter->vsi; + int i; - ec->tx_max_coalesced_frames = vsi->work_limit; - ec->rx_max_coalesced_frames = vsi->work_limit; + if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) + vsi->work_limit = ec->tx_max_coalesced_frames_irq; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) - ec->use_adaptive_rx_coalesce = 1; + if (ec->rx_coalesce_usecs == 0) { + if (ec->use_adaptive_rx_coalesce) + netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); + return -EINVAL; + } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) - ec->use_adaptive_tx_coalesce = 1; + else + if (ec->tx_coalesce_usecs == 0) { + if (ec->use_adaptive_tx_coalesce) + netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); + return -EINVAL; + } - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* Rx and Tx usecs has per queue value. If user doesn't specify the + * queue, apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < adapter->num_active_queues; i++) + i40evf_set_itr_per_queue(adapter, ec, i); + } else if (queue < adapter->num_active_queues) { + i40evf_set_itr_per_queue(adapter, ec, queue); + } else { + netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + adapter->num_active_queues - 1); + return -EINVAL; + } return 0; } @@ -330,56 +473,27 @@ static int i40evf_get_coalesce(struct net_device *netdev, * @netdev: network interface device structure * @ec: ethtool coalesce structure * - * Change current coalescing settings. + * Change current coalescing settings for every queue. 
**/ static int i40evf_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_q_vector *q_vector; - int i; + return __i40evf_set_coalesce(netdev, ec, -1); +} - if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) - vsi->work_limit = ec->tx_max_coalesced_frames_irq; - - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - - else - return -EINVAL; - - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - else if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); - else - return -EINVAL; - - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) { - q_vector = &adapter->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_VFINT_ITRN1(1, i), q_vector->tx.itr); - i40e_flush(hw); - } - - return 0; +/** + * i40evf_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to modify + * + * Modifies a specific queue's coalesce settings. 
+ */ +static int i40evf_set_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_set_coalesce(netdev, ec, queue); } /** @@ -533,6 +647,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .set_msglevel = i40evf_set_msglevel, .get_coalesce = i40evf_get_coalesce, .set_coalesce = i40evf_set_coalesce, + .get_per_queue_coalesce = i40evf_get_per_queue_coalesce, + .set_per_queue_coalesce = i40evf_set_per_queue_coalesce, .get_rxnfc = i40evf_get_rxnfc, .get_rxfh_indir_size = i40evf_get_rxfh_indir_size, .get_rxfh = i40evf_get_rxfh, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 302c9743ef44..14372810fc27 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -370,7 +370,6 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; - struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; rx_ring->q_vector = q_vector; @@ -379,7 +378,7 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); q_vector->ring_mask |= BIT(r_idx); q_vector->itr_countdown = ITR_COUNTDOWN_START; wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); @@ -396,7 +395,6 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; - struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; @@ -405,7 +403,7 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); @@ -1162,6 +1160,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; + tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1170,6 +1169,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; + rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); } return 0; @@ -2277,8 +2277,6 @@ int i40evf_process_config(struct i40evf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); - adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { From 
e3b37f11e6e4e6b6f02cc762f182ce233d2c1c9d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:07 -0400 Subject: [PATCH 0812/1050] netfilter: replace list_head with single linked list The netfilter hook list never uses the prev pointer, and so can be trimmed to be a simple singly-linked list. In addition to having a more lightweight structure for hook traversal, struct net becomes 5568 bytes (down from 6400) and struct net_device becomes 2176 bytes (down from 2240). Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 63 +++++++------ include/linux/netfilter_ingress.h | 17 ++-- include/net/netfilter/nf_queue.h | 3 +- include/net/netns/netfilter.h | 2 +- net/bridge/br_netfilter_hooks.c | 19 ++-- net/netfilter/core.c | 145 +++++++++++++++++++----------- net/netfilter/nf_internals.h | 10 +-- net/netfilter/nf_queue.c | 18 ++-- net/netfilter/nfnetlink_queue.c | 8 +- 10 files changed, 169 insertions(+), 118 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bb978470dc..41f49f5ab62a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1783,7 +1783,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct list_head nf_hooks_ingress; + struct nf_hook_entry __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ad444f0b4ed0..44e20dac98a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -55,12 +55,34 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct list_head *hook_list; + struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; +typedef unsigned int nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); +struct nf_hook_ops { + struct list_head list; + + /* User fills in from here down. */ + nf_hookfn *hook; + struct net_device *dev; + void *priv; + u_int8_t pf; + unsigned int hooknum; + /* Hooks are ordered in ascending priority. */ + int priority; +}; + +struct nf_hook_entry { + struct nf_hook_entry __rcu *next; + struct nf_hook_ops ops; + const struct nf_hook_ops *orig_ops; +}; + static inline void nf_hook_state_init(struct nf_hook_state *p, - struct list_head *hook_list, + struct nf_hook_entry *hook_entry, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -76,26 +98,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - p->hook_list = hook_list; + RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } -typedef unsigned int nf_hookfn(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state); -struct nf_hook_ops { - struct list_head list; - - /* User fills in from here down. */ - nf_hookfn *hook; - struct net_device *dev; - void *priv; - u_int8_t pf; - unsigned int hooknum; - /* Hooks are ordered in ascending priority.
*/ - int priority; -}; struct nf_sockopt_ops { struct list_head list; @@ -161,7 +168,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { - struct list_head *hook_list; + struct nf_hook_entry *hook_head; + int ret = 1; #ifdef HAVE_JUMP_LABEL if (__builtin_constant_p(pf) && @@ -170,22 +178,19 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return 1; #endif - hook_list = &net->nf.hooks[pf][hook]; - - if (!list_empty(hook_list)) { + rcu_read_lock(); + hook_head = rcu_dereference(net->nf.hooks[pf][hook]); + if (hook_head) { struct nf_hook_state state; - int ret; - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - nf_hook_state_init(&state, hook_list, hook, thresh, + nf_hook_state_init(&state, hook_head, hook, thresh, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state); - rcu_read_unlock(); - return ret; } - return 1; + rcu_read_unlock(); + + return ret; } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 6965ba09eba7..33e37fb41d5d 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -11,23 +11,30 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) return false; #endif - return !list_empty(&skb->dev->nf_hooks_ingress); + return rcu_access_pointer(skb->dev->nf_hooks_ingress); } /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { + struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; - nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, - skb->dev, NULL, NULL, dev_net(skb->dev), NULL); + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. 
+ */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } static inline void nf_hook_ingress_init(struct net_device *dev) { - INIT_LIST_HEAD(&dev->nf_hooks_ingress); + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 8fe85b98b5c8..2280cfe86c56 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -11,7 +11,6 @@ struct nf_queue_entry { struct sk_buff *skb; unsigned int id; - struct nf_hook_ops *elem; struct nf_hook_state state; u16 size; /* sizeof(entry) + saved route keys */ @@ -25,7 +24,7 @@ struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); void (*nf_hook_drop)(struct net *net, - struct nf_hook_ops *ops); + const struct nf_hook_entry *hooks); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 36d723579af2..58487b1cc99a 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,6 +16,6 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; }; #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 6029af47377d..2fe9345c1407 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1002,28 +1002,21 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_ops *elem; + struct nf_hook_entry *elem; struct nf_hook_state state; - struct list_head *head; int ret; - head = &net->nf.hooks[NFPROTO_BRIDGE][hook]; + elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - list_for_each_entry_rcu(elem, head, list) { - struct nf_hook_ops *next; + while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) + elem = rcu_dereference(elem->next); - next = list_entry_rcu(list_next_rcu(&elem->list), - struct nf_hook_ops, list); - if (next->priority <= NF_BR_PRI_BRNF) - continue; - } - - if (&elem->list == head) + if (!elem) return okfn(net, sk, skb); /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1, + nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 67b74287535d..72fc514ec676 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -61,33 +62,50 @@ EXPORT_SYMBOL(nf_hooks_needed); #endif static DEFINE_MUTEX(nf_hook_mutex); +#define nf_entry_dereference(e) \ + rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct list_head *nf_find_hook_list(struct net *net, - const struct nf_hook_ops *reg) +static struct nf_hook_entry *nf_hook_entry_head(struct net *net, + const struct nf_hook_ops *reg) { - struct list_head *hook_list = NULL; + struct nf_hook_entry *hook_head = NULL; if (reg->pf != NFPROTO_NETDEV) - hook_list = 
&net->nf.hooks[reg->pf][reg->hooknum]; + hook_head = nf_entry_dereference(net->nf.hooks[reg->pf] + [reg->hooknum]); else if (reg->hooknum == NF_NETDEV_INGRESS) { #ifdef CONFIG_NETFILTER_INGRESS if (reg->dev && dev_net(reg->dev) == net) - hook_list = ®->dev->nf_hooks_ingress; + hook_head = + nf_entry_dereference( + reg->dev->nf_hooks_ingress); #endif } - return hook_list; + return hook_head; } -struct nf_hook_entry { - const struct nf_hook_ops *orig_ops; - struct nf_hook_ops ops; -}; +/* must hold nf_hook_mutex */ +static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg, + struct nf_hook_entry *entry) +{ + switch (reg->pf) { + case NFPROTO_NETDEV: + /* We already checked in nf_register_net_hook() that this is + * used from ingress. + */ + rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry); + break; + default: + rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], + entry); + break; + } +} int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; + struct nf_hook_entry *hooks_entry; struct nf_hook_entry *entry; - struct nf_hook_ops *elem; if (reg->pf == NFPROTO_NETDEV && (reg->hooknum != NF_NETDEV_INGRESS || @@ -100,19 +118,30 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->orig_ops = reg; entry->ops = *reg; - - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) { - kfree(entry); - return -ENOENT; - } + entry->next = NULL; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - if (reg->priority < elem->priority) - break; + hooks_entry = nf_hook_entry_head(net, reg); + + if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) { + /* This is the case where we need to insert at the head */ + entry->next = hooks_entry; + hooks_entry = NULL; } - list_add_rcu(&entry->ops.list, elem->list.prev); + + while (hooks_entry && + reg->priority >= hooks_entry->orig_ops->priority && + nf_entry_dereference(hooks_entry->next)) { + hooks_entry = nf_entry_dereference(hooks_entry->next); + } + + if (hooks_entry) { + entry->next = nf_entry_dereference(hooks_entry->next); + rcu_assign_pointer(hooks_entry->next, entry); + } else { + nf_set_hooks_head(net, reg, entry); + } + mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) @@ -127,24 +156,33 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; - struct nf_hook_entry *entry; - struct nf_hook_ops *elem; - - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) - return; + struct nf_hook_entry *hooks_entry; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - entry = container_of(elem, struct nf_hook_entry, ops); - if (entry->orig_ops == reg) { - list_del_rcu(&entry->ops.list); - break; - } + hooks_entry = nf_hook_entry_head(net, reg); + if (hooks_entry->orig_ops == reg) { + nf_set_hooks_head(net, reg, + nf_entry_dereference(hooks_entry->next)); + goto unlock; } + while (hooks_entry && nf_entry_dereference(hooks_entry->next)) { + struct nf_hook_entry *next = + nf_entry_dereference(hooks_entry->next); + struct nf_hook_entry *nnext; + + if (next->orig_ops != reg) { + hooks_entry = next; + continue; + } + nnext = nf_entry_dereference(next->next); + rcu_assign_pointer(hooks_entry->next, nnext); + hooks_entry = next; + break; + } + +unlock: mutex_unlock(&nf_hook_mutex); - if (&elem->list == hook_list) { + if 
(!hooks_entry) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } @@ -156,10 +194,10 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(net, &entry->ops); + nf_queue_nf_hook_drop(net, hooks_entry); /* other cpu might still process nfqueue verdict that used reg */ synchronize_net(); - kfree(entry); + kfree(hooks_entry); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -258,10 +296,9 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(nf_unregister_hooks); -unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_ops **elemp) + struct nf_hook_entry **entryp) { unsigned int verdict; @@ -269,20 +306,23 @@ unsigned int nf_iterate(struct list_head *head, * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_entry_continue_rcu((*elemp), head, list) { - if (state->thresh > (*elemp)->priority) + while (*entryp) { + if (state->thresh > (*entryp)->ops.priority) { + *entryp = rcu_dereference((*entryp)->next); continue; + } /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook((*elemp)->priv, skb, state); + verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - (*elemp)->hook, state->hook); + (*entryp)->ops.hook, state->hook); + *entryp = rcu_dereference((*entryp)->next); continue; } #endif @@ -290,6 +330,7 @@ repeat: return verdict; goto repeat; } + *entryp = rcu_dereference((*entryp)->next); } return NF_ACCEPT; } @@ -299,13 +340,13 @@ repeat: * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. 
*/ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { - struct nf_hook_ops *elem; + struct nf_hook_entry *entry; unsigned int verdict; int ret = 0; - elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); + entry = rcu_dereference(state->hook_entries); next_hook: - verdict = nf_iterate(state->hook_list, skb, state, &elem); + verdict = nf_iterate(skb, state, &entry); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { @@ -314,8 +355,10 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, elem, state, - verdict >> NF_VERDICT_QBITS); + int err; + + RCU_INIT_POINTER(state->hook_entries, entry); + err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) @@ -442,7 +485,7 @@ static int __net_init netfilter_net_init(struct net *net) for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&net->nf.hooks[i][h]); + RCU_INIT_POINTER(net->nf.hooks[i][h], NULL); } #ifdef CONFIG_PROC_FS diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 065522564ac6..e0adb5959342 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,13 +13,13 @@ /* core.c */ -unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - struct nf_hook_state *state, struct nf_hook_ops **elemp); +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp); /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, - struct nf_hook_state *state, unsigned int queuenum); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops); +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + unsigned int queuenum); +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b19ad20a705c..96964a0070e1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -96,14 +96,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) { const struct nf_queue_handler *qh; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - qh->nf_hook_drop(net, ops); + qh->nf_hook_drop(net, entry); rcu_read_unlock(); } @@ -112,7 +112,6 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) * through nf_reinject(). */ int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum) { @@ -141,7 +140,6 @@ int nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, - .elem = elem, .state = *state, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -165,11 +163,15 @@ err: void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { + struct nf_hook_entry *hook_entry; struct sk_buff *skb = entry->skb; - struct nf_hook_ops *elem = entry->elem; const struct nf_afinfo *afinfo; + struct nf_hook_ops *elem; int err; + hook_entry = rcu_dereference(entry->state.hook_entries); + elem = &hook_entry->ops; + nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... 
*/ @@ -186,8 +188,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(entry->state.hook_list, - skb, &entry->state, &elem); + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { @@ -198,7 +199,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, elem, &entry->state, + RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); + err = nf_queue(skb, &entry->state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7caa8b082c41..af832c526048 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -917,12 +917,14 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr) +static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return entry->elem == (struct nf_hook_ops *)ops_ptr; + return rcu_access_pointer(entry->state.hook_entries) == + (struct nf_hook_entry *)entry_ptr; } -static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook) +static void nfqnl_nf_hook_drop(struct net *net, + const struct nf_hook_entry *hook) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); int i; From 8d2c0d36d6826ddc3114801c599619d3f2932f0a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 22 Sep 2016 14:53:53 +0800 Subject: [PATCH 0813/1050] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack A TCP RST packet that does not set the ack flag, and whose ack number bytes are zero, is valid. But the current seqadj code would adjust that "0" ack to an invalid ack number. Actually, seqadj needs to check the ack flag before adjusting it for these RST packets. The following is my test case. The client is 10.26.98.245; add one iptables rule: iptables -I INPUT -p tcp --sport 12345 -m connbytes --connbytes 2: --connbytes-dir reply --connbytes-mode packets -j REJECT --reject-with tcp-reset This iptables rule generates a TCP RST without the ack flag. The server is 10.172.135.55. Enable synproxy with seqadj via the following iptables rules: iptables -t raw -A PREROUTING -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m tcp --syn -j CT --notrack iptables -A INPUT -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m conntrack --ctstate INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460 iptables -A OUTPUT -o eth0 -p tcp -s 10.172.135.55 --sport 12345 -m conntrack --ctstate INVALID,UNTRACKED -m tcp --tcp-flags SYN,RST,ACK SYN,ACK -j ACCEPT The following is my test result. 1.
packet trace on client root@routers:/tmp# tcpdump -i eth0 tcp port 12345 -n tcpdump: verbose output suppressed, use -v or -vv for full protocol decode listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [S], seq 3695959829, win 29200, options [mss 1460,sackOK,TS val 452367884 ecr 0,nop,wscale 7], length 0 IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [S.], seq 546723266, ack 3695959830, win 0, options [mss 1460,sackOK,TS val 15643479 ecr 452367884, nop,wscale 7], length 0 IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [.], ack 1, win 229, options [nop,nop,TS val 452367885 ecr 15643479], length 0 IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [.], ack 1, win 226, options [nop,nop,TS val 15643479 ecr 452367885], length 0 IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [R], seq 3695959830, win 0, length 0 2. seqadj log on server [62873.867319] Adjusting sequence number from 602341895->546723267, ack from 3695959830->3695959830 [62873.867644] Adjusting sequence number from 602341895->546723267, ack from 3695959830->3695959830 [62873.869040] Adjusting sequence number from 3695959830->3695959830, ack from 0->55618628 To summarize, it is clear that the seqadj code adjusts the 0 ack when it receives a TCP RST packet without an ack. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_seqadj.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index dff0f0cc59e4..ef7063eced7c 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb, s32 seqoff, ackoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *this_way, *other_way; - int res; + int res = 1; this_way = &seqadj->seq[dir]; other_way = &seqadj->seq[!dir]; @@ -184,27 +184,31 @@ int nf_ct_seq_adjust(struct sk_buff *skb, else seqoff = this_way->offset_before; + newseq = htonl(ntohl(tcph->seq) + seqoff); + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); + pr_debug("Adjusting sequence number from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq)); + tcph->seq = newseq; + + if (!tcph->ack) + goto out; + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) ackoff = other_way->offset_after; else ackoff = other_way->offset_before; - newseq = htonl(ntohl(tcph->seq) + seqoff); newack = htonl(ntohl(tcph->ack_seq) - ackoff); - - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, false); - - pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), ntohl(newack)); - - tcph->seq = newseq; tcph->ack_seq = newack; res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); +out: spin_unlock_bh(&ct->lock); return res; From d767ff2c84f19be1aa403762f34eebbb403caf6d Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 22 Sep 2016 22:28:51 +0800 Subject: [PATCH 0814/1050] netfilter: nft_ct: unnecessary to require dir when use ct l3proto/protocol Currently, if the user wants to match ct l3proto, we must specify the direction, for example: # nft add rule filter input ct original l3proto ipv4 ^^^^^^^^ Otherwise, an error message is reported: # nft add rule filter
input ct l3proto ipv4 nft add rule filter input ct l3proto ipv4 :1:1-38: Error: Could not process rule: Invalid argument add rule filter input ct l3proto ipv4 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Actually, there's no need to require the NFTA_CT_DIRECTION attr, because ct l3proto and protocol are unrelated to direction. And for compatibility, even if the user specifies the NFTA_CT_DIRECTION attr, do not report an error; just skip it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 51e180f2a003..825fbbc62f48 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -128,15 +128,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, &count, sizeof(count)); return; } + case NFT_CT_L3PROTOCOL: + *dest = nf_ct_l3num(ct); + return; + case NFT_CT_PROTOCOL: + *dest = nf_ct_protonum(ct); + return; default: break; } tuple = &ct->tuplehash[priv->dir].tuple; switch (priv->key) { - case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); - return; case NFT_CT_SRC: memcpy(dest, tuple->src.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); @@ -145,9 +148,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, tuple->dst.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; - case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); - return; case NFT_CT_PROTO_SRC: *dest = (__force __u16)tuple->src.u.all; return; @@ -283,8 +283,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: - if (tb[NFTA_CT_DIRECTION] == NULL) - return -EINVAL; + /* For compatibility, do not report error if NFTA_CT_DIRECTION + * attribute is specified. + */ len = sizeof(u8); break; case NFT_CT_SRC: @@ -432,8 +433,6 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; switch (priv->key) { - case NFT_CT_L3PROTOCOL: - case NFT_CT_PROTOCOL: case NFT_CT_SRC: case NFT_CT_DST: case NFT_CT_PROTO_SRC: From 7bfdde7045ad54d9fdccac70baffd094d9de73f8 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 22 Sep 2016 22:28:52 +0800 Subject: [PATCH 0815/1050] netfilter: nft_ct: report error if mark and dir specified simultaneously NFT_CT_MARK is unrelated to direction, so if the NFTA_CT_DIRECTION attr is specified, report EINVAL to userspace. This validation check was already done at nft_ct_get_init, but we missed it in nft_ct_set_init. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 825fbbc62f48..d7b0d171172a 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -364,6 +364,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; len = FIELD_SIZEOF(struct nf_conn, mark); break; #endif From 0dc60a4546fefc6dc9f54abf60beeeb3501726fa Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Thu, 22 Sep 2016 12:42:46 -0400 Subject: [PATCH 0816/1050] netfilter: xt_hashlimit: Prepare for revision 2 I am planning to add a revision 2 for the hashlimit xtables module to support higher packets-per-second rates. This patch renames all the functions and variables related to revision 1 by adding _v1 at the end of the names.
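As background for readers following the split: both revisions keep the "divide first" overflow guard in user2credits(), which converts the user-supplied average (in XT_HASHLIMIT_SCALE units of 1/10,000 s) into token-bucket credits. Below is a standalone userspace sketch of that guard, not code from this patch; the HZ_EX and CPJ_EX values are assumptions picked purely for illustration (CPJ_EX is the power of two below 0xFFFFFFFF / (HZ_EX * 60 * 60 * 24) for HZ_EX = 250).

#include <stdint.h>
#include <stdio.h>

#define SCALE	10000u	/* XT_HASHLIMIT_SCALE: avg is in 1/10,000 s units */
#define HZ_EX	250u	/* assumed tick rate, illustration only */
#define CPJ_EX	128u	/* assumed CREDITS_PER_JIFFY for HZ_EX = 250 */

/* Mirrors the revision 1 logic: if the multiplication would overflow
 * 32 bits, divide by the scale first at the cost of some precision. */
static uint32_t user2credits(uint32_t user)
{
	if (user > 0xFFFFFFFFu / (HZ_EX * CPJ_EX))
		return (user / SCALE) * HZ_EX * CPJ_EX;	/* divide first */
	return (user * HZ_EX * CPJ_EX) / SCALE;
}

int main(void)
{
	printf("%u\n", (unsigned)user2credits(10000u));     /* avg = 1 packet/s */
	printf("%u\n", (unsigned)user2credits(1u));         /* 10,000 packets/s, the v1 ceiling */
	printf("%u\n", (unsigned)user2credits(200000000u)); /* large avg takes the divide-first path */
	return 0;
}

The same guard reappears in the follow-up patch with 64-bit arithmetic, which is what lifts the ceiling from 10k to 1M packets per second.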
Signed-off-by: Vishwanath Pai Signed-off-by: Joshua Hunt Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 61 ++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 178696852bde..e93d9e0a3f35 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -56,7 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) } /* need to declare this at the top */ -static const struct file_operations dl_file_ops; +static const struct file_operations dl_file_ops_v1; /* hash table crap */ struct dsthash_dst { @@ -215,8 +215,8 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(struct work_struct *work); -static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, - u_int8_t family) +static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, + u_int8_t family) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; @@ -265,7 +265,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->pde = proc_create_data(minfo->name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - &dl_file_ops, hinfo); + &dl_file_ops_v1, hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -398,7 +398,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) (slowest userspace tool allows), which means CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ -#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -410,7 +410,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) -#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) +#define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1) /* in byte mode, the lowest possible rate is one packet/second. * credit_cap is used as a counter that tells us how many times we can @@ -428,11 +428,12 @@ static u32 xt_hashlimit_len_to_chunks(u32 len) static u32 user2credits(u32 user) { /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) /* Divide first. 
*/ - return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + return (user / XT_HASHLIMIT_SCALE) *\ + HZ * CREDITS_PER_JIFFY_v1; - return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; + return (user * HZ * CREDITS_PER_JIFFY_v1) / XT_HASHLIMIT_SCALE; } static u32 user2credits_byte(u32 user) @@ -461,7 +462,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) return; } } else { - dh->rateinfo.credit += delta * CREDITS_PER_JIFFY; + dh->rateinfo.credit += delta * CREDITS_PER_JIFFY_v1; cap = dh->rateinfo.credit_cap; } if (dh->rateinfo.credit > cap) @@ -603,7 +604,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) } static bool -hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; @@ -660,7 +661,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; } -static int hashlimit_mt_check(const struct xt_mtchk_param *par) +static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) { struct net *net = par->net; struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -701,7 +702,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->family); if (info->hinfo == NULL) { - ret = htable_create(net, info, par->family); + ret = htable_create_v1(net, info, par->family); if (ret < 0) { mutex_unlock(&hashlimit_mutex); return ret; @@ -711,7 +712,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) return 0; } -static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) +static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -723,10 +724,10 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 1, .family = NFPROTO_IPV4, - .match = hashlimit_mt, + .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), - .checkentry = hashlimit_mt_check, - .destroy = hashlimit_mt_destroy, + .checkentry = hashlimit_mt_check_v1, + .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -734,10 +735,10 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 1, .family = NFPROTO_IPV6, - .match = hashlimit_mt, + .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), - .checkentry = hashlimit_mt_check, - .destroy = hashlimit_mt_destroy, + .checkentry = hashlimit_mt_check_v1, + .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, #endif @@ -786,8 +787,8 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, - struct seq_file *s) +static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) { const struct xt_hashlimit_htable *ht = s->private; @@ -825,7 +826,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, return seq_has_overflowed(s); } -static int dl_seq_show(struct seq_file *s, void *v) +static int dl_seq_show_v1(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; @@ -833,22 +834,22 @@ static int dl_seq_show(struct seq_file *s, void *v) if 
(!hlist_empty(&htable->hash[*bucket])) { hlist_for_each_entry(ent, &htable->hash[*bucket], node) - if (dl_seq_real_show(ent, htable->family, s)) + if (dl_seq_real_show_v1(ent, htable->family, s)) return -1; } return 0; } -static const struct seq_operations dl_seq_ops = { +static const struct seq_operations dl_seq_ops_v1 = { .start = dl_seq_start, .next = dl_seq_next, .stop = dl_seq_stop, - .show = dl_seq_show + .show = dl_seq_show_v1 }; -static int dl_proc_open(struct inode *inode, struct file *file) +static int dl_proc_open_v1(struct inode *inode, struct file *file) { - int ret = seq_open(file, &dl_seq_ops); + int ret = seq_open(file, &dl_seq_ops_v1); if (!ret) { struct seq_file *sf = file->private_data; @@ -857,9 +858,9 @@ static int dl_proc_open_v1(struct inode *inode, struct file *file) return ret; } -static const struct file_operations dl_file_ops = { +static const struct file_operations dl_file_ops_v1 = { .owner = THIS_MODULE, - .open = dl_proc_open, + .open = dl_proc_open_v1, .read = seq_read, .llseek = seq_lseek, .release = seq_release From 11d5f15723c9f39d7c131d0149d024c17dbef676 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Thu, 22 Sep 2016 12:43:44 -0400 Subject: [PATCH 0817/1050] netfilter: xt_hashlimit: Create revision 2 to support higher pps rates Create a new revision for the hashlimit iptables extension module. Rev 2 will support higher pps rates of up to 1 million; version 1 supports only 10k. To support this we have to increase the size of the variables avg and burst in hashlimit_cfg to 64-bit. Create two new structs hashlimit_cfg2 and xt_hashlimit_mtinfo2 and also create newer versions of all the functions for match, checkentry and destroy. Some of the functions like hashlimit_mt, hashlimit_mt_check etc. are very similar in both rev1 and rev2 with only minor changes, so I have split those functions and moved all the common code to a *_common function. Signed-off-by: Vishwanath Pai Signed-off-by: Joshua Hunt Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_hashlimit.h | 23 ++ net/netfilter/xt_hashlimit.c | 332 ++++++++++++++++---- 2 files changed, 286 insertions(+), 69 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h index 6db90372f09c..3efc0ca18345 100644 --- a/include/uapi/linux/netfilter/xt_hashlimit.h +++ b/include/uapi/linux/netfilter/xt_hashlimit.h @@ -6,6 +6,7 @@ /* timings are in milliseconds. */ #define XT_HASHLIMIT_SCALE 10000 +#define XT_HASHLIMIT_SCALE_v2 1000000llu /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 * seconds, or one packet every 59 hours. */ @@ -63,6 +64,20 @@ struct hashlimit_cfg1 { __u8 srcmask, dstmask; }; +struct hashlimit_cfg2 { + __u64 avg; /* Average secs between packets * scale */ + __u64 burst; /* Period multiplier for upper limit. */ + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire?
*/ + + __u8 srcmask, dstmask; +}; + struct xt_hashlimit_mtinfo1 { char name[IFNAMSIZ]; struct hashlimit_cfg1 cfg; @@ -71,4 +86,12 @@ struct xt_hashlimit_mtinfo1 { struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); }; +struct xt_hashlimit_mtinfo2 { + char name[NAME_MAX]; + struct hashlimit_cfg2 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_HASHLIMIT_H */ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index e93d9e0a3f35..44a095ecc7b7 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -57,6 +57,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) /* need to declare this at the top */ static const struct file_operations dl_file_ops_v1; +static const struct file_operations dl_file_ops; /* hash table crap */ struct dsthash_dst { @@ -86,8 +87,8 @@ struct dsthash_ent { unsigned long expires; /* precalculated expiry time */ struct { unsigned long prev; /* last modification */ - u_int32_t credit; - u_int32_t credit_cap, cost; + u_int64_t credit; + u_int64_t credit_cap, cost; } rateinfo; struct rcu_head rcu; }; @@ -98,7 +99,7 @@ struct xt_hashlimit_htable { u_int8_t family; bool rnd_initialized; - struct hashlimit_cfg1 cfg; /* config */ + struct hashlimit_cfg2 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ @@ -114,6 +115,30 @@ struct xt_hashlimit_htable { struct hlist_head hash[0]; /* hashtable itself */ }; +static int +cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) +{ + if (revision == 1) { + struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from; + + to->mode = cfg->mode; + to->avg = cfg->avg; + to->burst = cfg->burst; + to->size = cfg->size; + to->max = cfg->max; + to->gc_interval = cfg->gc_interval; + to->expire = cfg->expire; + to->srcmask = cfg->srcmask; + to->dstmask = cfg->dstmask; + } else if (revision == 2) { + memcpy(to, from, sizeof(struct hashlimit_cfg2)); + } else { + return -EINVAL; + } + + return 0; +} + static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */ static struct kmem_cache *hashlimit_cachep __read_mostly; @@ -215,16 +240,18 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(struct work_struct *work); -static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, - u_int8_t family) +static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, + const char *name, u_int8_t family, + struct xt_hashlimit_htable **out_hinfo, + int revision) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - unsigned int size; - unsigned int i; + unsigned int size, i; + int ret; - if (minfo->cfg.size) { - size = minfo->cfg.size; + if (cfg->size) { + size = cfg->size; } else { size = (totalram_pages << PAGE_SHIFT) / 16384 / sizeof(struct list_head); @@ -238,10 +265,14 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, sizeof(struct list_head) * size); if (hinfo == NULL) return -ENOMEM; - minfo->hinfo = hinfo; + *out_hinfo = hinfo; /* copy match config into hashtable config */ - memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); + ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2); + + if (ret) + return ret; + hinfo->cfg.size = size; if (hinfo->cfg.max == 0) hinfo->cfg.max = 8 * hinfo->cfg.size; @@ -255,17 +286,18 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->count = 
0; hinfo->family = family; hinfo->rnd_initialized = false; - hinfo->name = kstrdup(minfo->name, GFP_KERNEL); + hinfo->name = kstrdup(name, GFP_KERNEL); if (!hinfo->name) { vfree(hinfo); return -ENOMEM; } spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create_data(minfo->name, 0, + hinfo->pde = proc_create_data(name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - &dl_file_ops_v1, hinfo); + (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops, + hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -399,6 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -408,8 +441,11 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4)) #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8)) #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) +#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32)) #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) +#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1) +#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ) #define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1) /* in byte mode, the lowest possible rate is one packet/second. @@ -425,15 +461,24 @@ static u32 xt_hashlimit_len_to_chunks(u32 len) } /* Precision saver. */ -static u32 user2credits(u32 user) +static u64 user2credits(u64 user, int revision) { - /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) - /* Divide first. */ - return (user / XT_HASHLIMIT_SCALE) *\ - HZ * CREDITS_PER_JIFFY_v1; + if (revision == 1) { + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) + /* Divide first. */ + return (user / XT_HASHLIMIT_SCALE) *\ + HZ * CREDITS_PER_JIFFY_v1; - return (user * HZ * CREDITS_PER_JIFFY_v1) / XT_HASHLIMIT_SCALE; + return (user * HZ * CREDITS_PER_JIFFY_v1) \ + / XT_HASHLIMIT_SCALE; + } else { + if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + return (user / XT_HASHLIMIT_SCALE_v2) *\ + HZ * CREDITS_PER_JIFFY; + + return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2; + } } static u32 user2credits_byte(u32 user) @@ -443,10 +488,11 @@ static u32 user2credits_byte(u32 user) return (u32) (us >> 32); } -static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, + u32 mode, int revision) { unsigned long delta = now - dh->rateinfo.prev; - u32 cap; + u64 cap, cpj; if (delta == 0) return; @@ -454,7 +500,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) dh->rateinfo.prev = now; if (mode & XT_HASHLIMIT_BYTES) { - u32 tmp = dh->rateinfo.credit; + u64 tmp = dh->rateinfo.credit; dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta; cap = CREDITS_PER_JIFFY_BYTES * HZ; if (tmp >= dh->rateinfo.credit) {/* overflow */ @@ -462,7 +508,9 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) return; } } else { - dh->rateinfo.credit += delta * CREDITS_PER_JIFFY_v1; + cpj = (revision == 1) ? 
+ CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY; + dh->rateinfo.credit += delta * cpj; cap = dh->rateinfo.credit_cap; } if (dh->rateinfo.credit > cap) @@ -470,7 +518,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) } static void rateinfo_init(struct dsthash_ent *dh, - struct xt_hashlimit_htable *hinfo) + struct xt_hashlimit_htable *hinfo, int revision) { dh->rateinfo.prev = jiffies; if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { @@ -479,8 +527,8 @@ static void rateinfo_init(struct dsthash_ent *dh, dh->rateinfo.credit_cap = hinfo->cfg.burst; } else { dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + hinfo->cfg.burst, revision); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision); dh->rateinfo.credit_cap = dh->rateinfo.credit; } } @@ -604,15 +652,15 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) } static bool -hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, + struct xt_hashlimit_htable *hinfo, + const struct hashlimit_cfg2 *cfg, int revision) { - const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; - struct xt_hashlimit_htable *hinfo = info->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; bool race = false; - u32 cost; + u64 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; @@ -627,18 +675,18 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) } else if (race) { /* Already got an entry, update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } else { dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_init(dh, hinfo); + rateinfo_init(dh, hinfo, revision); } } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } - if (info->cfg.mode & XT_HASHLIMIT_BYTES) + if (cfg->mode & XT_HASHLIMIT_BYTES) cost = hashlimit_byte_cost(skb->len, dh); else cost = dh->rateinfo.cost; @@ -648,70 +696,126 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); rcu_read_unlock_bh(); - return !(info->cfg.mode & XT_HASHLIMIT_INVERT); + return !(cfg->mode & XT_HASHLIMIT_INVERT); } spin_unlock(&dh->lock); rcu_read_unlock_bh(); /* default match is underlimit - so over the limit, we need to invert */ - return info->cfg.mode & XT_HASHLIMIT_INVERT; + return cfg->mode & XT_HASHLIMIT_INVERT; hotdrop: par->hotdrop = true; return false; } -static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) +static bool +hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; - struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + struct hashlimit_cfg2 cfg = {}; int ret; - if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) - return -EINVAL; - if (info->name[sizeof(info->name)-1] != '\0') + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_common(skb, par, hinfo, &cfg, 1); +} + +static bool +hashlimit_mt(const struct sk_buff *skb, 
struct xt_action_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + + return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2); +} + +static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, + struct xt_hashlimit_htable **hinfo, + struct hashlimit_cfg2 *cfg, + const char *name, int revision) +{ + struct net *net = par->net; + int ret; + + if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; if (par->family == NFPROTO_IPV4) { - if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) + if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; } else { - if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128) + if (cfg->srcmask > 128 || cfg->dstmask > 128) return -EINVAL; } - if (info->cfg.mode & ~XT_HASHLIMIT_ALL) { + if (cfg->mode & ~XT_HASHLIMIT_ALL) { pr_info("Unknown mode mask %X, kernel too old?\n", - info->cfg.mode); + cfg->mode); return -EINVAL; } /* Check for overflow. */ - if (info->cfg.mode & XT_HASHLIMIT_BYTES) { - if (user2credits_byte(info->cfg.avg) == 0) { - pr_info("overflow, rate too high: %u\n", info->cfg.avg); + if (cfg->mode & XT_HASHLIMIT_BYTES) { + if (user2credits_byte(cfg->avg) == 0) { + pr_info("overflow, rate too high: %llu\n", cfg->avg); return -EINVAL; } - } else if (info->cfg.burst == 0 || - user2credits(info->cfg.avg * info->cfg.burst) < - user2credits(info->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); + } else if (cfg->burst == 0 || + user2credits(cfg->avg * cfg->burst, revision) < + user2credits(cfg->avg, revision)) { + pr_info("overflow, try lower: %llu/%llu\n", + cfg->avg, cfg->burst); return -ERANGE; } mutex_lock(&hashlimit_mutex); - info->hinfo = htable_find_get(net, info->name, par->family); - if (info->hinfo == NULL) { - ret = htable_create_v1(net, info, par->family); + *hinfo = htable_find_get(net, name, par->family); + if (*hinfo == NULL) { + ret = htable_create(net, cfg, name, par->family, + hinfo, revision); if (ret < 0) { mutex_unlock(&hashlimit_mutex); return ret; } } mutex_unlock(&hashlimit_mutex); + return 0; } +static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct hashlimit_cfg2 cfg = {}; + int ret; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_check_common(par, &info->hinfo, + &cfg, info->name, 1); +} + +static int hashlimit_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, + info->name, 2); +} + static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -719,6 +823,13 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) htable_put(info->hinfo); } +static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + htable_put(info->hinfo); +} + static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", @@ -730,6 +841,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV4, + .match = hashlimit_mt, + 
.matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "hashlimit", @@ -741,6 +862,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV6, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #endif }; @@ -787,18 +918,12 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, - struct seq_file *s) +static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) { - const struct xt_hashlimit_htable *ht = s->private; - - spin_lock(&ent->lock); - /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies, ht->cfg.mode); - switch (family) { case NFPROTO_IPV4: - seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", + seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip.src, ntohs(ent->dst.src_port), @@ -809,7 +934,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: - seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", + seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip6.src, ntohs(ent->dst.src_port), @@ -822,6 +947,34 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, default: BUG(); } +} + +static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1); + + dl_seq_print(ent, family, s); + + spin_unlock(&ent->lock); + return seq_has_overflowed(s); +} + +static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2); + + dl_seq_print(ent, family, s); + spin_unlock(&ent->lock); return seq_has_overflowed(s); } @@ -840,6 +993,20 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) return 0; } +static int dl_seq_show(struct seq_file *s, void *v) +{ + struct xt_hashlimit_htable *htable = s->private; + unsigned int *bucket = (unsigned int *)v; + struct dsthash_ent *ent; + + if (!hlist_empty(&htable->hash[*bucket])) { + hlist_for_each_entry(ent, &htable->hash[*bucket], node) + if (dl_seq_real_show(ent, htable->family, s)) + return -1; + } + return 0; +} + static const struct seq_operations dl_seq_ops_v1 = { .start = dl_seq_start, .next = dl_seq_next, @@ -847,6 +1014,13 @@ static const struct seq_operations dl_seq_ops_v1 = { .show = dl_seq_show_v1 }; +static const struct seq_operations dl_seq_ops = { + .start = dl_seq_start, + .next = dl_seq_next, + .stop = dl_seq_stop, + .show = dl_seq_show +}; + static int dl_proc_open_v1(struct inode *inode, struct file *file) { int ret = seq_open(file, &dl_seq_ops_v1); @@ -858,6 +1032,18 @@ static int dl_proc_open_v1(struct inode *inode, struct file *file) return ret; } +static int dl_proc_open(struct inode 
*inode, struct file *file) +{ + int ret = seq_open(file, &dl_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + + sf->private = PDE_DATA(inode); + } + return ret; +} + static const struct file_operations dl_file_ops_v1 = { .owner = THIS_MODULE, .open = dl_proc_open_v1, @@ -866,6 +1052,14 @@ static const struct file_operations dl_file_ops_v1 = { .release = seq_release }; +static const struct file_operations dl_file_ops = { + .owner = THIS_MODULE, + .open = dl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + static int __net_init hashlimit_proc_net_init(struct net *net) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); From 58e207e4983d7acea39b7fbec9343d8a6d218a18 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Sep 2016 23:49:17 +0200 Subject: [PATCH 0818/1050] netfilter: evict stale entries when user reads /proc/net/nf_conntrack Fabian reports a possible conntrack memory leak (could not reproduce so far), however, one minor issue can be easily resolved: > cat /proc/net/nf_conntrack | wc -l = 5 > 4 minutes required to clean up the table. We should not report those timed-out entries to the user in the first place. And instead of just skipping those timed-out entries while iterating over the table we can also zap them (we already do this during ctnetlink walks, but I forgot about the /proc interface). Fixes: f330a7fdbe16 ("netfilter: conntrack: get rid of conntrack timer") Reported-by: Fabian Frederick Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 7d52f8401afd..5f446cd9f3fd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -212,6 +212,11 @@ static int ct_seq_show(struct seq_file *s, void *v) if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) return 0; + if (nf_ct_should_gc(ct)) { + nf_ct_kill(ct); + goto release; + } + /* we only want to print DIR_ORIGINAL */ if (NF_CT_DIRECTION(hash)) goto release; From 1245800c0f96eb6ebb368593e251d66c01e61022 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 23 Sep 2016 22:57:13 -0400 Subject: [PATCH 0819/1050] tracing: Move mutex to protect against resetting of seq data The iter->seq can be reset outside the protection of the mutex. So can reading of user data. Move the mutex up to the beginning of the function. Fixes: d7350c3f45694 ("tracing/core: make the read callbacks reentrants") Cc: stable@vger.kernel.org # 2.6.30+ Reported-by: Al Viro Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8a4bd6b68a0b..8fb4847b0450 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4890,19 +4890,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, struct trace_iterator *iter = filp->private_data; ssize_t sret; - /* return any leftover data */ - sret = trace_seq_to_user(&iter->seq, ubuf, cnt); - if (sret != -EBUSY) - return sret; - - trace_seq_init(&iter->seq); - /* * Avoid more than one consumer on a single file descriptor * This is just a matter of traces coherency, the ring buffer itself * is protected.
*/ mutex_lock(&iter->mutex); + + /* return any leftover data */ + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (sret != -EBUSY) + goto out; + + trace_seq_init(&iter->seq); + if (iter->trace->read) { sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); if (sret) From 1ae2293dd6d2f5c823cf97e60b70d03631cd622f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 17 Sep 2016 18:31:46 -0400 Subject: [PATCH 0820/1050] fix memory leaks in tracing_buffers_splice_read() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- kernel/trace/trace.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8fb4847b0450..77eeab2776ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5930,9 +5930,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, return -EBUSY; #endif - if (splice_grow_spd(pipe, &spd)) - return -ENOMEM; - if (*ppos & (PAGE_SIZE - 1)) return -EINVAL; @@ -5942,6 +5939,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, len &= PAGE_MASK; } + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + again: trace_access_lock(iter->cpu_file); entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); @@ -5999,19 +5999,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, /* did we read anything? */ if (!spd.nr_pages) { if (ret) - return ret; + goto out; + ret = -EAGAIN; if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) - return -EAGAIN; + goto out; ret = wait_on_pipe(iter, true); if (ret) - return ret; + goto out; goto again; } ret = splice_to_pipe(pipe, &spd); +out: splice_shrink_spd(&spd); return ret; From 62fd5258ebe3ea240371234955a6e2cc99e0b6c3 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 22 Sep 2016 11:53:34 -0700 Subject: [PATCH 0821/1050] radix tree test suite: Test radix_tree_replace_slot() for multiorder entries When we replace a multiorder entry, check that all indices reflect the new value. Also, compile the test suite with -O2, which shows other problems with the code due to some dodgy pointer operations in the radix tree code. Signed-off-by: Matthew Wilcox Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 2 +- tools/testing/radix-tree/multiorder.c | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 3b530467148e..9d0919ed52a4 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,5 +1,5 @@ -CFLAGS += -I. -g -Wall -D_LGPL_SOURCE +CFLAGS += -I. 
-g -O2 -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 39d9b9568fe2..05d7bc488971 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -124,6 +124,8 @@ static void multiorder_check(unsigned long index, int order) unsigned long i; unsigned long min = index & ~((1UL << order) - 1); unsigned long max = min + (1UL << order); + void **slot; + struct item *item2 = item_create(min); RADIX_TREE(tree, GFP_KERNEL); printf("Multiorder index %ld, order %d\n", index, order); @@ -139,13 +141,19 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); for (i = max; i < 2*max; i++) item_check_absent(&tree, i); + for (i = min; i < max; i++) + assert(radix_tree_insert(&tree, i, item2) == -EEXIST); + + slot = radix_tree_lookup_slot(&tree, index); + free(*slot); + radix_tree_replace_slot(slot, item2); for (i = min; i < max; i++) { - static void *entry = (void *) - (0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY); - assert(radix_tree_insert(&tree, i, entry) == -EEXIST); + struct item *item = item_lookup(&tree, i); + assert(item != 0); + assert(item->index == min); } - assert(item_delete(&tree, index) != 0); + assert(item_delete(&tree, min) != 0); for (i = 0; i < 2*max; i++) item_check_absent(&tree, i); From 8d2c0d36d6826ddc3114801c599619d3f2932f0a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Sep 2016 13:32:46 -0700 Subject: [PATCH 0822/1050] radix tree: fix sibling entry handling in radix_tree_descend() The fixes to the radix tree test suite show that the multi-order case is broken. The basic reason is that the radix tree code uses tagged pointers with the "internal" bit in the low bits, and calculating the pointer indices was supposed to mask off those bits. But gcc will notice that we then use the index to re-create the pointer, and will avoid doing the arithmetic and use the tagged pointer directly. This cleans the code up, using the existing is_sibling_entry() helper to validate the sibling pointer range (instead of open-coding it), and using entry_to_node() to mask off the low tag bit from the pointer. And once you do that, you might as well just use the now cleaned-up pointer directly. [ Side note: the multi-order code isn't actually ever used in the kernel right now, and the only reason I didn't just delete all that code is that Kirill Shutemov piped up and said: "Well, my ext4-with-huge-pages patchset[1] uses multi-order entries. It also converts shmem-with-huge-pages and hugetlb to them. I'm okay with converting it to other mechanism, but I need something. (I looked into Konstantin's RFC patchset[2]. It looks okay, but I don't feel myself qualified to review it as I don't know much about radix-tree internals.)" [1] http://lkml.kernel.org/r/20160915115523.29737-1-kirill.shutemov@linux.intel.com [2] http://lkml.kernel.org/r/147230727479.9957.1087787722571077339.stgit@zurg ] Reported-by: Matthew Wilcox Cc: Andrew Morton Cc: Ross Zwisler Cc: Johannes Weiner Cc: Kirill A. 
Shutemov Cc: Konstantin Khlebnikov Cc: Cedric Blancher Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1b7bf7314141..91f0727e3cad 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -105,10 +105,10 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent, #ifdef CONFIG_RADIX_TREE_MULTIORDER if (radix_tree_is_internal_node(entry)) { - unsigned long siboff = get_slot_offset(parent, entry); - if (siboff < RADIX_TREE_MAP_SIZE) { - offset = siboff; - entry = rcu_dereference_raw(parent->slots[offset]); + if (is_sibling_entry(parent, entry)) { + void **sibentry = (void **) entry_to_node(entry); + offset = get_slot_offset(parent, sibentry); + entry = rcu_dereference_raw(*sibentry); } } #endif From 7a682575ad4829b4de3e672a6ad5f73a05826b82 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Fri, 23 Sep 2016 11:27:42 +0200 Subject: [PATCH 0823/1050] netfilter: xt_socket: fix transparent match for IPv6 request sockets The introduction of the TCP_NEW_SYN_RECV state, and the addition of request sockets to the ehash table, seem to have broken the --transparent option of the socket match for IPv6 (around commit a9407000). Now that the socket lookup finds the TCP_NEW_SYN_RECV socket instead of the listener, the --transparent option tries to match on the no_srccheck flag of the request socket. Unfortunately, that flag was only set for IPv4 sockets in tcp_v4_init_req() by copying the transparent flag of the listener socket. This effectively causes '-m socket --transparent' not to match on the ACK packet sent by the client in a TCP handshake. Based on the suggestion from Eric Dumazet, this change moves the code initializing no_srccheck to tcp_conn_request(), making the above scenario work again. Fixes: a940700003 ("netfilter: xt_socket: prepare for TCP_NEW_SYN_RECV support") Signed-off-by: Alex Badics Signed-off-by: KOVACS Krisztian Signed-off-by: Pablo Neira Ayuso --- net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8cd02c0b056c..f3a9f3c2c8d8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6269,6 +6269,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); + inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent; /* Note: tcp_v6_init_req() might override ir_iif for link locals */ inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a75bf48d7950..13b05adf9d3e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1196,7 +1196,6 @@ static void tcp_v4_init_req(struct request_sock *req, sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->no_srccheck = inet_sk(sk_listener)->transparent; ireq->opt = tcp_v4_save_options(skb); } From 0f3cd9b3697708c86a825ae3cedabf7be6fd3e72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Sep 2016 15:23:33 +0200 Subject: [PATCH 0824/1050] netfilter: nf_tables: add range expression Inverse ranges != [a,b] are not currently possible because rules are composites of && operations, and we need to express this: data < a || data > b This patch adds a new range expression.
Positive ranges can already be expressed through two cmp expressions: cmp(sreg, data, >=) cmp(sreg, data, <=) This new range expression provides an alternative way to express this. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 3 + include/uapi/linux/netfilter/nf_tables.h | 29 +++++ net/netfilter/Makefile | 3 +- net/netfilter/nf_tables_core.c | 7 +- net/netfilter/nft_range.c | 138 +++++++++++++++++++++++ 5 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 net/netfilter/nft_range.c diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a9060dd99db7..00f4f6b1b1ba 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -28,6 +28,9 @@ extern const struct nft_expr_ops nft_cmp_fast_ops; int nft_cmp_module_init(void); void nft_cmp_module_exit(void); +int nft_range_module_init(void); +void nft_range_module_exit(void); + int nft_lookup_module_init(void); void nft_lookup_module_exit(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1cf41dd838b2..c6c4477c136b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,35 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +/** + * enum nft_range_ops - nf_tables range operator + * + * @NFT_RANGE_EQ: equal + * @NFT_RANGE_NEQ: not equal + */ +enum nft_range_ops { + NFT_RANGE_EQ, + NFT_RANGE_NEQ, +}; + +/** + * enum nft_range_attributes - nf_tables range expression netlink attributes + * + * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) + * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) + */ +enum nft_range_attributes { + NFTA_RANGE_UNSPEC, + NFTA_RANGE_SREG, + NFTA_RANGE_OP, + NFTA_RANGE_FROM_DATA, + NFTA_RANGE_TO_DATA, + __NFTA_RANGE_MAX +}; +#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1) + enum nft_lookup_flags { NFT_LOOKUP_F_INV = (1 << 0), }; diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0c8581100ac6..c23c3c84416f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -71,8 +71,9 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o +nf_tables-objs += nft_lookup.o nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 67259cefef06..7c94ce0080d5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -263,8 +263,13 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err7; - return 0; + err = nft_range_module_init(); + if (err < 0) + goto err8; + return 0; +err8: + nft_dynset_module_exit(); err7: nft_payload_module_exit(); err6: diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c new file mode 100644 index 000000000000..c6d5358482d1 --- /dev/null +++ b/net/netfilter/nft_range.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the
terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_range_expr { + struct nft_data data_from; + struct nft_data data_to; + enum nft_registers sreg:8; + u8 len; + enum nft_range_ops op:8; +}; + +static void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + bool mismatch; + int d1, d2; + + d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); + d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); + switch (priv->op) { + case NFT_RANGE_EQ: + mismatch = (d1 < 0 || d2 > 0); + break; + case NFT_RANGE_NEQ: + mismatch = (d1 >= 0 && d2 <= 0); + break; + } + + if (mismatch) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { + [NFTA_RANGE_SREG] = { .type = NLA_U32 }, + [NFTA_RANGE_OP] = { .type = NLA_U32 }, + [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, + [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_range_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc_from, desc_to; + int err; + + err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), + &desc_from, tb[NFTA_RANGE_FROM_DATA]); + if (err < 0) + return err; + + err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), + &desc_to, tb[NFTA_RANGE_TO_DATA]); + if (err < 0) + goto err1; + + if (desc_from.len != desc_to.len) { + err = -EINVAL; + goto err2; + } + + priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); + err = nft_validate_register_load(priv->sreg, desc_from.len); + if (err < 0) + goto err2; + + priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + priv->len = desc_from.len; + return 0; +err2: + nft_data_uninit(&priv->data_to, desc_to.type); +err1: + nft_data_uninit(&priv->data_from, desc_from.type); + return err; +} + +static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from, + NFT_DATA_VALUE, priv->len) < 0 || + nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_range_type; +static const struct nft_expr_ops nft_range_ops = { + .type = &nft_range_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), + .eval = nft_range_eval, + .init = nft_range_init, + .dump = nft_range_dump, +}; + +static struct nft_expr_type nft_range_type __read_mostly = { + .name = "range", + .ops = &nft_range_ops, + .policy = nft_range_policy, + .maxattr = NFTA_RANGE_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_range_module_init(void) +{ + return nft_register_expr(&nft_range_type); +} + +void nft_range_module_exit(void) +{ + nft_unregister_expr(&nft_range_type); +} From ff107d27761ff4b644c82c209e004ec9c8fbbc22 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Sep 2016 16:35:56 +0800 Subject: [PATCH 0825/1050] netfilter: nft_log: complete NFTA_LOG_FLAGS attr support NFTA_LOG_FLAGS 
attribute is already supported, but the related NF_LOG_XXX flags are not exposed to the userspace. So we cannot explicitly enable log flags to log uid, tcp sequence, ip options and so on, i.e. a rule such as "nft add rule filter output log uid" is not supported yet. So move NF_LOG_XXX macro definitions to the uapi/../nf_log.h. In order to keep consistent with other modules, change NF_LOG_MASK to refer to all supported log flags. On the other hand, add a new NF_LOG_DEFAULT_MASK to refer to the original default log flags. Finally, if the user specifies unsupported log flags or NFTA_LOG_GROUP and NFTA_LOG_FLAGS are set at the same time, report EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 11 +++-------- include/uapi/linux/netfilter/nf_log.h | 12 ++++++++++++ net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/nf_log_arp.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 4 ++-- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_log.c | 9 ++++++++- 10 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 include/uapi/linux/netfilter/nf_log.h diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index ee07dc8b0a7b..309cd267be4f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -2,15 +2,10 @@ #define _NF_LOG_H #include +#include -/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will - * disappear once iptables is replaced with pkttables. Please DO NOT use them - * for any new code! */ -#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ -#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ -#define NF_LOG_IPOPT 0x04 /* Log IP options */ -#define NF_LOG_UID 0x08 /* Log UID owning local socket */ -#define NF_LOG_MASK 0x0f +/* Log tcp sequence, tcp options, ip options and uid owning local socket */ +#define NF_LOG_DEFAULT_MASK 0x0f /* This flag indicates that copy_len field in nf_loginfo is set */ #define NF_LOG_F_COPY_LEN 0x1 diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h new file mode 100644 index 000000000000..8be21e02387d --- /dev/null +++ b/include/uapi/linux/netfilter/nf_log.h @@ -0,0 +1,12 @@ +#ifndef _NETFILTER_NF_LOG_H +#define _NETFILTER_NF_LOG_H + +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ +#define NF_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define NF_LOG_MASK 0x2f + +#endif /* _NETFILTER_NF_LOG_H */ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 152300d164ac..9a11086ba6ff 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -91,7 +91,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, if (loginfo->type == NF_LOG_TYPE_LOG) bitmask = loginfo->u.log.logflags; else - bitmask = NF_LOG_MASK; + bitmask = NF_LOG_DEFAULT_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f993545a3373..7c00ce90adb8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -156,7 +156,7 @@ static struct nf_loginfo trace_loginfo = { .u
= { .log = { .level = 4, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 8945c2653814..b24795e2ee6d 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 20f225593a8b..5b571e1b5f15 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -29,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -46,7 +46,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 552fac2f390a..55aacea24396 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index c1bcf699a23d..f6aee2895fee 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 7c94ce0080d5..0dd5c695482f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -34,7 +34,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 24a73bb26e94..1b01404bb33f 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -58,8 +58,11 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_LEVEL] != NULL && tb[NFTA_LOG_GROUP] != NULL) return -EINVAL; - if (tb[NFTA_LOG_GROUP] != NULL) + if (tb[NFTA_LOG_GROUP] != NULL) { li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_FLAGS] != NULL) + return -EINVAL; + } nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -87,6 +90,10 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + if (li->u.log.logflags & ~NF_LOG_MASK) { + err = -EINVAL; + goto err1; + } } break; case NF_LOG_TYPE_ULOG: From 8cb2a7d5667ab9a9c2fdd356357b85b63b320901 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Sep 2016 16:47:05 +0800 Subject: [PATCH 0826/1050] netfilter: nf_log: get rid of XT_LOG_* macros nf_log is used by both nftables and iptables, so 
using XT_LOG_XXX macros here is not appropriate. Replace them with NF_LOG_XXX. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_log_ipv4.c | 6 +++--- net/ipv6/netfilter/nf_log_ipv6.c | 14 +++++++------- net/netfilter/nf_log_common.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 5b571e1b5f15..856648966f4c 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -76,7 +76,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (ntohs(ih->frag_off) & IP_OFFSET) nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((logflags & XT_LOG_IPOPT) && + if ((logflags & NF_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { const unsigned char *op; unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; @@ -250,7 +250,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff) + if ((logflags & NF_LOG_UID) && !iphoff) nf_log_dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ @@ -282,7 +282,7 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; - if (!(logflags & XT_LOG_MACDECODE)) + if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index f6aee2895fee..57d86066a13b 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -84,7 +84,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, } /* Max length: 48 "OPT (...) " */ - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, "OPT ( "); switch (currenthdr) { @@ -121,7 +121,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ")"); return; } @@ -129,7 +129,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, break; /* Max Length */ case IPPROTO_AH: - if (logflags & XT_LOG_IPOPT) { + if (logflags & NF_LOG_IPOPT) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; @@ -161,7 +161,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, hdrlen = (hp->hdrlen+2)<<2; break; case IPPROTO_ESP: - if (logflags & XT_LOG_IPOPT) { + if (logflags & NF_LOG_IPOPT) { struct ip_esp_hdr _esph; const struct ip_esp_hdr *eh; @@ -194,7 +194,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); return; } - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ") "); currenthdr = hp->nexthdr; @@ -277,7 +277,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse) + if ((logflags & NF_LOG_UID) && recurse) nf_log_dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ @@ -295,7 +295,7 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; - if (!(logflags & XT_LOG_MACDECODE)) + if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index a5aa5967b8e1..119fe1cb1ea9 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -77,7 +77,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf
*m, const struct sk_buff *skb, nf_log_buf_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & XT_LOG_TCPSEQ) { + if (logflags & NF_LOG_TCPSEQ) { nf_log_buf_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); } @@ -107,7 +107,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, /* Max length: 11 "URGP=65535 " */ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { + if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { u_int8_t _opt[60 - sizeof(struct tcphdr)]; const u_int8_t *op; unsigned int i; From 38e088546522e1e86d2b8f401a1354ad3a9b3303 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 11 Sep 2016 23:54:25 +0100 Subject: [PATCH 0827/1050] mm: check VMA flags to avoid invalid PROT_NONE NUMA balancing The NUMA balancing logic uses an arch-specific PROT_NONE page table flag defined by pte_protnone() or pmd_protnone() to mark PTEs or huge page PMDs respectively as requiring balancing upon a subsequent page fault. User-defined PROT_NONE memory regions which also have this flag set will not normally invoke the NUMA balancing code as do_page_fault() will send a segfault to the process before handle_mm_fault() is even called. However if access_remote_vm() is invoked to access a PROT_NONE region of memory, handle_mm_fault() is called via faultin_page() and __get_user_pages() without any access checks being performed, meaning the NUMA balancing logic is incorrectly invoked on a non-NUMA memory region. A simple means of triggering this problem is to access PROT_NONE mmap'd memory using /proc/self/mem which reliably results in the NUMA handling functions being invoked when CONFIG_NUMA_BALANCING is set. This issue was reported in bugzilla (issue 99101) which includes some simple repro code. There are BUG_ON() checks in do_numa_page() and do_huge_pmd_numa_page() added at commit c0e7cad to avoid accidentally provoking strange behaviour by attempting to apply NUMA balancing to pages that are in fact PROT_NONE. The BUG_ON()'s are consistently triggered by the repro. This patch moves the PROT_NONE check into mm/memory.c rather than invoking BUG_ON() as faulting in these pages via faultin_page() is a valid reason for reaching the NUMA check with the PROT_NONE page table flag set and is therefore not always a bug. 
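A rough userspace sketch of the trigger described above (illustrative only, not part of the patch; the bugzilla entry below carries the original repro code, and the exact errno returned for a PROT_NONE read varies across kernel versions):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char buf[16];
	int fd;

	/* user-defined PROT_NONE region; a direct access would just segfault */
	void *p = mmap(NULL, page, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	fd = open("/proc/self/mem", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* goes through access_remote_vm() -> __get_user_pages() without the
	 * usual access checks, which used to reach the NUMA balancing path
	 * on this non-NUMA PROT_NONE page with CONFIG_NUMA_BALANCING set */
	if (pread(fd, buf, sizeof(buf), (off_t)(uintptr_t)p) < 0)
		perror("pread");

	close(fd);
	munmap(p, page);
	return 0;
}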
Link: https://bugzilla.kernel.org/show_bug.cgi?id=99101 Reported-by: Trevor Saunders Signed-off-by: Lorenzo Stoakes Acked-by: Rik van Riel Cc: Andrew Morton Cc: Mel Gorman Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 3 --- mm/memory.c | 12 +++++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a6abd76baa72..53ae6d00656a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1138,9 +1138,6 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) bool was_writable; int flags = 0; - /* A PROT_NONE fault should not end up here */ - BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))); - fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); if (unlikely(!pmd_same(pmd, *fe->pmd))) goto out_unlock; diff --git a/mm/memory.c b/mm/memory.c index 83be99d9d8a1..793fe0f9841c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3351,9 +3351,6 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) bool was_writable = pte_write(pte); int flags = 0; - /* A PROT_NONE fault should not end up here */ - BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))); - /* * The "pte" at this point cannot be used safely without * validation through pte_unmap_same(). It's of NUMA type but @@ -3458,6 +3455,11 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd) return VM_FAULT_FALLBACK; } +static inline bool vma_is_accessible(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); +} + /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most @@ -3524,7 +3526,7 @@ static int handle_pte_fault(struct fault_env *fe) if (!pte_present(entry)) return do_swap_page(fe, entry); - if (pte_protnone(entry)) + if (pte_protnone(entry) && vma_is_accessible(fe->vma)) return do_numa_page(fe, entry); fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd); @@ -3590,7 +3592,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, barrier(); if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { - if (pmd_protnone(orig_pmd)) + if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&fe, orig_pmd); if ((fe.flags & FAULT_FLAG_WRITE) && From 90b75db6498a19da96dac4b55c909ff3721f3045 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 26 Sep 2016 09:57:33 +1000 Subject: [PATCH 0828/1050] fault_in_multipages_readable() throws set-but-unused error When building XFS with -Werror, it now fails with: include/linux/pagemap.h: In function 'fault_in_multipages_readable': include/linux/pagemap.h:602:16: error: variable 'c' set but not used [-Werror=unused-but-set-variable] volatile char c; ^ This is a regression caused by commit e23d4159b109 ("fix fault_in_multipages_...() on architectures with no-op access_ok()"). Fix it by re-adding the "(void)c" trick that was previously used to make the compiler think the variable is used.
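Reduced to a minimal sketch, the idiom looks like this (function and variable names here are illustrative, not the kernel's; compiles clean with gcc -Wall -c):

int touch_readable(const char *addr, unsigned long len)
{
	volatile char c;
	unsigned long i;

	/* The loads are wanted only for their side effect of faulting the
	 * pages in; every assignment to 'c' is otherwise dead, so gcc warns
	 * with -Wunused-but-set-variable once the access checks compile
	 * away. */
	for (i = 0; i < len; i++)
		c = addr[i];

	/* Referencing the variable once through a cast to void marks it as
	 * used without generating any code. */
	(void)c;
	return 0;
}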
Signed-off-by: Dave Chinner Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7e3d53753612..01e84436cddf 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -620,6 +620,7 @@ static inline int fault_in_multipages_readable(const char __user *uaddr, return __get_user(c, end); } + (void)c; return 0; } From 08895a8b6b06ed2323cd97a36ee40a116b3db8ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Sep 2016 18:47:13 -0700 Subject: [PATCH 0829/1050] Linux 4.8-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 74e22c2f408b..ce2ddb3fec98 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From 1cb1860d9133ff795cac640b9af4569a2668b45e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 25 Sep 2016 14:14:45 -0700 Subject: [PATCH 0830/1050] cxgb4: fix -ve error check on a signed iq iq is unsigned, so the error check for iq < 0 has no effect so errors can slip past this check. Fix this by making iq signed and also get_filter_steerq return a signed int so a -ve error can be returned. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 2a616171cb68..10736738ff30 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -117,11 +117,11 @@ static int validate_filter(struct net_device *dev, return 0; } -static unsigned int get_filter_steerq(struct net_device *dev, - struct ch_filter_specification *fs) +static int get_filter_steerq(struct net_device *dev, + struct ch_filter_specification *fs) { struct adapter *adapter = netdev2adap(dev); - unsigned int iq; + int iq; /* If the user has requested steering matching Ingress Packets * to a specific Queue Set, we need to make sure it's in range @@ -443,10 +443,10 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int max_fidx, fidx, iq; + unsigned int max_fidx, fidx; struct filter_entry *f; u32 iconf; - int ret; + int iq, ret; max_fidx = adapter->tids.nftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && From 2cf750704bb6d7ed8c7d732e071dd1bc890ea5e8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 25 Sep 2016 23:08:31 +0200 Subject: [PATCH 0831/1050] ipmr, ip6mr: fix scheduling while atomic and a deadlock with ipmr_get_route Since the commit below the ipmr/ip6mr rtnl_unicast() code uses the portid instead of the previous dst_pid which was copied from in_skb's portid. Since the skb is new the portid is 0 at that point so the packets are sent to the kernel and we get scheduling while atomic or a deadlock (depending on where it happens) by trying to acquire rtnl two times. Also since this is RTM_GETROUTE, it can be triggered by a normal user. 
Here's the sleeping while atomic trace: [ 7858.212557] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ 7858.212748] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0 [ 7858.212881] 2 locks held by swapper/0/0: [ 7858.213013] #0: (((&mrt->ipmr_expire_timer))){+.-...}, at: [] call_timer_fn+0x5/0x350 [ 7858.213422] #1: (mfc_unres_lock){+.....}, at: [] ipmr_expire_process+0x25/0x130 [ 7858.213807] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc7+ #179 [ 7858.213934] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 7858.214108] 0000000000000000 ffff88005b403c50 ffffffff813a7804 0000000000000000 [ 7858.214412] ffffffff81a1338e ffff88005b403c78 ffffffff810a4a72 ffffffff81a1338e [ 7858.214716] 000000000000026c 0000000000000000 ffff88005b403ca8 ffffffff810a4b9f [ 7858.215251] Call Trace: [ 7858.215412] [] dump_stack+0x85/0xc1 [ 7858.215662] [] ___might_sleep+0x192/0x250 [ 7858.215868] [] __might_sleep+0x6f/0x100 [ 7858.216072] [] mutex_lock_nested+0x33/0x4d0 [ 7858.216279] [] ? netlink_lookup+0x25f/0x460 [ 7858.216487] [] rtnetlink_rcv+0x1b/0x40 [ 7858.216687] [] netlink_unicast+0x19c/0x260 [ 7858.216900] [] rtnl_unicast+0x20/0x30 [ 7858.217128] [] ipmr_destroy_unres+0xa9/0xf0 [ 7858.217351] [] ipmr_expire_process+0x8f/0x130 [ 7858.217581] [] ? ipmr_net_init+0x180/0x180 [ 7858.217785] [] ? ipmr_net_init+0x180/0x180 [ 7858.217990] [] call_timer_fn+0xa5/0x350 [ 7858.218192] [] ? call_timer_fn+0x5/0x350 [ 7858.218415] [] ? ipmr_net_init+0x180/0x180 [ 7858.218656] [] run_timer_softirq+0x260/0x640 [ 7858.218865] [] ? __do_softirq+0xbb/0x54f [ 7858.219068] [] __do_softirq+0xe8/0x54f [ 7858.219269] [] irq_exit+0xb8/0xc0 [ 7858.219463] [] smp_apic_timer_interrupt+0x42/0x50 [ 7858.219678] [] apic_timer_interrupt+0x8c/0xa0 [ 7858.219897] [] ? native_safe_halt+0x6/0x10 [ 7858.220165] [] ? trace_hardirqs_on+0xd/0x10 [ 7858.220373] [] default_idle+0x23/0x190 [ 7858.220574] [] arch_cpu_idle+0xf/0x20 [ 7858.220790] [] default_idle_call+0x4c/0x60 [ 7858.221016] [] cpu_startup_entry+0x39b/0x4d0 [ 7858.221257] [] rest_init+0x135/0x140 [ 7858.221469] [] start_kernel+0x50e/0x51b [ 7858.221670] [] ? early_idt_handler_array+0x120/0x120 [ 7858.221894] [] x86_64_start_reservations+0x2a/0x2c [ 7858.222113] [] x86_64_start_kernel+0x13b/0x14a Fixes: 2942e9005056 ("[RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- net/ipv4/ipmr.c | 3 ++- net/ipv4/route.c | 3 ++- net/ipv6/ip6mr.c | 5 +++-- net/ipv6/route.c | 4 +++- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d351fd3e1049..e5fb81376e92 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -120,5 +120,5 @@ struct mfc_cache { struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 3987b64040c5..19a1c0c2993b 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -116,7 +116,7 @@ struct mfc6_cache { struct rtmsg; extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #ifdef CONFIG_IPV6_MROUTE extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a87bcd2d4a94..5f006e13de56 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2123,7 +2123,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait) + struct rtmsg *rtm, int nowait, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; @@ -2168,6 +2168,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b5b47a26d4ec..62c3ed0b7556 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2503,7 +2503,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, - r, nowait); + r, nowait, portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index fccb5dd91902..7f4265b1649b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2285,8 +2285,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, return 1; } -int ip6mr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, + int nowait, u32 portid) { int err; struct mr6_table *mrt; @@ -2331,6 +2331,7 @@ int ip6mr_get_route(struct net *net, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_reset_transport_header(skb2); skb_put(skb2, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3a224b97905..269218aacbea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3202,7 +3202,9 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait, + portid); + if (err <= 0) { if (!nowait) { if (err == 0) From 876a55b8e255702d2c406872fd791817a3c98bd1 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 25 Sep 2016 23:00:45 +0200 Subject: [PATCH 0832/1050] net: smc91x: take into account register shift This aligns smc91x with its cousin, namely smc911x.c. 
This also allows the driver to run in a device-tree based lubbock board build, on which it was tested. Signed-off-by: Robert Jarzmik Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 503a3b6dce91..73212590d04a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2323,6 +2323,9 @@ static int smc_drv_probe(struct platform_device *pdev) } else { lp->cfg.flags |= SMC91X_USE_16BIT; } + if (!device_property_read_u32(&pdev->dev, "reg-shift", + &val)) + lp->io_shift = val; } #endif From e6053dd56a022d2ef754e46b4a9836c65f1dc434 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 25 Sep 2016 15:40:36 +0000 Subject: [PATCH 0833/1050] be2net: fix non static symbol warnings Fixes the following sparse warnings: drivers/net/ethernet/emulex/benet/be_main.c:47:25: warning: symbol 'be_err_recovery_workq' was not declared. Should it be static? drivers/net/ethernet/emulex/benet/be_main.c:63:25: warning: symbol 'be_wq' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ac513e6627d1..dcb930a52613 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -44,7 +44,7 @@ MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data."); /* Per-module error detection/recovery workq shared across all functions. * Each function schedules its own work request on this shared workq. */ -struct workqueue_struct *be_err_recovery_workq; +static struct workqueue_struct *be_err_recovery_workq; static const struct pci_device_id be_dev_ids[] = { { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, @@ -60,7 +60,7 @@ static const struct pci_device_id be_dev_ids[] = { MODULE_DEVICE_TABLE(pci, be_dev_ids); /* Workqueue used by all functions for defering cmd calls to the adapter */ -struct workqueue_struct *be_wq; +static struct workqueue_struct *be_wq; /* UE Status Low CSR */ static const char * const ue_status_low_desc[] = { From b3f5bf64d8dbbb08685b91ee4576d2e82e7bd60f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 25 Sep 2016 15:43:02 +0000 Subject: [PATCH 0834/1050] net: dsa: mv88e6xxx: fix non static symbol warnings Fixes the following sparse warnings: drivers/net/dsa/mv88e6xxx/chip.c:219:5: warning: symbol 'mv88e6xxx_port_read' was not declared. Should it be static? drivers/net/dsa/mv88e6xxx/chip.c:227:5: warning: symbol 'mv88e6xxx_port_write' was not declared. Should it be static? Signed-off-by: Wei Yongjun Reviewed-by: Vivien Didelot Signed-off-by: David S.
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 122876cb926c..b2c25daef294 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,16 +216,16 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } -int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, - u16 *val) +static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) { int addr = chip->info->port_base_addr + port; return mv88e6xxx_read(chip, addr, reg, val); } -int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, - u16 val) +static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) { int addr = chip->info->port_base_addr + port; From ecdad234755368e1beaa317e930387ea37eff881 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 17:16:44 +0800 Subject: [PATCH 0835/1050] net: hisilicon: mark symbols static where possible We get 2 warnings when building kernel with W=1: drivers/net/ethernet/hisilicon/hisi_femac.c:943:5: warning: no previous prototype for 'hisi_femac_drv_suspend' [-Wmissing-prototypes] drivers/net/ethernet/hisilicon/hisi_femac.c:960:5: warning: no previous prototype for 'hisi_femac_drv_resume' [-Wmissing-prototypes] In fact, these two functions are only used in the file in which they are declared and don't need a declaration, but can be made static. So this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hisi_femac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index ca68e2208b5b..ced185962ef8 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -940,8 +940,8 @@ static int hisi_femac_drv_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -int hisi_femac_drv_suspend(struct platform_device *pdev, - pm_message_t state) +static int hisi_femac_drv_suspend(struct platform_device *pdev, + pm_message_t state) { struct net_device *ndev = platform_get_drvdata(pdev); struct hisi_femac_priv *priv = netdev_priv(ndev); @@ -957,7 +957,7 @@ static int hisi_femac_drv_suspend(struct platform_device *pdev, return 0; } -int hisi_femac_drv_resume(struct platform_device *pdev) +static int hisi_femac_drv_resume(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct hisi_femac_priv *priv = netdev_priv(ndev); From 49e3e6f34d6a00c7725eb42a2b78ee1612bdcac9 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 17:19:04 +0800 Subject: [PATCH 0836/1050] net: hip04: mark tx_done() static We get 1 warning when building kernel with W=1: drivers/net/ethernet/hisilicon/hip04_eth.c:603:22: warning: no previous prototype for 'tx_done' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and doesn't need a declaration, but can be made static. So this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 415ffa1509d5..39778892b3b3 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -600,7 +600,7 @@ static irqreturn_t hip04_mac_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -enum hrtimer_restart tx_done(struct hrtimer *hrtimer) +static enum hrtimer_restart tx_done(struct hrtimer *hrtimer) { struct hip04_priv *priv; From 2dc0d2b45289c880130fe0c54ec684947d407786 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 17:20:41 +0800 Subject: [PATCH 0837/1050] net: mvneta: mark symbols static where possible We get 2 warnings when building kernel with W=1: drivers/net/ethernet/marvell/mvneta.c:639:27: warning: no previous prototype for 'mvneta_get_stats64' [-Wmissing-prototypes] drivers/net/ethernet/marvell/mvneta.c:3529:5: warning: no previous prototype for 'mvneta_ethtool_set_link_ksettings' [-Wmissing-prototypes] In fact, these two functions are only used in the file in which they are declared and don't need a declaration, but can be made static. So this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 8e4252dd9a9d..bfada880e5eb 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -634,8 +634,9 @@ static void mvneta_mib_counters_clear(struct mvneta_port *pp) } /* Get System Network Statistics */ -struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static struct rtnl_link_stats64 * +mvneta_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mvneta_port *pp = netdev_priv(dev); unsigned int start; @@ -3506,8 +3507,9 @@ static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* Ethtool methods */ /* Set link ksettings (phy address, speed) for ethtools */ -int mvneta_ethtool_set_link_ksettings(struct net_device *ndev, - const struct ethtool_link_ksettings *cmd) +static int +mvneta_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct mvneta_port *pp = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; From 50935857f878c014d92be49cbf651bcfbfdacdc0 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 14:10:09 +0800 Subject: [PATCH 0838/1050] cxgb4: mark symbols static where possible We get 10 warnings when building kernel with W=1: drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:304:5: warning: no previous prototype for 'cxgb4_dcb_enabled' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:194:5: warning: no previous prototype for 'setup_sge_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:241:6: warning: no previous prototype for 'free_sge_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:268:5: warning: no previous prototype for 'cfg_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:344:6: warning: no previous prototype for 'free_queues_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:353:5: warning: no previous
prototype for 'request_msix_queue_irqs_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:379:6: warning: no previous prototype for 'free_msix_queue_irqs_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:393:6: warning: no previous prototype for 'name_msix_vecs_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:433:6: warning: no previous prototype for 'enable_rx_uld' [-Wmissing-prototypes] drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c:442:6: warning: no previous prototype for 'quiesce_rx_uld' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. So this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 23 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index eaa7fa98a205..c8a5c434ad2c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -279,7 +279,7 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) } #endif /* CONFIG_CHELSIO_T4_DCB */ -int cxgb4_dcb_enabled(const struct net_device *dev) +static int cxgb4_dcb_enabled(const struct net_device *dev) { #ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index f13b593d7c8b..b4b2d20aab3c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -191,7 +191,8 @@ freeout: return err; } -int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) +static int +setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int i, ret = 0; @@ -238,7 +239,7 @@ static void t4_free_uld_rxqs(struct adapter *adap, int n, } } -void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) +static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; @@ -265,8 +266,8 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) kfree(rxq_info->msix_tbl); } -int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, - const struct cxgb4_uld_info *uld_info) +static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, + const struct cxgb4_uld_info *uld_info) { struct sge *s = &adap->sge; struct sge_uld_rxq_info *rxq_info; @@ -341,7 +342,7 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, return 0; } -void free_queues_uld(struct adapter *adap, unsigned int uld_type) +static void free_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; @@ -350,7 +351,8 @@ void free_queues_uld(struct adapter *adap, unsigned int uld_type) kfree(rxq_info); } -int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +static int +request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int err = 0; @@ -376,7 +378,8 @@ unwind: return err; }
-void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +static void +free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; unsigned int idx, bmap_idx; @@ -390,7 +393,7 @@ void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) } } -void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) +static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int n = sizeof(adap->msix_info_ulds[0].desc); @@ -430,7 +433,7 @@ static void quiesce_rx(struct adapter *adap, struct sge_rspq *q) } } -void enable_rx_uld(struct adapter *adap, unsigned int uld_type) +static void enable_rx_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int idx; @@ -439,7 +442,7 @@ static void enable_rx_uld(struct adapter *adap, unsigned int uld_type) enable_rx(adap, &rxq_info->uldrxq[idx].rspq); } -void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) +static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int idx; From e2072600a24161b7ddcfb26814f69f5fbc8ef85a Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 25 Sep 2016 14:23:15 +0800 Subject: [PATCH 0839/1050] net: bcmgenet: remove unused function in bcmgenet.c We get 1 warning when building kernel with W=1: drivers/net/ethernet/broadcom/genet/bcmgenet.c:2763:5: warning: no previous prototype for 'bcmgenet_hfb_add_filter' [-Wmissing-prototypes] In fact, this function is implemented in drivers/net/ethernet/broadcom/genet/bcmgenet.c, but is called by no one, thus can be removed. So this patch removes the unused functions. Signed-off-by: Baoyou Xie Signed-off-by: David S.
Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 122 ------------------ 1 file changed, 122 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2013474bfdbf..7f478499b649 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2664,128 +2664,6 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl) bcmgenet_tdma_writel(priv, reg, DMA_CTRL); } -static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv, - u32 f_index) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - return !!(reg & (1 << (f_index % 32))); -} - -static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg |= (1 << (f_index % 32)); - bcmgenet_hfb_reg_writel(priv, reg, offset); -} - -static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv, - u32 f_index, u32 rx_queue) -{ - u32 offset; - u32 reg; - - offset = f_index / 8; - reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset); - reg &= ~(0xF << (4 * (f_index % 8))); - reg |= ((rx_queue & 0xF) << (4 * (f_index % 8))); - bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset); -} - -static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv, - u32 f_index, u32 f_length) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_LEN_V3PLUS + - ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) * - sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg &= ~(0xFF << (8 * (f_index % 4))); - reg |= ((f_length & 0xFF) << (8 * (f_index % 4))); - bcmgenet_hfb_reg_writel(priv, reg, offset); -} - -static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv) -{ - u32 f_index; - - for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++) - if (!bcmgenet_hfb_is_filter_enabled(priv, f_index)) - return f_index; - - return -ENOMEM; -} - -/* bcmgenet_hfb_add_filter - * - * Add new filter to Hardware Filter Block to match and direct Rx traffic to - * desired Rx queue. 
- * - * f_data is an array of unsigned 32-bit integers where each 32-bit integer - * provides filter data for 2 bytes (4 nibbles) of Rx frame: - * - * bits 31:20 - unused - * bit 19 - nibble 0 match enable - * bit 18 - nibble 1 match enable - * bit 17 - nibble 2 match enable - * bit 16 - nibble 3 match enable - * bits 15:12 - nibble 0 data - * bits 11:8 - nibble 1 data - * bits 7:4 - nibble 2 data - * bits 3:0 - nibble 3 data - * - * Example: - * In order to match: - * - Ethernet frame type = 0x0800 (IP) - * - IP version field = 4 - * - IP protocol field = 0x11 (UDP) - * - * The following filter is needed: - * u32 hfb_filter_ipv4_udp[] = { - * Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000, - * Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000, - * Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011, - * }; - * - * To add the filter to HFB and direct the traffic to Rx queue 0, call: - * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp, - * ARRAY_SIZE(hfb_filter_ipv4_udp), 0); - */ -int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data, - u32 f_length, u32 rx_queue) -{ - int f_index; - u32 i; - - f_index = bcmgenet_hfb_find_unused_filter(priv); - if (f_index < 0) - return -ENOMEM; - - if (f_length > priv->hw_params->hfb_filter_size) - return -EINVAL; - - for (i = 0; i < f_length; i++) - bcmgenet_hfb_writel(priv, f_data[i], - (f_index * priv->hw_params->hfb_filter_size + i) * - sizeof(u32)); - - bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length); - bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue); - bcmgenet_hfb_enable_filter(priv, f_index); - bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL); - - return 0; -} - /* bcmgenet_hfb_clear * * Clear Hardware Filter Block and disable all filtering. From b47c62c5de2bc43a26bcaca8d7a93bf9dee66ffe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 23 Sep 2016 22:23:59 +0200 Subject: [PATCH 0840/1050] nfp: bpf: improve handling for disabled BPF syscall I stumbled over a new warning during randconfig testing, with CONFIG_BPF_SYSCALL disabled: drivers/net/ethernet/netronome/nfp/nfp_net_offload.c: In function 'nfp_net_bpf_offload': drivers/net/ethernet/netronome/nfp/nfp_net_offload.c:263:3: error: '*((void *)&res+4)' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/ethernet/netronome/nfp/nfp_net_offload.c:263:3: error: 'res.n_instr' may be used uninitialized in this function [-Werror=maybe-uninitialized] As far as I can tell, this is a false positive caused by the compiler getting confused about a function that is partially inlined, but it's easy to avoid while improving the code: The nfp_bpf_jit() stub helper for that configuration is unusual as it is defined in a header file but not marked 'static inline'. By moving the compile-time check into the caller using the IS_ENABLED() macro, we can remove that stub and simplify the nfp_net_bpf_offload_prepare() function enough to unconfuse the compiler. Fixes: 7533fdc0f77f ("nfp: bpf: add hardware bpf offload") Signed-off-by: Arnd Bergmann Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 10 ---------- drivers/net/ethernet/netronome/nfp/nfp_net_offload.c | 3 +++ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index fc220cd04115..87aa8a3e9112 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -192,20 +192,10 @@ struct nfp_bpf_result { bool dense_mode; }; -#ifdef CONFIG_BPF_SYSCALL int nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, unsigned int prog_start, unsigned int prog_done, unsigned int prog_sz, struct nfp_bpf_result *res); -#else -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res) -{ - return -ENOTSUPP; -} -#endif int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 43f42f842eda..8acfb631a0ea 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -148,6 +148,9 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, unsigned int max_mtu; int ret; + if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) + return -ENOTSUPP; + ret = nfp_net_bpf_get_act(nn, cls_bpf); if (ret < 0) return ret; From e8c6ae9fbf8ca70ef0c2de0d2f3995acb0dc8968 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 22 Sep 2016 17:12:25 -0400 Subject: [PATCH 0841/1050] bnx2x: allocate mac filtering 'mcast_list' in PAGE_SIZE increments Currently, we can have high order page allocations that specify GFP_ATOMIC when configuring multicast MAC address filters. For example, we have seen order 2 page allocation failures with ~500 multicast addresses configured. Convert the allocation for 'mcast_list' to be done in PAGE_SIZE increments. Signed-off-by: Jason Baron Cc: Yuval Mintz Cc: Ariel Elior Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 87 ++++++++++++------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index dab61a81a3ba..20fe6a8c35c1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12563,41 +12563,62 @@ static int bnx2x_close(struct net_device *dev) return 0; } -static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, - struct bnx2x_mcast_ramrod_params *p) +struct bnx2x_mcast_list_elem_group { - int mc_count = netdev_mc_count(bp->dev); - struct bnx2x_mcast_list_elem *mc_mac = - kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC); - struct netdev_hw_addr *ha; + struct list_head mcast_group_link; + struct bnx2x_mcast_list_elem mcast_elems[]; +}; - if (!mc_mac) { - BNX2X_ERR("Failed to allocate mc MAC list\n"); - return -ENOMEM; +#define MCAST_ELEMS_PER_PG \ + ((PAGE_SIZE - sizeof(struct bnx2x_mcast_list_elem_group)) / \ + sizeof(struct bnx2x_mcast_list_elem)) + +static void bnx2x_free_mcast_macs_list(struct list_head *mcast_group_list) +{ + struct bnx2x_mcast_list_elem_group *current_mcast_group; + + while (!list_empty(mcast_group_list)) { + current_mcast_group = list_first_entry(mcast_group_list, + struct bnx2x_mcast_list_elem_group, + mcast_group_link); + list_del(&current_mcast_group->mcast_group_link); + free_page((unsigned long)current_mcast_group); } - - INIT_LIST_HEAD(&p->mcast_list); - - netdev_for_each_mc_addr(ha, bp->dev) { - mc_mac->mac = bnx2x_mc_addr(ha); - list_add_tail(&mc_mac->link, &p->mcast_list); - mc_mac++; - } - - p->mcast_list_len = mc_count; - - return 0; } -static void bnx2x_free_mcast_macs_list( - struct bnx2x_mcast_ramrod_params *p) +static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, + struct bnx2x_mcast_ramrod_params *p, + struct list_head *mcast_group_list) { - struct bnx2x_mcast_list_elem *mc_mac = - list_first_entry(&p->mcast_list, struct bnx2x_mcast_list_elem, - link); + struct bnx2x_mcast_list_elem *mc_mac; + struct netdev_hw_addr *ha; + struct bnx2x_mcast_list_elem_group *current_mcast_group = NULL; + int mc_count = netdev_mc_count(bp->dev); + int offset = 0; - WARN_ON(!mc_mac); - kfree(mc_mac); + INIT_LIST_HEAD(&p->mcast_list); + netdev_for_each_mc_addr(ha, bp->dev) { + if (!offset) { + current_mcast_group = + (struct bnx2x_mcast_list_elem_group *) + __get_free_page(GFP_ATOMIC); + if (!current_mcast_group) { + bnx2x_free_mcast_macs_list(mcast_group_list); + BNX2X_ERR("Failed to allocate mc MAC list\n"); + return -ENOMEM; + } + list_add(&current_mcast_group->mcast_group_link, + mcast_group_list); + } + mc_mac = &current_mcast_group->mcast_elems[offset]; + mc_mac->mac = bnx2x_mc_addr(ha); + list_add_tail(&mc_mac->link, &p->mcast_list); + offset++; + if (offset == MCAST_ELEMS_PER_PG) + offset = 0; + } + p->mcast_list_len = mc_count; + return 0; } /** @@ -12647,6 +12668,7 @@ static int bnx2x_set_uc_list(struct bnx2x *bp) static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) { + LIST_HEAD(mcast_group_list); struct net_device *dev = bp->dev; struct bnx2x_mcast_ramrod_params rparam = {NULL}; int rc = 0; @@ -12662,7 +12684,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) /* then, configure a new MACs list */ if (netdev_mc_count(dev)) { - rc = bnx2x_init_mcast_macs_list(bp, &rparam); + rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list); if (rc) return rc; @@ -12673,7 +12695,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) BNX2X_ERR("Failed to set a
new multicast configuration: %d\n", rc); - bnx2x_free_mcast_macs_list(&rparam); + bnx2x_free_mcast_macs_list(&mcast_group_list); } return rc; @@ -12681,6 +12703,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) static int bnx2x_set_mc_list(struct bnx2x *bp) { + LIST_HEAD(mcast_group_list); struct bnx2x_mcast_ramrod_params rparam = {NULL}; struct net_device *dev = bp->dev; int rc = 0; @@ -12692,7 +12715,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) rparam.mcast_obj = &bp->mcast_obj; if (netdev_mc_count(dev)) { - rc = bnx2x_init_mcast_macs_list(bp, &rparam); + rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list); if (rc) return rc; @@ -12703,7 +12726,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) BNX2X_ERR("Failed to set a new multicast configuration: %d\n", rc); - bnx2x_free_mcast_macs_list(&rparam); + bnx2x_free_mcast_macs_list(&mcast_group_list); } else { /* If no mc addresses are required, flush the configuration */ rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); From 3129e1599c6edfafc2a0a8be9f2eb344c7feb920 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 22 Sep 2016 17:12:26 -0400 Subject: [PATCH 0842/1050] bnx2x: allocate mac filtering pending list in PAGE_SIZE increments Currently, we can have high order page allocations that specify GFP_ATOMIC when configuring multicast MAC address filters. For example, we have seen order 2 page allocation failures with ~500 multicast addresses configured. Convert the allocation for the pending list to be done in PAGE_SIZE increments. Signed-off-by: Jason Baron Cc: Yuval Mintz Cc: Ariel Elior Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 127 ++++++++++++------ 1 file changed, 88 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index d468380c2a23..4947a9cbf0c1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2606,8 +2606,23 @@ struct bnx2x_mcast_bin_elem { int type; /* BNX2X_MCAST_CMD_SET_{ADD, DEL} */ }; +union bnx2x_mcast_elem { + struct bnx2x_mcast_bin_elem bin_elem; + struct bnx2x_mcast_mac_elem mac_elem; +}; + +struct bnx2x_mcast_elem_group { + struct list_head mcast_group_link; + union bnx2x_mcast_elem mcast_elems[]; +}; + +#define MCAST_MAC_ELEMS_PER_PG \ + ((PAGE_SIZE - sizeof(struct bnx2x_mcast_elem_group)) / \ + sizeof(union bnx2x_mcast_elem)) + struct bnx2x_pending_mcast_cmd { struct list_head link; + struct list_head group_head; int type; /* BNX2X_MCAST_CMD_X */ union { struct list_head macs_head; @@ -2638,16 +2653,29 @@ static int bnx2x_mcast_wait(struct bnx2x *bp, return 0; } +static void bnx2x_free_groups(struct list_head *mcast_group_list) +{ + struct bnx2x_mcast_elem_group *current_mcast_group; + + while (!list_empty(mcast_group_list)) { + current_mcast_group = list_first_entry(mcast_group_list, + struct bnx2x_mcast_elem_group, + mcast_group_link); + list_del(&current_mcast_group->mcast_group_link); + free_page((unsigned long)current_mcast_group); + } +} + static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, struct bnx2x_mcast_obj *o, struct bnx2x_mcast_ramrod_params *p, enum bnx2x_mcast_cmd cmd) { - int total_sz; struct bnx2x_pending_mcast_cmd *new_cmd; - struct bnx2x_mcast_mac_elem *cur_mac = NULL; struct bnx2x_mcast_list_elem *pos; - int macs_list_len = 0, macs_list_len_size; + struct bnx2x_mcast_elem_group *elem_group; + struct bnx2x_mcast_mac_elem *mac_elem; + int total_elems = 0,
macs_list_len = 0, offset = 0; /* When adding MACs we'll need to store their values */ if (cmd == BNX2X_MCAST_CMD_ADD || cmd == BNX2X_MCAST_CMD_SET) @@ -2657,50 +2685,61 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, if (!p->mcast_list_len) return 0; - /* For a set command, we need to allocate sufficient memory for all - * the bins, since we can't analyze at this point how much memory would - * be required. - */ - macs_list_len_size = macs_list_len * - sizeof(struct bnx2x_mcast_mac_elem); - if (cmd == BNX2X_MCAST_CMD_SET) { - int bin_size = BNX2X_MCAST_BINS_NUM * - sizeof(struct bnx2x_mcast_bin_elem); - - if (bin_size > macs_list_len_size) - macs_list_len_size = bin_size; - } - total_sz = sizeof(*new_cmd) + macs_list_len_size; - /* Add mcast is called under spin_lock, thus calling with GFP_ATOMIC */ - new_cmd = kzalloc(total_sz, GFP_ATOMIC); - + new_cmd = kzalloc(sizeof(*new_cmd), GFP_ATOMIC); if (!new_cmd) return -ENOMEM; + INIT_LIST_HEAD(&new_cmd->data.macs_head); + INIT_LIST_HEAD(&new_cmd->group_head); + new_cmd->type = cmd; + new_cmd->done = false; + DP(BNX2X_MSG_SP, "About to enqueue a new %d command. macs_list_len=%d\n", cmd, macs_list_len); - INIT_LIST_HEAD(&new_cmd->data.macs_head); - - new_cmd->type = cmd; - new_cmd->done = false; - switch (cmd) { case BNX2X_MCAST_CMD_ADD: case BNX2X_MCAST_CMD_SET: - cur_mac = (struct bnx2x_mcast_mac_elem *) - ((u8 *)new_cmd + sizeof(*new_cmd)); - - /* Push the MACs of the current command into the pending command - * MACs list: FIFO + /* For a set command, we need to allocate sufficient memory for + * all the bins, since we can't analyze at this point how much + * memory would be required. */ - list_for_each_entry(pos, &p->mcast_list, link) { - memcpy(cur_mac->mac, pos->mac, ETH_ALEN); - list_add_tail(&cur_mac->link, &new_cmd->data.macs_head); - cur_mac++; + total_elems = macs_list_len; + if (cmd == BNX2X_MCAST_CMD_SET) { + if (total_elems < BNX2X_MCAST_BINS_NUM) + total_elems = BNX2X_MCAST_BINS_NUM; + } + while (total_elems > 0) { + elem_group = (struct bnx2x_mcast_elem_group *) + __get_free_page(GFP_ATOMIC | __GFP_ZERO); + if (!elem_group) { + bnx2x_free_groups(&new_cmd->group_head); + kfree(new_cmd); + return -ENOMEM; + } + total_elems -= MCAST_MAC_ELEMS_PER_PG; + list_add_tail(&elem_group->mcast_group_link, + &new_cmd->group_head); + } + elem_group = list_first_entry(&new_cmd->group_head, + struct bnx2x_mcast_elem_group, + mcast_group_link); + list_for_each_entry(pos, &p->mcast_list, link) { + mac_elem = &elem_group->mcast_elems[offset].mac_elem; + memcpy(mac_elem->mac, pos->mac, ETH_ALEN); + /* Push the MACs of the current command into the pending + * command MACs list: FIFO + */ + list_add_tail(&mac_elem->link, + &new_cmd->data.macs_head); + offset++; + if (offset == MCAST_MAC_ELEMS_PER_PG) { + offset = 0; + elem_group = list_next_entry(elem_group, + mcast_group_link); + } } - break; case BNX2X_MCAST_CMD_DEL: @@ -2978,7 +3017,8 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ]; struct bnx2x_mcast_mac_elem *pmac_pos, *pmac_pos_n; struct bnx2x_mcast_bin_elem *p_item; - int i, cnt = 0, mac_cnt = 0; + struct bnx2x_mcast_elem_group *elem_group; + int cnt = 0, mac_cnt = 0, offset = 0, i; memset(req, 0, sizeof(u64) * BNX2X_MCAST_VEC_SZ); memcpy(cur, o->registry.aprox_match.vec, @@ -3001,9 +3041,10 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, * a list that will be used to configure bins.
*/ cmd_pos->set_convert = true; - p_item = (struct bnx2x_mcast_bin_elem *)(cmd_pos + 1); INIT_LIST_HEAD(&cmd_pos->data.macs_head); - + elem_group = list_first_entry(&cmd_pos->group_head, + struct bnx2x_mcast_elem_group, + mcast_group_link); for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) { bool b_current = !!BIT_VEC64_TEST_BIT(cur, i); bool b_required = !!BIT_VEC64_TEST_BIT(req, i); @@ -3011,12 +3052,18 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, if (b_current == b_required) continue; + p_item = &elem_group->mcast_elems[offset].bin_elem; p_item->bin = i; p_item->type = b_required ? BNX2X_MCAST_CMD_SET_ADD : BNX2X_MCAST_CMD_SET_DEL; list_add_tail(&p_item->link , &cmd_pos->data.macs_head); - p_item++; cnt++; + offset++; + if (offset == MCAST_MAC_ELEMS_PER_PG) { + offset = 0; + elem_group = list_next_entry(elem_group, + mcast_group_link); + } } /* We now definitely know how many commands are hiding here. @@ -3103,6 +3150,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp, */ if (cmd_pos->done) { list_del(&cmd_pos->link); + bnx2x_free_groups(&cmd_pos->group_head); kfree(cmd_pos); } @@ -3741,6 +3789,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e1( } list_del(&cmd_pos->link); + bnx2x_free_groups(&cmd_pos->group_head); kfree(cmd_pos); return cnt; From a762bb8ecb899d1e058b51f3daba4e1ed42b3479 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:29 -0700 Subject: [PATCH 0843/1050] wlcore: Prepare family to fix nvs file handling Move struct wilink_family_data to be available for all TI WLAN variants. And fix familiy typo, it should be just family. Looks like wl12xx use two different nvs.bin files and wl18xx uses a different conf.bin file. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/spi.c | 12 ++++-------- drivers/net/wireless/ti/wlcore/wlcore_i.h | 7 +++++++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 0ed526e4c0df..a336493dab9b 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -80,17 +80,13 @@ ((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1) -struct wilink_familiy_data { - char name[8]; -}; +static const struct wilink_family_data *wilink_data; -static const struct wilink_familiy_data *wilink_data; - -static const struct wilink_familiy_data wl18xx_data = { +static const struct wilink_family_data wl18xx_data = { .name = "wl18xx", }; -static const struct wilink_familiy_data wl12xx_data = { +static const struct wilink_family_data wl12xx_data = { .name = "wl12xx", }; @@ -461,7 +457,7 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, return -ENODEV; wilink_data = of_id->data; - dev_info(&spi->dev, "selected chip familiy is %s\n", + dev_info(&spi->dev, "selected chip family is %s\n", wilink_data->name); if (of_find_property(dt_node, "clock-xtal", NULL)) diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index 0277ae508b8a..f68280db6e5e 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -42,6 +42,12 @@ */ #define WL12XX_NVS_NAME "ti-connectivity/wl1271-nvs.bin" +struct wilink_family_data { + const char *name; + const char *nvs_name; /* wl12xx nvs file */ + const char *cfg_name; /* wl18xx cfg file */ +}; + #define WL1271_TX_SECURITY_LO16(s) ((u16)((s) & 0xffff)) #define WL1271_TX_SECURITY_HI32(s) ((u32)(((s) >> 16) & 
0xffffffff)) #define WL1271_TX_SQN_POST_RECOVERY_PADDING 0xff @@ -208,6 +214,7 @@ struct wl1271_if_operations { struct wlcore_platdev_data { struct wl1271_if_operations *if_ops; + const struct wilink_family_data *family; bool ref_clock_xtal; /* specify whether the clock is XTAL or not */ u32 ref_clock_freq; /* in Hertz */ From d776fc86b82ffd7cfe9eb4182cc398cb8ab4199c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:30 -0700 Subject: [PATCH 0844/1050] wlcore: sdio: Populate config firmware data Configure the config firmware names and make it available in platform data. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/sdio.c | 76 +++++++++++++++++---------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 5839acbbc782..a6e94b1a12cb 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -216,17 +216,33 @@ static struct wl1271_if_operations sdio_ops = { }; #ifdef CONFIG_OF + +static const struct wilink_family_data wl127x_data = { + .name = "wl127x", + .nvs_name = "ti-connectivity/wl127x-nvs.bin", +}; + +static const struct wilink_family_data wl128x_data = { + .name = "wl128x", + .nvs_name = "ti-connectivity/wl128x-nvs.bin", +}; + +static const struct wilink_family_data wl18xx_data = { + .name = "wl18xx", + .cfg_name = "ti-connectivity/wl18xx-conf.bin", +}; + static const struct of_device_id wlcore_sdio_of_match_table[] = { - { .compatible = "ti,wl1271" }, - { .compatible = "ti,wl1273" }, - { .compatible = "ti,wl1281" }, - { .compatible = "ti,wl1283" }, - { .compatible = "ti,wl1801" }, - { .compatible = "ti,wl1805" }, - { .compatible = "ti,wl1807" }, - { .compatible = "ti,wl1831" }, - { .compatible = "ti,wl1835" }, - { .compatible = "ti,wl1837" }, + { .compatible = "ti,wl1271", .data = &wl127x_data }, + { .compatible = "ti,wl1273", .data = &wl127x_data }, + { .compatible = "ti,wl1281", .data = &wl128x_data }, + { .compatible = "ti,wl1283", .data = &wl128x_data }, + { .compatible = "ti,wl1801", .data = &wl18xx_data }, + { .compatible = "ti,wl1805", .data = &wl18xx_data }, + { .compatible = "ti,wl1807", .data = &wl18xx_data }, + { .compatible = "ti,wl1831", .data = &wl18xx_data }, + { .compatible = "ti,wl1835", .data = &wl18xx_data }, + { .compatible = "ti,wl1837", .data = &wl18xx_data }, { } }; @@ -234,9 +250,13 @@ static int wlcore_probe_of(struct device *dev, int *irq, struct wlcore_platdev_data *pdev_data) { struct device_node *np = dev->of_node; + const struct of_device_id *of_id; - if (!np || !of_match_node(wlcore_sdio_of_match_table, np)) - return -ENODATA; + of_id = of_match_node(wlcore_sdio_of_match_table, np); + if (!of_id) + return -ENODEV; + + pdev_data->family = of_id->data; *irq = irq_of_parse_and_map(np, 0); if (!*irq) { @@ -263,7 +283,7 @@ static int wlcore_probe_of(struct device *dev, int *irq, static int wl1271_probe(struct sdio_func *func, const struct sdio_device_id *id) { - struct wlcore_platdev_data pdev_data; + struct wlcore_platdev_data *pdev_data; struct wl12xx_sdio_glue *glue; struct resource res[1]; mmc_pm_flag_t mmcflags; @@ -275,14 +295,15 @@ static int wl1271_probe(struct sdio_func *func, if (func->num != 0x02) return -ENODEV; - memset(&pdev_data, 0x00, sizeof(pdev_data)); - pdev_data.if_ops = &sdio_ops; + pdev_data = devm_kzalloc(&func->dev, sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) + return -ENOMEM; - glue = kzalloc(sizeof(*glue), GFP_KERNEL); - if 
(!glue) { - dev_err(&func->dev, "can't allocate glue\n"); - goto out; - } + pdev_data->if_ops = &sdio_ops; + + glue = devm_kzalloc(&func->dev, sizeof(*glue), GFP_KERNEL); + if (!glue) + return -ENOMEM; glue->dev = &func->dev; @@ -292,16 +313,16 @@ static int wl1271_probe(struct sdio_func *func, /* Use block mode for transferring over one block size of data */ func->card->quirks |= MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; - ret = wlcore_probe_of(&func->dev, &irq, &pdev_data); + ret = wlcore_probe_of(&func->dev, &irq, pdev_data); if (ret) - goto out_free_glue; + goto out; /* if sdio can keep power while host is suspended, enable wow */ mmcflags = sdio_get_host_pm_caps(func); dev_dbg(glue->dev, "sdio PM caps = 0x%x\n", mmcflags); if (mmcflags & MMC_PM_KEEP_POWER) - pdev_data.pwr_in_suspend = true; + pdev_data->pwr_in_suspend = true; sdio_set_drvdata(func, glue); @@ -323,7 +344,7 @@ static int wl1271_probe(struct sdio_func *func, if (!glue->core) { dev_err(glue->dev, "can't allocate platform_device"); ret = -ENOMEM; - goto out_free_glue; + goto out; } glue->core->dev.parent = &func->dev; @@ -341,8 +362,8 @@ static int wl1271_probe(struct sdio_func *func, goto out_dev_put; } - ret = platform_device_add_data(glue->core, &pdev_data, - sizeof(pdev_data)); + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) { dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; @@ -358,9 +379,6 @@ static int wl1271_probe(struct sdio_func *func, out_dev_put: platform_device_put(glue->core); -out_free_glue: - kfree(glue); - out: return ret; } From c815fdebef444d591f57a1e90dcfa0e2f8112e10 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:31 -0700 Subject: [PATCH 0845/1050] wlcore: spi: Populate config firmware data Configure the config firmware names and make it available in platform data. Let's also fix the order of the struct wilink_family_data while at it. 
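The probe-time plumbing in these two patches follows a standard device tree idiom: hang the per-chip data off the of_device_id table and recover it with of_match_node() at probe time. A minimal sketch of that idiom, with illustrative names rather than the driver's actual symbols:

#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

struct my_family_data {
	const char *name;
	const char *nvs_name;		/* per-family firmware file */
};

static const struct my_family_data my_chip_data = {
	.name		= "my_chip",
	.nvs_name	= "vendor/my_chip-nvs.bin",
};

static const struct of_device_id my_of_match[] = {
	{ .compatible = "vendor,my-chip", .data = &my_chip_data },
	{ /* sentinel */ }
};

static int my_probe(struct platform_device *pdev)
{
	const struct of_device_id *of_id;
	const struct my_family_data *family;

	/* Find the table entry that matched this device node... */
	of_id = of_match_node(my_of_match, pdev->dev.of_node);
	if (!of_id)
		return -ENODEV;

	/* ...and recover the chip-specific data attached to it. */
	family = of_id->data;
	dev_info(&pdev->dev, "selected chip family is %s\n", family->name);
	return 0;
}

Storing the recovered pointer in platform data, as these patches do for both SDIO and SPI, keeps the family lookup in one place so the core code never has to guess which chip it is driving.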
Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/spi.c | 42 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index a336493dab9b..f949ad2bd898 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -79,15 +79,19 @@ #define WSPI_MAX_NUM_OF_CHUNKS \ ((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1) +static const struct wilink_family_data wl127x_data = { + .name = "wl127x", + .nvs_name = "ti-connectivity/wl127x-nvs.bin", +}; -static const struct wilink_family_data *wilink_data; +static const struct wilink_family_data wl128x_data = { + .name = "wl128x", + .nvs_name = "ti-connectivity/wl128x-nvs.bin", +}; static const struct wilink_family_data wl18xx_data = { .name = "wl18xx", -}; - -static const struct wilink_family_data wl12xx_data = { - .name = "wl12xx", + .cfg_name = "ti-connectivity/wl18xx-conf.bin", }; struct wl12xx_spi_glue { @@ -425,10 +429,10 @@ static struct wl1271_if_operations spi_ops = { }; static const struct of_device_id wlcore_spi_of_match_table[] = { - { .compatible = "ti,wl1271", .data = &wl12xx_data}, - { .compatible = "ti,wl1273", .data = &wl12xx_data}, - { .compatible = "ti,wl1281", .data = &wl12xx_data}, - { .compatible = "ti,wl1283", .data = &wl12xx_data}, + { .compatible = "ti,wl1271", .data = &wl127x_data}, + { .compatible = "ti,wl1273", .data = &wl127x_data}, + { .compatible = "ti,wl1281", .data = &wl128x_data}, + { .compatible = "ti,wl1283", .data = &wl128x_data}, { .compatible = "ti,wl1801", .data = &wl18xx_data}, { .compatible = "ti,wl1805", .data = &wl18xx_data}, { .compatible = "ti,wl1807", .data = &wl18xx_data}, @@ -456,9 +460,9 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, if (!of_id) return -ENODEV; - wilink_data = of_id->data; + pdev_data->family = of_id->data; dev_info(&spi->dev, "selected chip family is %s\n", - wilink_data->name); + pdev_data->family->name); if (of_find_property(dt_node, "clock-xtal", NULL)) pdev_data->ref_clock_xtal = true; @@ -475,13 +479,15 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, static int wl1271_probe(struct spi_device *spi) { struct wl12xx_spi_glue *glue; - struct wlcore_platdev_data pdev_data; + struct wlcore_platdev_data *pdev_data; struct resource res[1]; int ret; - memset(&pdev_data, 0x00, sizeof(pdev_data)); + pdev_data = devm_kzalloc(&spi->dev, sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) + return -ENOMEM; - pdev_data.if_ops = &spi_ops; + pdev_data->if_ops = &spi_ops; glue = devm_kzalloc(&spi->dev, sizeof(*glue), GFP_KERNEL); if (!glue) { @@ -505,7 +511,7 @@ static int wl1271_probe(struct spi_device *spi) return PTR_ERR(glue->reg); } - ret = wlcore_probe_of(spi, glue, &pdev_data); + ret = wlcore_probe_of(spi, glue, pdev_data); if (ret) { dev_err(glue->dev, "can't get device tree parameters (%d)\n", ret); @@ -518,7 +524,7 @@ static int wl1271_probe(struct spi_device *spi) return ret; } - glue->core = platform_device_alloc(wilink_data->name, + glue->core = platform_device_alloc(pdev_data->family->name, PLATFORM_DEVID_AUTO); if (!glue->core) { dev_err(glue->dev, "can't allocate platform_device\n"); @@ -539,8 +545,8 @@ static int wl1271_probe(struct spi_device *spi) goto out_dev_put; } - ret = platform_device_add_data(glue->core, &pdev_data, - sizeof(pdev_data)); + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) 
{ dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; From 3e1ac932682b1377ae1c9d6283f0b88e28fb227d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:32 -0700 Subject: [PATCH 0846/1050] wlcore: Fix config firmware loading issues Booting multiple wl12xx and wl18xx devices using the same rootfs is a pain. You currently have to symlink the right nvs file depending on the wl12xx type. For example, with wl1271-nvs.bin being a symlink to wl127x-nvs.bin by default and trying to bring up a wl128x based device: wlcore: ERROR nvs size is not as expected: 1113 != 912 wlcore: ERROR NVS file is needed during boot wlcore: ERROR NVS file is needed during boot wlcore: ERROR firmware boot failed despite 3 retries Note that wl18xx uses a separate config firmware wl18xx-conf.bin that can be generated with tools using the following two git repos: git.ti.com/wilink8-wlan/18xx-ti-utils git.ti.com/wilink8-wlan/wl18xx_fw So let's not configure the nvs file for wl18xx as it's not needed AFAIK. If it turns out that we also need the nvs file for wl18xx, we can just add it to the config firmware data for wl18xx. Let's fix the issue by using the chip specific config firmware data, and make sure we produce understandable warnings if something is missing. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/boot.c | 15 +++++++--- drivers/net/wireless/ti/wlcore/main.c | 36 +++++++++++++++-------- drivers/net/wireless/ti/wlcore/wlcore_i.h | 7 ----- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/boot.c b/drivers/net/wireless/ti/wlcore/boot.c index f75d30444117..f00509ea8aca 100644 --- a/drivers/net/wireless/ti/wlcore/boot.c +++ b/drivers/net/wireless/ti/wlcore/boot.c @@ -282,6 +282,9 @@ EXPORT_SYMBOL_GPL(wlcore_boot_upload_firmware); int wlcore_boot_upload_nvs(struct wl1271 *wl) { + struct platform_device *pdev = wl->pdev; + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); + const char *nvs_name = "unknown"; size_t nvs_len, burst_len; int i; u32 dest_addr, val; @@ -293,6 +296,9 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) return -ENODEV; } + if (pdev_data && pdev_data->family) + nvs_name = pdev_data->family->nvs_name; + if (wl->quirks & WLCORE_QUIRK_LEGACY_NVS) { struct wl1271_nvs_file *nvs = (struct wl1271_nvs_file *)wl->nvs; @@ -310,8 +316,9 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) if (wl->nvs_len != sizeof(struct wl1271_nvs_file) && (wl->nvs_len != WL1271_INI_LEGACY_NVS_FILE_SIZE || wl->enable_11a)) { - wl1271_error("nvs size is not as expected: %zu != %zu", - wl->nvs_len, sizeof(struct wl1271_nvs_file)); + wl1271_error("%s size is not as expected: %zu != %zu", + nvs_name, wl->nvs_len, + sizeof(struct wl1271_nvs_file)); kfree(wl->nvs); wl->nvs = NULL; wl->nvs_len = 0; @@ -328,8 +335,8 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) if (nvs->general_params.dual_mode_select) wl->enable_11a = true; } else { - wl1271_error("nvs size is not as expected: %zu != %zu", - wl->nvs_len, + wl1271_error("%s size is not as expected: %zu != %zu", + nvs_name, wl->nvs_len, sizeof(struct wl128x_nvs_file)); kfree(wl->nvs); wl->nvs = NULL; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index ef6c15b952cc..471521a0db7b 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6413,9 +6413,12 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context) goto out; } wl->nvs_len = fw->size; - } 
else { + } else if (pdev_data->family->nvs_name) { wl1271_debug(DEBUG_BOOT, "Could not get nvs file %s", - WL12XX_NVS_NAME); + pdev_data->family->nvs_name); + wl->nvs = NULL; + wl->nvs_len = 0; + } else { wl->nvs = NULL; wl->nvs_len = 0; } @@ -6510,21 +6513,29 @@ out: int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev) { - int ret; + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); + const char *nvs_name; + int ret = 0; - if (!wl->ops || !wl->ptable) + if (!wl->ops || !wl->ptable || !pdev_data) return -EINVAL; wl->dev = &pdev->dev; wl->pdev = pdev; platform_set_drvdata(pdev, wl); - ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, - WL12XX_NVS_NAME, &pdev->dev, GFP_KERNEL, - wl, wlcore_nvs_cb); - if (ret < 0) { - wl1271_error("request_firmware_nowait failed: %d", ret); - complete_all(&wl->nvs_loading_complete); + if (pdev_data->family && pdev_data->family->nvs_name) { + nvs_name = pdev_data->family->nvs_name; + ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + nvs_name, &pdev->dev, GFP_KERNEL, + wl, wlcore_nvs_cb); + if (ret < 0) { + wl1271_error("request_firmware_nowait failed for %s: %d", + nvs_name, ret); + complete_all(&wl->nvs_loading_complete); + } + } else { + wlcore_nvs_cb(NULL, wl); } return ret; @@ -6533,9 +6544,11 @@ EXPORT_SYMBOL_GPL(wlcore_probe); int wlcore_remove(struct platform_device *pdev) { + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); struct wl1271 *wl = platform_get_drvdata(pdev); - wait_for_completion(&wl->nvs_loading_complete); + if (pdev_data->family && pdev_data->family->nvs_name) + wait_for_completion(&wl->nvs_loading_complete); if (!wl->initialized) return 0; @@ -6572,4 +6585,3 @@ MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck."); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); -MODULE_FIRMWARE(WL12XX_NVS_NAME); diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index f68280db6e5e..e840985385fc 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -35,13 +35,6 @@ #include "conf.h" #include "ini.h" -/* - * wl127x and wl128x are using the same NVS file name. However, the - * ini parameters between them are different. The driver validates - * the correct NVS size in wl1271_boot_upload_nvs(). - */ -#define WL12XX_NVS_NAME "ti-connectivity/wl1271-nvs.bin" - struct wilink_family_data { const char *name; const char *nvs_name; /* wl12xx nvs file */ From 33e40d025ed82fcab102035db38a727c914399a4 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Sep 2016 09:06:33 -0700 Subject: [PATCH 0847/1050] wlcore: wl18xx: Use chip specific configuration firmware Use the wl18xx specific config firmware we now have available. 
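The config loading path being switched over here reduces to a small request_firmware() pattern: ask for the family-specific blob, validate it, and fall back to built-in defaults when it is absent. A rough outline under those assumptions (names invented for illustration; the validation details are omitted):

#include <linux/device.h>
#include <linux/firmware.h>

static int my_conf_init(struct device *dev, const char *cfg_name)
{
	const struct firmware *fw;
	int ret;

	ret = request_firmware(&fw, cfg_name, dev);
	if (ret < 0) {
		/* A missing config is not fatal: warn and keep the
		 * driver defaults that were applied earlier.
		 */
		dev_warn(dev, "no config binary %s (%d), using defaults\n",
			 cfg_name, ret);
		return 0;
	}

	/* Validate fw->size and copy fw->data into the driver config
	 * here, before releasing the firmware buffer.
	 */
	release_firmware(fw);
	return 0;
}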
Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wl18xx/main.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c index 00a04dfc03d1..06d6943b257c 100644 --- a/drivers/net/wireless/ti/wl18xx/main.c +++ b/drivers/net/wireless/ti/wl18xx/main.c @@ -1397,25 +1397,24 @@ out: return ret; } -#define WL18XX_CONF_FILE_NAME "ti-connectivity/wl18xx-conf.bin" - static int wl18xx_load_conf_file(struct device *dev, struct wlcore_conf *conf, - struct wl18xx_priv_conf *priv_conf) + struct wl18xx_priv_conf *priv_conf, + const char *file) { struct wlcore_conf_file *conf_file; const struct firmware *fw; int ret; - ret = request_firmware(&fw, WL18XX_CONF_FILE_NAME, dev); + ret = request_firmware(&fw, file, dev); if (ret < 0) { wl1271_error("could not get configuration binary %s: %d", - WL18XX_CONF_FILE_NAME, ret); + file, ret); return ret; } if (fw->size != WL18XX_CONF_SIZE) { - wl1271_error("configuration binary file size is wrong, expected %zu got %zu", - WL18XX_CONF_SIZE, fw->size); + wl1271_error("%s configuration binary size is wrong, expected %zu got %zu", + file, WL18XX_CONF_SIZE, fw->size); ret = -EINVAL; goto out_release; } @@ -1448,9 +1447,12 @@ out_release: static int wl18xx_conf_init(struct wl1271 *wl, struct device *dev) { + struct platform_device *pdev = wl->pdev; + struct wlcore_platdev_data *pdata = dev_get_platdata(&pdev->dev); struct wl18xx_priv *priv = wl->priv; - if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf) < 0) { + if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf, + pdata->family->cfg_name) < 0) { wl1271_warning("falling back to default config"); /* apply driver default configuration */ @@ -2141,4 +2143,3 @@ MODULE_PARM_DESC(num_rx_desc_param, MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Luciano Coelho "); MODULE_FIRMWARE(WL18XX_FW_NAME); -MODULE_FIRMWARE(WL18XX_CONF_FILE_NAME); From 89951db2be53106edbe0d24b3b5f9a787326daf6 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Tue, 20 Sep 2016 18:46:23 +0530 Subject: [PATCH 0848/1050] mwifiex: cfg80211 set_default_mgmt_key handler Previously device used to start using IGTK key as Tx key as soon as it gets downloaded in add_key(). This patch implements set_default_mgmt_key handler. We will update Tx key ID in set_default_mgmt_key(). Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- .../net/wireless/marvell/mwifiex/cfg80211.c | 24 +++++++++++++++++++ drivers/net/wireless/marvell/mwifiex/fw.h | 1 + drivers/net/wireless/marvell/mwifiex/ioctl.h | 1 + .../net/wireless/marvell/mwifiex/sta_cmd.c | 5 ++++ 4 files changed, 31 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index c7f2faa81921..39ce76ad00bc 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -483,6 +483,29 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, return 0; } +/* + * CFG802.11 operation handler to set default mgmt key. 
+ */ +static int +mwifiex_cfg80211_set_default_mgmt_key(struct wiphy *wiphy, + struct net_device *netdev, + u8 key_index) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); + struct mwifiex_ds_encrypt_key encrypt_key; + + wiphy_dbg(wiphy, "set default mgmt key, key index=%d\n", key_index); + + memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key)); + encrypt_key.key_len = WLAN_KEY_LEN_CCMP; + encrypt_key.key_index = key_index; + encrypt_key.is_igtk_def_key = true; + eth_broadcast_addr(encrypt_key.mac_addr); + + return mwifiex_send_cmd(priv, HostCmd_CMD_802_11_KEY_MATERIAL, + HostCmd_ACT_GEN_SET, true, &encrypt_key, true); +} + /* * This function sends domain information to the firmware. * @@ -4082,6 +4105,7 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { .leave_ibss = mwifiex_cfg80211_leave_ibss, .add_key = mwifiex_cfg80211_add_key, .del_key = mwifiex_cfg80211_del_key, + .set_default_mgmt_key = mwifiex_cfg80211_set_default_mgmt_key, .mgmt_tx = mwifiex_cfg80211_mgmt_tx, .mgmt_frame_register = mwifiex_cfg80211_mgmt_frame_register, .remain_on_channel = mwifiex_cfg80211_remain_on_channel, diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 18aa5256e88b..4b1894b4757f 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -78,6 +78,7 @@ enum KEY_TYPE_ID { KEY_TYPE_ID_AES, KEY_TYPE_ID_WAPI, KEY_TYPE_ID_AES_CMAC, + KEY_TYPE_ID_AES_CMAC_DEF, }; #define WPA_PN_SIZE 8 diff --git a/drivers/net/wireless/marvell/mwifiex/ioctl.h b/drivers/net/wireless/marvell/mwifiex/ioctl.h index 70429815ff53..536ab834b126 100644 --- a/drivers/net/wireless/marvell/mwifiex/ioctl.h +++ b/drivers/net/wireless/marvell/mwifiex/ioctl.h @@ -260,6 +260,7 @@ struct mwifiex_ds_encrypt_key { u8 is_igtk_key; u8 is_current_wep_key; u8 is_rx_seq_valid; + u8 is_igtk_def_key; }; struct mwifiex_power_cfg { diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 49048b41fd36..2a162c33d271 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -598,6 +598,11 @@ static int mwifiex_set_aes_key_v2(struct mwifiex_private *priv, memcpy(km->key_param_set.key_params.cmac_aes.key, enc_key->key_material, enc_key->key_len); len += sizeof(struct mwifiex_cmac_aes_param); + } else if (enc_key->is_igtk_def_key) { + mwifiex_dbg(adapter, INFO, + "%s: Set CMAC default Key index\n", __func__); + km->key_param_set.key_type = KEY_TYPE_ID_AES_CMAC_DEF; + km->key_param_set.key_idx = enc_key->key_index & KEY_INDEX_MASK; } else { mwifiex_dbg(adapter, INFO, "%s: Set AES Key\n", __func__); From b3589dfe02123a0d0ea82076a9f8ef84a46852c0 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:51 +0100 Subject: [PATCH 0849/1050] brcmfmac: ignore 11d configuration errors 802.11d is not always supported by firmware anymore. Currently the AP configuration of 11d will cause an abort if the ioctl set is failing. This behavior is not correct and the error should be ignored. 
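The shape of the fix is worth spelling out: probe the feature with a harmless read first, and only treat it as configurable when the read succeeds. A small sketch of that pattern, assuming a hypothetical get/set command pair (none of these names are from brcmfmac):

#include <linux/types.h>

#define CMD_REGULATORY 0x2e	/* hypothetical command id */

struct my_dev;

/* Hypothetical accessors: return 0 on success, negative on error. */
int fw_get_u32(struct my_dev *dev, u32 cmd, u32 *val);
int fw_set_u32(struct my_dev *dev, u32 cmd, u32 val);

static void configure_11d(struct my_dev *dev, u32 wanted)
{
	u32 current_val;

	/* A failing read means the firmware has no 11d support at
	 * all: skip configuration instead of aborting AP setup.
	 */
	if (fw_get_u32(dev, CMD_REGULATORY, &current_val))
		return;

	if (current_val != wanted)
		fw_set_u32(dev, CMD_REGULATORY, wanted);
}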
Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 748eaa68cf07..0f2667e95e81 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4502,6 +4502,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, u16 chanspec = chandef_to_chanspec(&cfg->d11inf, &settings->chandef); bool mbss; int is_11d; + bool supports_11d; brcmf_dbg(TRACE, "ctrlchn=%d, center=%d, bw=%d, beacon_interval=%d, dtim_period=%d,\n", settings->chandef.chan->hw_value, @@ -4514,11 +4515,16 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, mbss = ifp->vif->mbss; /* store current 11d setting */ - brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, &ifp->vif->is_11d); - country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, - settings->beacon.tail_len, - WLAN_EID_COUNTRY); - is_11d = country_ie ? 1 : 0; + if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, + &ifp->vif->is_11d)) { + supports_11d = false; + } else { + country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, + settings->beacon.tail_len, + WLAN_EID_COUNTRY); + is_11d = country_ie ? 1 : 0; + supports_11d = true; + } memset(&ssid_le, 0, sizeof(ssid_le)); if (settings->ssid == NULL || settings->ssid_len == 0) { @@ -4577,7 +4583,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, /* Parameters shared by all radio interfaces */ if (!mbss) { - if (is_11d != ifp->vif->is_11d) { + if ((supports_11d) && (is_11d != ifp->vif->is_11d)) { err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, is_11d); if (err < 0) { @@ -4619,7 +4625,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, brcmf_err("SET INFRA error %d\n", err); goto exit; } - } else if (WARN_ON(is_11d != ifp->vif->is_11d)) { + } else if (WARN_ON(supports_11d && (is_11d != ifp->vif->is_11d))) { /* Multiple-BSS should use same 11d configuration */ err = -EINVAL; goto exit; @@ -4753,11 +4759,8 @@ static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev) brcmf_err("setting INFRA mode failed %d\n", err); if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_MBSS)) brcmf_fil_iovar_int_set(ifp, "mbss", 0); - err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, - ifp->vif->is_11d); - if (err < 0) - brcmf_err("restoring REGULATORY setting failed %d\n", - err); + brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, + ifp->vif->is_11d); /* Bring device back up so it can be used again */ err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_UP, 1); if (err < 0) From 704d1c6b56f4ee2ad6a5f012a72a278d17c1a223 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:52 +0100 Subject: [PATCH 0850/1050] brcmfmac: rework pointer trickery in brcmf_proto_bcdc_query_dcmd() The variable info is assigned to point to bcdc->msg[1], which is the same as pointing to bcdc->buf. As that is what we want to access, make it clear by fixing the assignment. This also avoids out-of-bounds errors from static analyzers, as bcdc->msg[1] is not in the structure definition.
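The pointer identity being cleaned up here is easiest to see on a reduced, self-contained layout (plain userspace C; the structs below are stand-ins, not the real bcdc definitions):

#include <assert.h>

struct hdr {
	int cmd;
	int len;
};

struct proto {
	struct hdr msg;		/* dcmd header */
	unsigned char buf[64];	/* payload, laid out right after it */
};

int main(void)
{
	struct proto p;
	struct hdr *msg = &p.msg;

	/* &msg[1] steps one header past msg and only lands on buf
	 * because of the struct layout (padding could break this);
	 * &p.buf[0] states the intent directly.
	 */
	assert((void *)&msg[1] == (void *)&p.buf[0]);
	return 0;
}

Both expressions name the same byte, which is why the change is behaviorally neutral; the rewritten form simply stops relying on an out-of-bounds-looking index that static analyzers rightly flag.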
Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c index d1bc51f92686..038a960c5104 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c @@ -194,7 +194,7 @@ retry: } /* Check info buffer */ - info = (void *)&msg[1]; + info = (void *)&bcdc->buf[0]; /* Copy info buffer */ if (buf) { From bc981641360183990de59da17f9f560f9150b801 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:53 +0100 Subject: [PATCH 0851/1050] brcmfmac: fix memory leak in brcmf_flowring_add_tdls_peer() In the error paths in brcmf_flowring_add_tdls_peer() the allocated resource should be freed. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/flowring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c index 7e269f9aa607..b16b367b0569 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c @@ -495,14 +495,18 @@ void brcmf_flowring_add_tdls_peer(struct brcmf_flowring *flow, int ifidx, } else { search = flow->tdls_entry; if (memcmp(search->mac, peer, ETH_ALEN) == 0) - return; + goto free_entry; while (search->next) { search = search->next; if (memcmp(search->mac, peer, ETH_ALEN) == 0) - return; + goto free_entry; } search->next = tdls_entry; } flow->tdls_active = true; + return; + +free_entry: + kfree(tdls_entry); } From 26305d3d7298d1ddf8fd4ce95a382aa90534f0a3 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:54 +0100 Subject: [PATCH 0852/1050] brcmfmac: initialize variable in brcmf_sdiod_regrl() In case of an error the variable returned is uninitialized. The caller will probably check the error code before using it, but better assure it is set to zero. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 03404cbe9237..72139b579b18 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -420,7 +420,7 @@ u8 brcmf_sdiod_regrb(struct brcmf_sdio_dev *sdiodev, u32 addr, int *ret) u32 brcmf_sdiod_regrl(struct brcmf_sdio_dev *sdiodev, u32 addr, int *ret) { - u32 data; + u32 data = 0; int retval; brcmf_dbg(SDIO, "addr:0x%08x\n", addr); From 8fa5fdec09cd379c9ecb8972f344f8f308e0ccf3 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 19 Sep 2016 12:09:55 +0100 Subject: [PATCH 0853/1050] brcmfmac: remove worker from .ndo_set_mac_address() callback As it turns out there is no need to use a worker for the callback because it is not called from atomic context. 
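The distinction driving this cleanup: deferring to a workqueue is only required when the entry point may run in atomic context while the firmware call sleeps. The .ndo_set_mac_address callback runs in process context under rtnl_lock, so the call can be made inline and, as a bonus, its real error code can be returned. A sketch of the two shapes (my_if and my_fw_set_mac are illustrative, not driver symbols):

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct my_if {
	struct work_struct filter_work;
};

int my_fw_set_mac(struct my_if *ifp, const void *mac);	/* may sleep */

/* Callback that may run with BH disabled: defer the sleeping call. */
static void my_set_rx_mode(struct net_device *ndev)
{
	struct my_if *ifp = netdev_priv(ndev);

	schedule_work(&ifp->filter_work);
}

/* Process-context callback: do the firmware call synchronously and
 * propagate its result instead of returning an unconditional 0.
 */
static int my_set_mac(struct net_device *ndev, void *addr)
{
	struct sockaddr *sa = addr;
	struct my_if *ifp = netdev_priv(ndev);
	int err;

	err = my_fw_set_mac(ifp, sa->sa_data);
	if (!err)
		memcpy(ndev->dev_addr, sa->sa_data, ETH_ALEN);
	return err;
}

This is also why the multicast worker stays: .ndo_set_rx_mode can be invoked under the address list lock with BH disabled, so that path still has to defer.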
Reported-by: Dan Williams Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/core.c | 39 +++++++------------ .../broadcom/brcm80211/brcmfmac/core.h | 2 - 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 65e8c8766441..3f6a58035077 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -136,27 +136,6 @@ static void _brcmf_set_multicast_list(struct work_struct *work) err); } -static void -_brcmf_set_mac_address(struct work_struct *work) -{ - struct brcmf_if *ifp; - s32 err; - - ifp = container_of(work, struct brcmf_if, setmacaddr_work); - - brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); - - err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", ifp->mac_addr, - ETH_ALEN); - if (err < 0) { - brcmf_err("Setting cur_etheraddr failed, %d\n", err); - } else { - brcmf_dbg(TRACE, "MAC address updated to %pM\n", - ifp->mac_addr); - memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN); - } -} - #if IS_ENABLED(CONFIG_IPV6) static void _brcmf_update_ndtable(struct work_struct *work) { @@ -190,10 +169,20 @@ static int brcmf_netdev_set_mac_address(struct net_device *ndev, void *addr) { struct brcmf_if *ifp = netdev_priv(ndev); struct sockaddr *sa = (struct sockaddr *)addr; + int err; - memcpy(&ifp->mac_addr, sa->sa_data, ETH_ALEN); - schedule_work(&ifp->setmacaddr_work); - return 0; + brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); + + err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", sa->sa_data, + ETH_ALEN); + if (err < 0) { + brcmf_err("Setting cur_etheraddr failed, %d\n", err); + } else { + brcmf_dbg(TRACE, "updated to %pM\n", sa->sa_data); + memcpy(ifp->mac_addr, sa->sa_data, ETH_ALEN); + memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN); + } + return err; } static void brcmf_netdev_set_multicast_list(struct net_device *ndev) @@ -525,7 +514,6 @@ int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) /* set the mac address */ memcpy(ndev->dev_addr, ifp->mac_addr, ETH_ALEN); - INIT_WORK(&ifp->setmacaddr_work, _brcmf_set_mac_address); INIT_WORK(&ifp->multicast_work, _brcmf_set_multicast_list); INIT_WORK(&ifp->ndoffload_work, _brcmf_update_ndtable); @@ -730,7 +718,6 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, } if (ifp->ndev->netdev_ops == &brcmf_netdev_ops_pri) { - cancel_work_sync(&ifp->setmacaddr_work); cancel_work_sync(&ifp->multicast_work); cancel_work_sync(&ifp->ndoffload_work); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index 8fa34cad5a96..8a810bb24a11 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -176,7 +176,6 @@ enum brcmf_netif_stop_reason { * @vif: points to cfg80211 specific interface information. * @ndev: associated network device. * @stats: interface specific network statistics. - * @setmacaddr_work: worker object for setting mac address. * @multicast_work: worker object for multicast provisioning. * @ndoffload_work: worker object for neighbor discovery offload configuration. * @fws_desc: interface specific firmware-signalling descriptor. 
@@ -193,7 +192,6 @@ struct brcmf_if { struct brcmf_cfg80211_vif *vif; struct net_device *ndev; struct net_device_stats stats; - struct work_struct setmacaddr_work; struct work_struct multicast_work; struct work_struct ndoffload_work; struct brcmf_fws_mac_descriptor *fws_desc; From 835680b82f029818c813324aed3073cdcf63241f Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:56 +0100 Subject: [PATCH 0854/1050] brcmfmac: remove unnecessary null pointer check In the function brcmf_bus_start(), in the exception handling, a check is made to determine whether ifp is null, though this is not possible. Removing the unnecessary check. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 3f6a58035077..9a05371453ce 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -1048,8 +1048,7 @@ fail: brcmf_fws_del_interface(ifp); brcmf_fws_deinit(drvr); } - if (ifp) - brcmf_net_detach(ifp->ndev, false); + brcmf_net_detach(ifp->ndev, false); if (p2p_ifp) brcmf_net_detach(p2p_ifp->ndev, false); drvr->iflist[0] = NULL; From 2b7425f3629b38c438f890c20c5faeca64b144ff Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:57 +0100 Subject: [PATCH 0855/1050] brcmfmac: fix clearing entry IPv6 address When an IPv6 address is to be cleared there is a possible out-of-bounds access. In addition, the last entry is not cleared and the total number of stored IPv6 addresses is not adjusted. This patch fixes both issues. Bug was found using Coverity. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 9a05371453ce..7a65f9da048a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -873,9 +873,12 @@ static int brcmf_inet6addr_changed(struct notifier_block *nb, } break; case NETDEV_DOWN: - if (i < NDOL_MAX_ENTRIES) - for (; i < ifp->ipv6addr_idx; i++) table[i] = table[i + 1]; + if (i < NDOL_MAX_ENTRIES) { + for (; i < ifp->ipv6addr_idx - 1; i++) table[i] = table[i + 1]; + memset(&table[i], 0, sizeof(table[i])); + ifp->ipv6addr_idx--; + } break; default: break; From a7ed7828ecda0c2b5e0d7f55dedd4230afd4b583 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:58 +0100 Subject: [PATCH 0856/1050] brcmfmac: fix out of bound access on clearing wowl wake indicator Clearing the wowl wake indicator happens with a rather odd construction where the string "clear" is used to set the iovar wowl_wakeind. This was implemented incorrectly and caused an out-of-bounds access. Use an intermediate variable of the correct length and copy the string into it. Problem was found using Coverity.
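The bug class here is passing a 6-byte string literal together with a struct-sized length; the fix stages the string in an object of the advertised size. Reduced to its essentials (plain C; the struct and setter are illustrative, not the brcmfmac API):

#include <string.h>

struct wakeind {
	char data[20];
};

/* Consumes len bytes from buf; reading sizeof(struct wakeind) bytes
 * out of the 6-byte literal "clear" was the out-of-bounds access.
 */
int iovar_set(const char *name, const void *buf, size_t len);

static int clear_wakeind(void)
{
	struct wakeind w;

	memset(&w, 0, sizeof(w));
	memcpy(w.data, "clear", 6);	/* 5 chars plus NUL, fits easily */
	return iovar_set("wowl_wakeind", &w, sizeof(w));
}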
Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 0f2667e95e81..d97d6b153d6a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3703,6 +3703,7 @@ static void brcmf_configure_wowl(struct brcmf_cfg80211_info *cfg, struct cfg80211_wowlan *wowl) { u32 wowl_config; + struct brcmf_wowl_wakeind_le wowl_wakeind; u32 i; brcmf_dbg(TRACE, "Suspend, wowl config.\n"); @@ -3744,8 +3745,9 @@ static void brcmf_configure_wowl(struct brcmf_cfg80211_info *cfg, if (!test_bit(BRCMF_VIF_STATUS_CONNECTED, &ifp->vif->sme_state)) wowl_config |= BRCMF_WOWL_UNASSOC; - brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", "clear", - sizeof(struct brcmf_wowl_wakeind_le)); + memcpy(&wowl_wakeind, "clear", 6); + brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", &wowl_wakeind, + sizeof(wowl_wakeind)); brcmf_fil_iovar_int_set(ifp, "wowl", wowl_config); brcmf_fil_iovar_int_set(ifp, "wowl_activate", 1); brcmf_bus_wowl_config(cfg->pub->bus_if, true); From 92c313604711a0976def79dabb9e8da3cc2cc780 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Mon, 19 Sep 2016 12:09:59 +0100 Subject: [PATCH 0857/1050] brcmfmac: simplify mapping of auth type The 802.11 standard only has four valid auth type configurations of which our firmware only supports two, ie. Open System and Shared Key. Simplify the mapping falling back to automatic for other types specified by user-space. Reviewed-by: Arend Van Spriel Reviewed-by: Franky Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index d97d6b153d6a..6aeb69cb29b6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1595,15 +1595,9 @@ static s32 brcmf_set_auth_type(struct net_device *ndev, val = 1; brcmf_dbg(CONN, "shared key\n"); break; - case NL80211_AUTHTYPE_AUTOMATIC: - val = 2; - brcmf_dbg(CONN, "automatic\n"); - break; - case NL80211_AUTHTYPE_NETWORK_EAP: - brcmf_dbg(CONN, "network eap\n"); default: val = 2; - brcmf_err("invalid auth type (%d)\n", sme->auth_type); + brcmf_dbg(CONN, "automatic, auth type (%d)\n", sme->auth_type); break; } From 1afac196c16753f93d482eedb9aeb802e740e67e Mon Sep 17 00:00:00 2001 From: Cathy Luo Date: Tue, 20 Sep 2016 20:49:02 +0530 Subject: [PATCH 0858/1050] mwifiex: fix kernel crash for USB chipsets Following crash issue is observed during TCP traffic stress test [ 2253.625439] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/u17:1:5191] [ 2253.625520] Call Trace: [ 2253.625527] [] ? moal_spin_lock+0x30/0x30 [usb8xxx] [ 2253.625533] [] ? 
wlan_wmm_lists_empty+0xb/0xf0 [mlan] [ 2253.625537] [] mlan_main_process+0x1b3/0x720 [mlan] [ 2253.625540] [] woal_main_work_queue+0x45/0x80 [usb8xxx] [ 2253.625543] [] process_one_work+0x150/0x3f0 [ 2253.625545] [] worker_thread+0x121/0x520 [ 2253.625547] [] ? rescuer_thread+0x330/0x330 [ 2253.625549] [] kthread+0xd2/0xf0 [ 2253.625551] [] ? kthread_create_on_node+0x1c0/0x1c0 [ 2253.625553] [] ret_from_fork+0x7c/0xb0 [ 2253.625555] [] ? kthread_create_on_node+0x1c0/0x1c0 In mwifiex_usb_tx_complete(), we are updating port->block_status first and then freeing the skb attached to that URB. We may end up attaching new skb to URB in a corner case and same will be freed. This results in the kernel crash. The problem is solved by changing the sequence. Signed-off-by: Cathy Luo Signed-off-by: Shengzhen Li Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 8a20620fa119..e8283dc1f088 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -273,6 +273,8 @@ static void mwifiex_usb_tx_complete(struct urb *urb) } else { mwifiex_dbg(adapter, DATA, "%s: DATA\n", __func__); + mwifiex_write_data_complete(adapter, context->skb, 0, + urb->status ? -1 : 0); for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) { port = &card->port[i]; if (context->ep == port->tx_data_ep) { @@ -282,8 +284,6 @@ static void mwifiex_usb_tx_complete(struct urb *urb) } } adapter->data_sent = false; - mwifiex_write_data_complete(adapter, context->skb, 0, - urb->status ? -1 : 0); } if (card->mc_resync_flag) From 5476f8030d9a9f7082ba5a4d4f0a1bfbf6936800 Mon Sep 17 00:00:00 2001 From: Cathy Luo Date: Tue, 20 Sep 2016 20:49:03 +0530 Subject: [PATCH 0859/1050] mwifiex: fix race condition causing tx timeout It's been observed that in a corner case mwifiex_usb_tx_complete() gets called before we exit from mwifiex_usb_host_to_card() after submitting the urb. 'data_sent' flag remains set in this case. It blocks further Tx packets and triggers watchdog timeout. The problem is fixed by setting data_sent and port_block flag at correct place. 
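Both mwifiex fixes rest on the same URB lifetime rule: after usb_submit_urb() returns, or even before it returns, the completion handler may already be running on another CPU, so any bookkeeping the handler reads (or that gates further submissions) must be published before submission and rolled back if submission fails. A condensed outline of that ordering (the fields and limits below are illustrative, not mwifiex's actual layout):

#include <linux/atomic.h>
#include <linux/usb.h>

#define MAX_TX_URB 4

struct tx_port {			/* stand-in for the driver's port */
	atomic_t tx_data_urb_pending;
	bool block_status;
};

static int submit_tx(struct tx_port *port, struct urb *tx_urb,
		     bool *data_sent)
{
	/* Publish the "port full" state first; the completion handler
	 * can fire before usb_submit_urb() even returns.
	 */
	atomic_inc(&port->tx_data_urb_pending);
	if (atomic_read(&port->tx_data_urb_pending) == MAX_TX_URB) {
		port->block_status = true;
		*data_sent = true;
	}

	if (usb_submit_urb(tx_urb, GFP_ATOMIC)) {
		/* A failed submit never completes: undo the state. */
		atomic_dec(&port->tx_data_urb_pending);
		port->block_status = false;
		*data_sent = false;
		return -EIO;
	}
	return -EINPROGRESS;
}

The same reasoning explains the crash fix above it: the completion handler must free the skb via the write-complete path before updating the port accounting, otherwise a racing submitter can see the port as free and attach a new skb to the context that is about to be freed.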
Signed-off-by: Cathy Luo Signed-off-by: Shengzhen Li Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index e8283dc1f088..8fd51e42ea5e 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -897,6 +897,13 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, else atomic_inc(&port->tx_data_urb_pending); + if (ep != card->tx_cmd_ep && + atomic_read(&port->tx_data_urb_pending) == + MWIFIEX_TX_DATA_URB) { + port->block_status = true; + adapter->data_sent = mwifiex_usb_data_sent(adapter); + } + if (usb_submit_urb(tx_urb, GFP_ATOMIC)) { mwifiex_dbg(adapter, ERROR, "%s: usb_submit_urb failed\n", __func__); @@ -905,6 +912,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, } else { atomic_dec(&port->tx_data_urb_pending); port->block_status = false; + adapter->data_sent = false; if (port->tx_data_ix) port->tx_data_ix--; else @@ -916,9 +924,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, if (ep != card->tx_cmd_ep && atomic_read(&port->tx_data_urb_pending) == MWIFIEX_TX_DATA_URB) { - port->block_status = true; - ret = -ENOSR; - goto done; + return -ENOSR; } } From ac3b561721e946047eebbca73d8dcaee1cc9b302 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 20 Sep 2016 20:49:04 +0530 Subject: [PATCH 0860/1050] mwifiex: code rearrangement in mwifiex_usb_host_to_card() This patch helps get rid of goto statement and improves readability. Signed-off-by: Amitkumar Karwar Signed-off-by: Cathy Luo Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 23 ++++++---------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 8fd51e42ea5e..73eb0846db21 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -841,7 +841,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, struct usb_tx_data_port *port = NULL; u8 *data = (u8 *)skb->data; struct urb *tx_urb; - int idx, ret; + int idx, ret = -EINPROGRESS; if (adapter->is_suspended) { mwifiex_dbg(adapter, ERROR, @@ -865,8 +865,9 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, if (atomic_read(&port->tx_data_urb_pending) >= MWIFIEX_TX_DATA_URB) { port->block_status = true; - ret = -EBUSY; - goto done; + adapter->data_sent = + mwifiex_usb_data_sent(adapter); + return -EBUSY; } if (port->tx_data_ix >= MWIFIEX_TX_DATA_URB) port->tx_data_ix = 0; @@ -902,6 +903,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, MWIFIEX_TX_DATA_URB) { port->block_status = true; adapter->data_sent = mwifiex_usb_data_sent(adapter); + ret = -ENOSR; } if (usb_submit_urb(tx_urb, GFP_ATOMIC)) { @@ -918,22 +920,9 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, else port->tx_data_ix = MWIFIEX_TX_DATA_URB; } - - return -1; - } else { - if (ep != card->tx_cmd_ep && - atomic_read(&port->tx_data_urb_pending) == - MWIFIEX_TX_DATA_URB) { - return -ENOSR; - } + ret = -1; } - return -EINPROGRESS; - -done: - if (ep != card->tx_cmd_ep) - adapter->data_sent = mwifiex_usb_data_sent(adapter); - return ret; } From 3a589fae4a2cff317c3cabd4f76578ffd2761808 
Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Sep 2016 21:19:26 -0400 Subject: [PATCH 0861/1050] rtl8xxxu: Fix off by one error calculating pubq This was detected tracing the 8188eu driver, but doesn't seem to make any difference when using it. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index ca92022f9d50..98fcd7ba6826 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3869,7 +3869,7 @@ static void rtl8xxxu_init_queue_reserved_page(struct rtl8xxxu_priv *priv) val32 = (nq << RQPN_NPQ_SHIFT) | (eq << RQPN_EPQ_SHIFT); rtl8xxxu_write32(priv, REG_RQPN_NPQ, val32); - pubq = fops->total_page_num - hq - lq - nq; + pubq = fops->total_page_num - hq - lq - nq - 1; val32 = RQPN_LOAD; val32 |= (hq << RQPN_HI_PQ_SHIFT); From c0a99bbb1b7a11605a53f84f5c444be3ef25a8ab Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Sep 2016 21:19:27 -0400 Subject: [PATCH 0862/1050] rtl8xxxu: Clean up llt_init() API Remove last_tx_page argument from the llt_init() function. The rtl8xxxu_fileops structure contains the correct TX_TOTAL_PAGE_NUM value for the device, and rtl8xxxu_auto_llt_table() doesn't need to know the value in the first place. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 6 +++--- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 1f54b8928d3c..a10a57ca16db 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1318,7 +1318,7 @@ struct rtl8xxxu_fileops { int (*power_on) (struct rtl8xxxu_priv *priv); void (*power_off) (struct rtl8xxxu_priv *priv); void (*reset_8051) (struct rtl8xxxu_priv *priv); - int (*llt_init) (struct rtl8xxxu_priv *priv, u8 last_tx_page); + int (*llt_init) (struct rtl8xxxu_priv *priv); void (*init_phy_bb) (struct rtl8xxxu_priv *priv); int (*init_phy_rf) (struct rtl8xxxu_priv *priv); void (*phy_init_antenna_selection) (struct rtl8xxxu_priv *priv); @@ -1400,14 +1400,14 @@ int rtl8xxxu_load_firmware(struct rtl8xxxu_priv *priv, char *fw_name); void rtl8xxxu_firmware_self_reset(struct rtl8xxxu_priv *priv); void rtl8xxxu_power_off(struct rtl8xxxu_priv *priv); void rtl8xxxu_reset_8051(struct rtl8xxxu_priv *priv); -int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page); +int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen2_prepare_calibrate(struct rtl8xxxu_priv *priv, u8 start); int rtl8xxxu_flush_fifo(struct rtl8xxxu_priv *priv); int rtl8xxxu_gen2_h2c_cmd(struct rtl8xxxu_priv *priv, struct h2c_cmd *h2c, int len); int rtl8xxxu_active_to_lps(struct rtl8xxxu_priv *priv); void rtl8xxxu_disabled_to_emu(struct rtl8xxxu_priv *priv); -int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page); +int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_phy_iq_calibrate(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_init_phy_bb(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c 
b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 98fcd7ba6826..c628b908efc2 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -2472,10 +2472,13 @@ static int rtl8xxxu_llt_write(struct rtl8xxxu_priv *priv, u8 address, u8 data) return ret; } -int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page) +int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv) { int ret; int i; + u8 last_tx_page; + + last_tx_page = priv->fops->total_page_num; for (i = 0; i < last_tx_page; i++) { ret = rtl8xxxu_llt_write(priv, i, i + 1); @@ -2503,7 +2506,7 @@ exit: return ret; } -int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page) +int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv) { u32 val32; int ret = 0; @@ -3988,7 +3991,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) dev_dbg(dev, "%s: macpower %i\n", __func__, macpower); if (!macpower) { - ret = priv->fops->llt_init(priv, TX_TOTAL_PAGE_NUM); + ret = priv->fops->llt_init(priv); if (ret) { dev_warn(dev, "%s: LLT table init failed\n", __func__); goto exit; From 2fc5dd27bf9b75d83a7071d13cca044bc39748fb Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Sep 2016 21:19:28 -0400 Subject: [PATCH 0863/1050] rtl8xxxu: Use a struct rtl8xxxu_fileops * in rtl8xxxu_init_device() This saves some 217 or so dereferences of priv->fops. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index c628b908efc2..71145ebb3564 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3886,6 +3886,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) { struct rtl8xxxu_priv *priv = hw->priv; struct device *dev = &priv->udev->dev; + struct rtl8xxxu_fileops *fops = priv->fops; bool macpower; int ret; u8 val8; @@ -3904,7 +3905,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) else macpower = true; - ret = priv->fops->power_on(priv); + ret = fops->power_on(priv); if (ret < 0) { dev_warn(dev, "%s: Failed power on\n", __func__); goto exit; @@ -3921,7 +3922,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set RX page boundary */ - rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, priv->fops->trxff_boundary); + rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, fops->trxff_boundary); ret = rtl8xxxu_download_firmware(priv); dev_dbg(dev, "%s: download_firmware %i\n", __func__, ret); @@ -3932,8 +3933,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) if (ret) goto exit; - if (priv->fops->phy_init_antenna_selection) - priv->fops->phy_init_antenna_selection(priv); + if (fops->phy_init_antenna_selection) + fops->phy_init_antenna_selection(priv); ret = rtl8xxxu_init_mac(priv); @@ -3946,7 +3947,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) if (ret) goto exit; - ret = priv->fops->init_phy_rf(priv); + ret = fops->init_phy_rf(priv); if (ret) goto exit; @@ -3971,7 +3972,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set TX buffer boundary */ - val8 = priv->fops->total_page_num + 1; + val8 = fops->total_page_num + 1; rtl8xxxu_write8(priv, REG_TXPKTBUF_BCNQ_BDNY, val8); rtl8xxxu_write8(priv, REG_TXPKTBUF_MGQ_BDNY, val8); @@ -3984,14 +3985,14 @@ static int
rtl8xxxu_init_device(struct ieee80211_hw *hw) * The vendor drivers set PBP for all devices, except 8192e. * There is no explanation for this in any of the sources. */ - val8 = (priv->fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) | - (priv->fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT); + val8 = (fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) | + (fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT); if (priv->rtl_chip != RTL8192E) rtl8xxxu_write8(priv, REG_PBP, val8); dev_dbg(dev, "%s: macpower %i\n", __func__, macpower); if (!macpower) { - ret = priv->fops->llt_init(priv); + ret = fops->llt_init(priv); if (ret) { dev_warn(dev, "%s: LLT table init failed\n", __func__); goto exit; @@ -4000,12 +4001,12 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Chip specific quirks */ - priv->fops->usb_quirks(priv); + fops->usb_quirks(priv); /* * Enable TX report and TX report timer for 8723bu/8188eu/... */ - if (priv->fops->has_tx_report) { + if (fops->has_tx_report) { val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); val8 |= TX_REPORT_CTRL_TIMER_ENABLE; rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); @@ -4140,8 +4141,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write8(priv, REG_RSV_CTRL, val8); } - if (priv->fops->init_aggregation) - priv->fops->init_aggregation(priv); + if (fops->init_aggregation) + fops->init_aggregation(priv); /* * Enable CCK and OFDM block @@ -4158,7 +4159,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Start out with default power levels for channel 6, 20MHz */ - priv->fops->set_tx_power(priv, 1, false); + fops->set_tx_power(priv, 1, false); /* Let the 8051 take control of antenna setting */ if (priv->rtl_chip != RTL8192E) { @@ -4174,8 +4175,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write16(priv, REG_FAST_EDCA_CTRL, 0); - if (priv->fops->init_statistics) - priv->fops->init_statistics(priv); + if (fops->init_statistics) + fops->init_statistics(priv); if (priv->rtl_chip == RTL8192E) { /* @@ -4193,12 +4194,12 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8723a_phy_lc_calibrate(priv); - priv->fops->phy_iq_calibrate(priv); + fops->phy_iq_calibrate(priv); /* * This should enable thermal meter */ - if (priv->fops->gen2_thermal_meter) + if (fops->gen2_thermal_meter) rtl8xxxu_write_rfreg(priv, RF_A, RF6052_REG_T_METER_8723B, 0x37cf8); else From b42fbed6b8a5942e9f76ec8c7f9c9fd798a2d3af Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 20 Sep 2016 21:19:29 -0400 Subject: [PATCH 0864/1050] rtl8xxxu: Stop log spam from each successful interrupt As soon as debugging is turned on, the logs are filled with messages reporting the interrupt status. As this quantity is usually zero, this output is not needed. In fact, there will be a report if the status is not zero, thus the debug line in question could probably be deleted. Rather than taking that action, I have changed it to only be printed when the newly added RTL8XXXU_DEBUG_INTERRUPT bit is set in the debug mask. 
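Note that dev_dbg() is itself compiled out unless DEBUG or dynamic debug is enabled, so the new bit acts as an extra runtime gate on top of that. The idiom is the same one used for the other RTL8XXXU_DEBUG_* classes; a minimal sketch, mirroring the hunk below:

	/* Per-interrupt status reporting is now opt-in via the debug mask. */
	if (rtl8xxxu_debug & RTL8XXXU_DEBUG_INTERRUPT)
		dev_dbg(dev, "%s: status %i\n", __func__, urb->status);

Assuming the driver's usual writable 'debug' module parameter, the new bit (0x4000) can then be toggled at runtime without rebuilding.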
Signed-off-by: Larry Finger Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index a10a57ca16db..1016628926d2 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -29,6 +29,7 @@ #define RTL8XXXU_DEBUG_H2C 0x800 #define RTL8XXXU_DEBUG_ACTION 0x1000 #define RTL8XXXU_DEBUG_EFUSE 0x2000 +#define RTL8XXXU_DEBUG_INTERRUPT 0x4000 #define RTW_USB_CONTROL_MSG_TIMEOUT 500 #define RTL8XXXU_MAX_REG_POLL 500 diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 71145ebb3564..b2d7f6e69667 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5375,7 +5375,8 @@ static void rtl8xxxu_int_complete(struct urb *urb) struct device *dev = &priv->udev->dev; int ret; - dev_dbg(dev, "%s: status %i\n", __func__, urb->status); + if (rtl8xxxu_debug & RTL8XXXU_DEBUG_INTERRUPT) + dev_dbg(dev, "%s: status %i\n", __func__, urb->status); if (urb->status == 0) { usb_anchor_urb(urb, &priv->int_anchor); ret = usb_submit_urb(urb, GFP_ATOMIC); From 23e9c128adb2038c27a424a5f91136e7fa3e0dc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 21 Sep 2016 08:23:24 +0200 Subject: [PATCH 0865/1050] brcmfmac: fix memory leak in brcmf_fill_bss_param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is called from get_station callback which means that every time user space was getting/dumping station(s) we were leaking 2 KiB. Signed-off-by: Rafał Miłecki Fixes: 1f0dc59a6de ("brcmfmac: rework .get_station() callback") Cc: stable@vger.kernel.org # 4.2+ Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 6aeb69cb29b6..b777e1b2f87a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2527,7 +2527,7 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si) WL_BSS_INFO_MAX); if (err) { brcmf_err("Failed to get bss info (%d)\n", err); - return; + goto out_kfree; } si->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); si->bss_param.beacon_interval = le16_to_cpu(buf->bss_le.beacon_period); @@ -2539,6 +2539,9 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si) si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (capability & WLAN_CAPABILITY_SHORT_SLOT_TIME) si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; + +out_kfree: + kfree(buf); } static s32 From 2df86ad959c9d1cdbeb2f23a0801857731156692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 23 Sep 2016 15:27:46 +0200 Subject: [PATCH 0866/1050] brcmfmac: drop unused fields from struct brcmf_pub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They seem to be there from the first day. We calculate these values but never use them. 
Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h | 4 ---- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c | 2 -- 3 files changed, 9 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 7a65f9da048a..17152805d96f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -508,9 +508,6 @@ int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) ndev->needed_headroom += drvr->hdrlen; ndev->ethtool_ops = &brcmf_ethtool_ops; - drvr->rxsz = ndev->mtu + ndev->hard_header_len + - drvr->hdrlen; - /* set the mac address */ memcpy(ndev->dev_addr, ifp->mac_addr, ETH_ALEN); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index 8a810bb24a11..c94dcab260d0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -112,15 +112,11 @@ struct brcmf_pub { /* Internal brcmf items */ uint hdrlen; /* Total BRCMF header length (proto + bus) */ - uint rxsz; /* Rx buffer size bus module should use */ /* Dongle media info */ char fwver[BRCMF_DRIVER_FIRMWARE_VERSION_LEN]; u8 mac[ETH_ALEN]; /* MAC address obtained from dongle */ - /* Multicast data packets sent to dongle */ - unsigned long tx_multicast; - struct mac_address addresses[BRCMF_MAX_IFS]; struct brcmf_if *iflist[BRCMF_MAX_IFS]; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c index 9f9024a7bd64..a190f535efc9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c @@ -2104,8 +2104,6 @@ int brcmf_fws_process_skb(struct brcmf_if *ifp, struct sk_buff *skb) if ((skb->priority == 0) || (skb->priority > 7)) skb->priority = cfg80211_classify8021d(skb, NULL); - drvr->tx_multicast += !!multicast; - if (fws->avoid_queueing) { rc = brcmf_proto_txdata(drvr, ifp->ifidx, 0, skb); if (rc < 0) From 4b87e5af638b6056bd6c20b0954d09a5a58633be Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 12 Sep 2016 16:03:30 +0300 Subject: [PATCH 0867/1050] iwlwifi: remove support for fw older than -17 and -22 FW versions older than -17 for 3160 and 7260 and older than -22 for newer NICs are not supported anymore. Don't load these versions and remove code that handles them. 
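Enforcement is unchanged in mechanism: each device config carries a ucode_api_min/ucode_api_max pair, and the firmware loader refuses images whose advertised API version falls outside that window. A minimal sketch of the gate (illustrative; the real check sits in the iwl-drv.c firmware-load path):

	/* Reject firmware whose API version is outside the supported window. */
	if (api_ver < cfg->ucode_api_min || api_ver > cfg->ucode_api_max) {
		IWL_ERR(drv, "unsupported uCode API version %u (need %u..%u)\n",
			api_ver, cfg->ucode_api_min, cfg->ucode_api_max);
		return -EINVAL;
	}

Bumping the IWL*_UCODE_API_MIN constants below is therefore sufficient to make the obsolete images fail to load, after which the per-flag fallbacks they required can be deleted wholesale.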
Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-7000.c | 6 ++--- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 2 +- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 2 +- .../net/wireless/intel/iwlwifi/iwl-fw-file.h | 24 +---------------- .../wireless/intel/iwlwifi/mvm/debugfs-vif.c | 3 +-- .../wireless/intel/iwlwifi/mvm/fw-api-power.h | 21 +++++---------- .../wireless/intel/iwlwifi/mvm/fw-api-tx.h | 2 -- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 10 +------ .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 24 +++++++---------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 -- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 6 ++--- .../net/wireless/intel/iwlwifi/mvm/power.c | 15 ++++------- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 27 +++---------------- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 10 ++----- 14 files changed, 36 insertions(+), 118 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c index aa575fb9dea0..d4b73dedf89b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c @@ -77,9 +77,9 @@ #define IWL3168_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL7260_UCODE_API_MIN 16 -#define IWL7265_UCODE_API_MIN 16 -#define IWL7265D_UCODE_API_MIN 16 +#define IWL7260_UCODE_API_MIN 17 +#define IWL7265_UCODE_API_MIN 17 +#define IWL7265D_UCODE_API_MIN 17 #define IWL3168_UCODE_API_MIN 20 /* NVM versions */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index 990cf2b17517..d02ca1491d16 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -74,7 +74,7 @@ #define IWL8265_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL8000_UCODE_API_MIN 16 +#define IWL8000_UCODE_API_MIN 17 #define IWL8265_UCODE_API_MIN 20 /* NVM versions */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index a2c7946c12c9..ff850410d897 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -58,7 +58,7 @@ #define IWL9000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL9000_UCODE_API_MIN 16 +#define IWL9000_UCODE_API_MIN 17 /* NVM versions */ #define IWL9000_NVM_VERSION 0x0a1d diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h index 94423f04320b..ceec5ca2b1ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h @@ -199,8 +199,6 @@ struct iwl_ucode_capa { * @IWL_UCODE_TLV_FLAGS_NEWSCAN: new uCode scan behavior on hidden SSID, * treats good CRC threshold as a boolean * @IWL_UCODE_TLV_FLAGS_MFP: This uCode image supports MFP (802.11w). - * @IWL_UCODE_TLV_FLAGS_P2P: This uCode image supports P2P. - * @IWL_UCODE_TLV_FLAGS_DW_BC_TABLE: The SCD byte count table is in DWORDS * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: This uCode image supports uAPSD * @IWL_UCODE_TLV_FLAGS_SHORT_BL: 16 entries of black list instead of 64 in scan * offload profile config command. @@ -210,36 +208,24 @@ struct iwl_ucode_capa { * from the probe request template. 
* @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL: new NS offload (small version) * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE: new NS offload (large version) - * @IWL_UCODE_TLV_FLAGS_P2P_PM: P2P client supports PM as a stand alone MAC - * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_DCM: support power save on BSS station and - * P2P client interfaces simultaneously if they are in different bindings. - * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_SCM: support power save on BSS station and - * P2P client interfaces simultaneously if they are in same bindings. * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: General support for uAPSD * @IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save * @IWL_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering. - * @IWL_UCODE_TLV_FLAGS_GO_UAPSD: AP/GO interfaces support uAPSD clients * @IWL_UCODE_TLV_FLAGS_EBS_SUPPORT: this uCode image supports EBS. */ enum iwl_ucode_tlv_flag { IWL_UCODE_TLV_FLAGS_PAN = BIT(0), IWL_UCODE_TLV_FLAGS_NEWSCAN = BIT(1), IWL_UCODE_TLV_FLAGS_MFP = BIT(2), - IWL_UCODE_TLV_FLAGS_P2P = BIT(3), - IWL_UCODE_TLV_FLAGS_DW_BC_TABLE = BIT(4), IWL_UCODE_TLV_FLAGS_SHORT_BL = BIT(7), IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS = BIT(10), IWL_UCODE_TLV_FLAGS_NO_BASIC_SSID = BIT(12), IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL = BIT(15), IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE = BIT(16), - IWL_UCODE_TLV_FLAGS_P2P_PM = BIT(21), - IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM = BIT(22), - IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM = BIT(23), IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT = BIT(24), IWL_UCODE_TLV_FLAGS_EBS_SUPPORT = BIT(25), IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD = BIT(26), IWL_UCODE_TLV_FLAGS_BCAST_FILTERING = BIT(29), - IWL_UCODE_TLV_FLAGS_GO_UAPSD = BIT(30), }; typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; @@ -249,13 +235,8 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; * @IWL_UCODE_TLV_API_FRAGMENTED_SCAN: This ucode supports active dwell time * longer than the passive one, which is essential for fragmented scan. * @IWL_UCODE_TLV_API_WIFI_MCC_UPDATE: ucode supports MCC updates with source. - * @IWL_UCODE_TLV_API_WIDE_CMD_HDR: ucode supports wide command header * @IWL_UCODE_TLV_API_LQ_SS_PARAMS: Configure STBC/BFER via LQ CMD ss_params * @IWL_UCODE_TLV_API_NEW_VERSION: new versioning format - * @IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY: scan APIs use 8-level priority - * instead of 3. 
- * @IWL_UCODE_TLV_API_TX_POWER_CHAIN: TX power API has larger command size - * (command version 3) that supports per-chain limits * @IWL_UCODE_TLV_API_SCAN_TSF_REPORT: Scan start time reported in scan * iteration complete notification, and the timestamp reported for RX * received during scan, are reported in TSF of the mac specified in the @@ -266,11 +247,8 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; enum iwl_ucode_tlv_api { IWL_UCODE_TLV_API_FRAGMENTED_SCAN = (__force iwl_ucode_tlv_api_t)8, IWL_UCODE_TLV_API_WIFI_MCC_UPDATE = (__force iwl_ucode_tlv_api_t)9, - IWL_UCODE_TLV_API_WIDE_CMD_HDR = (__force iwl_ucode_tlv_api_t)14, IWL_UCODE_TLV_API_LQ_SS_PARAMS = (__force iwl_ucode_tlv_api_t)18, - IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, - IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY = (__force iwl_ucode_tlv_api_t)24, - IWL_UCODE_TLV_API_TX_POWER_CHAIN = (__force iwl_ucode_tlv_api_t)27, + IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, IWL_UCODE_TLV_API_SCAN_TSF_REPORT = (__force iwl_ucode_tlv_api_t)28, NUM_IWL_UCODE_TLV_API diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 8ff19210ef1e..2d6f44fbaf62 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -1578,8 +1578,7 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) || - (vif->type == NL80211_IFTYPE_STATION && vif->p2p && - mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM))) + (vif->type == NL80211_IFTYPE_STATION && vif->p2p))) MVM_DEBUGFS_ADD_FILE_VIF(pm_params, mvmvif->dbgfs_dir, S_IWUSR | S_IRUSR); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h index 404b0de9e2dc..3fa43d1348a2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h @@ -313,35 +313,26 @@ enum iwl_dev_tx_power_cmd_mode { IWL_TX_POWER_MODE_SET_ACK = 3, }; /* TX_POWER_REDUCED_FLAGS_TYPE_API_E_VER_4 */; +#define IWL_NUM_CHAIN_LIMITS 2 +#define IWL_NUM_SUB_BANDS 5 + /** - * struct iwl_dev_tx_power_cmd_v2 - TX power reduction command + * struct iwl_dev_tx_power_cmd - TX power reduction command * @set_mode: see &enum iwl_dev_tx_power_cmd_mode * @mac_context_id: id of the mac ctx for which we are reducing TX power. * @pwr_restriction: TX power restriction in 1/8 dBms. 
* @dev_24: device TX power restriction in 1/8 dBms * @dev_52_low: device TX power restriction upper band - low * @dev_52_high: device TX power restriction upper band - high + * @per_chain_restriction: per chain restrictions */ -struct iwl_dev_tx_power_cmd_v2 { +struct iwl_dev_tx_power_cmd_v3 { __le32 set_mode; __le32 mac_context_id; __le16 pwr_restriction; __le16 dev_24; __le16 dev_52_low; __le16 dev_52_high; -} __packed; /* TX_REDUCED_POWER_API_S_VER_2 */ - -#define IWL_NUM_CHAIN_LIMITS 2 -#define IWL_NUM_SUB_BANDS 5 - -/** - * struct iwl_dev_tx_power_cmd - TX power reduction command - * @v2: version 2 of the command, embedded here for easier software handling - * @per_chain_restriction: per chain restrictions - */ -struct iwl_dev_tx_power_cmd_v3 { - /* v3 is just an extension of v2 - keep this here */ - struct iwl_dev_tx_power_cmd_v2 v2; __le16 per_chain_restriction[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS]; } __packed; /* TX_REDUCED_POWER_API_S_VER_3 */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 0055a960238b..57d29a1b7b8d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -89,7 +89,6 @@ * @TX_CMD_FLG_MH_PAD: driver inserted 2 byte padding after MAC header. * Should be set for 26/30 length MAC headers * @TX_CMD_FLG_RESP_TO_DRV: zero this if the response should go only to FW - * @TX_CMD_FLG_CCMP_AGG: this frame uses CCMP for aggregation acceleration * @TX_CMD_FLG_TKIP_MIC_DONE: FW already performed TKIP MIC calculation * @TX_CMD_FLG_DUR: disable duration overwriting used in PS-Poll Assoc-id * @TX_CMD_FLG_FW_DROP: FW should mark frame to be dropped @@ -116,7 +115,6 @@ enum iwl_tx_flags { TX_CMD_FLG_KEEP_SEQ_CTL = BIT(18), TX_CMD_FLG_MH_PAD = BIT(20), TX_CMD_FLG_RESP_TO_DRV = BIT(21), - TX_CMD_FLG_CCMP_AGG = BIT(22), TX_CMD_FLG_TKIP_MIC_DONE = BIT(23), TX_CMD_FLG_DUR = BIT(25), TX_CMD_FLG_FW_DROP = BIT(26), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 897412057d1f..872066317fa5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1046,19 +1046,11 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) { struct iwl_mvm_sar_table sar_table; struct iwl_dev_tx_power_cmd cmd = { - .v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), + .v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), }; int ret, i, j, idx; int len = sizeof(cmd); - /* we can't do anything with the table if the FW doesn't support it */ - if (!fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_TX_POWER_CHAIN)) { - IWL_DEBUG_RADIO(mvm, - "FW doesn't support per-chain TX power settings.\n"); - return 0; - } - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) len = sizeof(cmd.v3); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index a5ede8c9bea2..318efd814037 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -479,13 +479,11 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->n_cipher_suites++; } - /* - * Enable 11w if advertised by firmware and software crypto - * is not enabled (as the firmware will interpret some mgmt - * packets, so enabling it with software crypto isn't safe) + /* Enable 11w if software crypto is not enabled (as the + * firmware will interpret some mgmt 
packets, so enabling it + * with software crypto isn't safe). */ - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_MFP && - !iwlwifi_mod_params.sw_crypto) { + if (!iwlwifi_mod_params.sw_crypto) { ieee80211_hw_set(hw, MFP_CAPABLE); mvm->ciphers[hw->wiphy->n_cipher_suites] = WLAN_CIPHER_SUITE_AES_CMAC; @@ -547,9 +545,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG | REGULATORY_DISABLE_BEACON_HINTS; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_GO_UAPSD) - hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; - + hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH; hw->wiphy->iface_combinations = iwl_mvm_iface_combinations; @@ -1273,20 +1269,18 @@ static int iwl_mvm_set_tx_power(struct iwl_mvm *mvm, struct ieee80211_vif *vif, s16 tx_power) { struct iwl_dev_tx_power_cmd cmd = { - .v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC), - .v3.v2.mac_context_id = + .v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC), + .v3.mac_context_id = cpu_to_le32(iwl_mvm_vif_from_mac80211(vif)->id), - .v3.v2.pwr_restriction = cpu_to_le16(8 * tx_power), + .v3.pwr_restriction = cpu_to_le16(8 * tx_power), }; int len = sizeof(cmd); if (tx_power == IWL_DEFAULT_MAX_TX_POWER) - cmd.v3.v2.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER); + cmd.v3.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER); if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) len = sizeof(cmd.v3); - if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_TX_POWER_CHAIN)) - len = sizeof(cmd.v3.v2); return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0, len, &cmd); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index b7cfdcbcf95b..d17cbf603f7c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1305,8 +1305,6 @@ static inline void iwl_mvm_set_tx_cmd_ccmp(struct ieee80211_tx_info *info, tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyconf->key, keyconf->keylen); - if (info->flags & IEEE80211_TX_CTL_AMPDU) - tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_CCMP_AGG); } static inline void iwl_mvm_wait_for_async_handlers(struct iwl_mvm *mvm) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 6c08aa3fd2df..05fe6dd1a2c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -653,11 +653,9 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, /* the hardware splits the A-MSDU */ if (mvm->cfg->mq_rx_supported) trans_cfg.rx_buf_size = IWL_AMSDU_4K; - trans->wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_WIDE_CMD_HDR); - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DW_BC_TABLE) - trans_cfg.bc_table_dword = true; + trans->wide_cmd_header = true; + trans_cfg.bc_table_dword = true; trans_cfg.command_groups = iwl_mvm_groups; trans_cfg.command_groups_size = ARRAY_SIZE(iwl_mvm_groups); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c index ff85865b1dda..af6d10c23e5a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c @@ -694,8 +694,7 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, /* enable PM on p2p if p2p stand alone */ if (vifs->p2p_active && !vifs->bss_active && !vifs->ap_active) { - if 
(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM) - p2p_mvmvif->pm_enabled = true; + p2p_mvmvif->pm_enabled = true; return; } @@ -707,12 +706,10 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, ap_mvmvif->phy_ctxt->id); /* clients are not stand alone: enable PM if DCM */ - if (!(client_same_channel || ap_same_channel) && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)) { + if (!(client_same_channel || ap_same_channel)) { if (vifs->bss_active) bss_mvmvif->pm_enabled = true; - if (vifs->p2p_active && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM)) + if (vifs->p2p_active) p2p_mvmvif->pm_enabled = true; return; } @@ -721,12 +718,10 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, * There is only one channel in the system and there are only * bss and p2p clients that share it */ - if (client_same_channel && !vifs->ap_active && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM)) { + if (client_same_channel && !vifs->ap_active) { /* share same channel*/ bss_mvmvif->pm_enabled = true; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM) - p2p_mvmvif->pm_enabled = true; + p2p_mvmvif->pm_enabled = true; } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 00b03fc5807b..f279fdd6eb44 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -739,22 +739,6 @@ iwl_mvm_build_scan_probe(struct iwl_mvm *mvm, struct ieee80211_vif *vif, params->preq.common_data.len = cpu_to_le16(ies->common_ie_len); } -static __le32 iwl_mvm_scan_priority(struct iwl_mvm *mvm, - enum iwl_scan_priority_ext prio) -{ - if (fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY)) - return cpu_to_le32(prio); - - if (prio <= IWL_SCAN_PRIORITY_EXT_2) - return cpu_to_le32(IWL_SCAN_PRIORITY_LOW); - - if (prio <= IWL_SCAN_PRIORITY_EXT_4) - return cpu_to_le32(IWL_SCAN_PRIORITY_MEDIUM); - - return cpu_to_le32(IWL_SCAN_PRIORITY_HIGH); -} - static void iwl_mvm_scan_lmac_dwell(struct iwl_mvm *mvm, struct iwl_scan_req_lmac *cmd, struct iwl_mvm_scan_params *params) @@ -765,7 +749,7 @@ static void iwl_mvm_scan_lmac_dwell(struct iwl_mvm *mvm, cmd->extended_dwell = scan_timing[params->type].dwell_extended; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); - cmd->scan_prio = iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->scan_prio = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); } static inline bool iwl_mvm_scan_fits(struct iwl_mvm *mvm, int n_ssids, @@ -1067,15 +1051,12 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, cmd->fragmented_dwell = scan_timing[params->type].dwell_fragmented; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); - cmd->scan_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->scan_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); if (iwl_mvm_is_regular_scan(params)) - cmd->ooc_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); else - cmd->ooc_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_2); + cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_2); } static void diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 58fc7b3c711c..63a051be832e 
100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -241,11 +241,8 @@ static int iwl_mvm_get_temp_cmd(struct iwl_mvm *mvm) }; u32 cmdid; - if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR)) - cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE, - PHY_OPS_GROUP, 0); - else - cmdid = CMD_DTS_MEASUREMENT_TRIGGER; + cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE, + PHY_OPS_GROUP, 0); if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE)) @@ -261,9 +258,6 @@ int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp) DTS_MEASUREMENT_NOTIF_WIDE) }; int ret; - if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR)) - temp_notif[0] = DTS_MEASUREMENT_NOTIFICATION; - lockdep_assert_held(&mvm->mutex); iwl_init_notification_wait(&mvm->notif_wait, &wait_temp_notif, From 4dc6511f86b5f3edfd289d4714488d56562f9095 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 13 Sep 2016 22:59:27 +0300 Subject: [PATCH 0868/1050] iwlwifi: mvm: fix typo in TC_CMD_SEC_KEY_FROM_TABLE This define should really be TX_CMD_SEC_KEY_FROM_TABLE Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 57d29a1b7b8d..59ca97a11b2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -147,7 +147,7 @@ enum iwl_tx_pm_timeouts { * @TX_CMD_SEC_EXT: extended cipher algorithm. * @TX_CMD_SEC_GCMP: GCMP encryption algorithm. * @TX_CMD_SEC_KEY128: set for 104 bits WEP key. - * @TC_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken + * @TX_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken * from the table instead of from the TX command. * If the key is taken from the key table its index should be given by the * first byte of the TX command key field. @@ -159,7 +159,7 @@ enum iwl_tx_cmd_sec_ctrl { TX_CMD_SEC_EXT = 0x04, TX_CMD_SEC_GCMP = 0x05, TX_CMD_SEC_KEY128 = 0x08, - TC_CMD_SEC_KEY_FROM_TABLE = 0x08, + TX_CMD_SEC_KEY_FROM_TABLE = 0x08, }; /* TODO: how does these values are OK with only 16 bit variable??? */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index ef4bdfc9e7e2..a3fe73b963c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -441,7 +441,7 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, * one. * Need to handle this. */ - tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TC_CMD_SEC_KEY_FROM_TABLE; + tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE; tx_cmd->key[0] = keyconf->hw_key_idx; iwl_mvm_set_tx_cmd_pn(info, crypto_hdr); break; From 3ac37d0109a32f684d38c9a47c49dde8d27bf1b8 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 8 Sep 2016 10:44:38 +0300 Subject: [PATCH 0869/1050] iwlwifi: allow error table address new range The firmware has a new smart linker, and this table can now be in ICCM or in SMEM. It is not hardcoded, but depends on code size. Allow the full range. 
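The pointer is only used to pull the firmware's error-event table out of device memory before dumping it, so a lower-bound test on the reported address is all the validation needed. A hedged sketch of the surrounding logic (simplified from iwl_mvm_dump_nic_error_log(), with the new bound):

	struct iwl_error_event_table table;

	/* The table may now live anywhere at or above 0x400000 (ICCM or
	 * SMEM); lower addresses are still rejected as bogus. */
	if (base < 0x400000) {
		IWL_ERR(mvm, "Not valid error log pointer 0x%08X\n", base);
		return;
	}
	iwl_trans_read_mem_bytes(mvm->trans, base, &table, sizeof(table));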
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 9e366e28c983..d04babd99b53 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -512,7 +512,7 @@ void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm) base = mvm->fw->inst_errlog_ptr; } - if (base < 0x800000) { + if (base < 0x400000) { IWL_ERR(mvm, "Not valid error log pointer 0x%08X for %s uCode\n", base, From 9cd70e80f7f0df1d6d13d8aeb50a16bf40e2962c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 20 Sep 2016 13:40:33 +0300 Subject: [PATCH 0870/1050] iwlwifi: mvm: initialise ADD_STA before sending it to the firmware When we unshare a queue, the ADD_STA was not properly initialised. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 258a234feb71..fc771885e383 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -933,7 +933,7 @@ static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) /* If aggs should be turned back on - do it */ if (mvmsta->tid_data[tid].state == IWL_AGG_ON) { - struct iwl_mvm_add_sta_cmd cmd; + struct iwl_mvm_add_sta_cmd cmd = {0}; mvmsta->tid_disable_agg &= ~BIT(tid); From b6f5be287344433100201c4d6f3f8dcd1755a6b3 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 25 Sep 2016 23:31:24 +0200 Subject: [PATCH 0871/1050] net: tg3: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Acked-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/tg3.c | 110 +++++++++++++++------------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a2551bcd1027..2726f032f2d4 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12079,95 +12079,107 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, return ret; } -static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int tg3_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct tg3 *tp = netdev_priv(dev); + u32 supported, advertising; if (tg3_flag(tp, USE_PHYLIB)) { struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } - cmd->supported = (SUPPORTED_Autoneg); + supported = (SUPPORTED_Autoneg); if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) - cmd->supported |= (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full); + supported |= (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full); if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { - cmd->supported |= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_TP); - cmd->port = PORT_TP; + supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_TP); + cmd->base.port = PORT_TP; } else { - cmd->supported |= SUPPORTED_FIBRE; - cmd->port = PORT_FIBRE; + supported |= SUPPORTED_FIBRE; + cmd->base.port = PORT_FIBRE; } + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); - cmd->advertising = tp->link_config.advertising; + advertising = tp->link_config.advertising; if (tg3_flag(tp, PAUSE_AUTONEG)) { if (tp->link_config.flowctrl & FLOW_CTRL_RX) { if (tp->link_config.flowctrl & FLOW_CTRL_TX) { - cmd->advertising |= ADVERTISED_Pause; + advertising |= ADVERTISED_Pause; } else { - cmd->advertising |= ADVERTISED_Pause | - ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; } } else if (tp->link_config.flowctrl & FLOW_CTRL_TX) { - cmd->advertising |= ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Asym_Pause; } } + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + if (netif_running(dev) && tp->link_up) { - ethtool_cmd_speed_set(cmd, tp->link_config.active_speed); - cmd->duplex = tp->link_config.active_duplex; - cmd->lp_advertising = tp->link_config.rmt_adv; + cmd->base.speed = tp->link_config.active_speed; + cmd->base.duplex = tp->link_config.active_duplex; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.lp_advertising, + tp->link_config.rmt_adv); + if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { if (tp->phy_flags & TG3_PHYFLG_MDIX_STATE) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } } else { - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; } - cmd->phy_address = tp->phy_addr; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = tp->link_config.autoneg; - cmd->maxtxpkt = 0; - 
cmd->maxrxpkt = 0; + cmd->base.phy_address = tp->phy_addr; + cmd->base.autoneg = tp->link_config.autoneg; return 0; } -static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int tg3_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct tg3 *tp = netdev_priv(dev); - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; + u32 advertising; if (tg3_flag(tp, USE_PHYLIB)) { struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && - cmd->duplex != DUPLEX_FULL && - cmd->duplex != DUPLEX_HALF) + if (cmd->base.autoneg == AUTONEG_DISABLE && + cmd->base.duplex != DUPLEX_FULL && + cmd->base.duplex != DUPLEX_HALF) return -EINVAL; - if (cmd->autoneg == AUTONEG_ENABLE) { + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + if (cmd->base.autoneg == AUTONEG_ENABLE) { u32 mask = ADVERTISED_Autoneg | ADVERTISED_Pause | ADVERTISED_Asym_Pause; @@ -12185,7 +12197,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) else mask |= ADVERTISED_FIBRE; - if (cmd->advertising & ~mask) + if (advertising & ~mask) return -EINVAL; mask &= (ADVERTISED_1000baseT_Half | @@ -12195,13 +12207,13 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full); - cmd->advertising &= mask; + advertising &= mask; } else { if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) { if (speed != SPEED_1000) return -EINVAL; - if (cmd->duplex != DUPLEX_FULL) + if (cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else { if (speed != SPEED_100 && @@ -12212,16 +12224,16 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) tg3_full_lock(tp, 0); - tp->link_config.autoneg = cmd->autoneg; - if (cmd->autoneg == AUTONEG_ENABLE) { - tp->link_config.advertising = (cmd->advertising | + tp->link_config.autoneg = cmd->base.autoneg; + if (cmd->base.autoneg == AUTONEG_ENABLE) { + tp->link_config.advertising = (advertising | ADVERTISED_Autoneg); tp->link_config.speed = SPEED_UNKNOWN; tp->link_config.duplex = DUPLEX_UNKNOWN; } else { tp->link_config.advertising = 0; tp->link_config.speed = speed; - tp->link_config.duplex = cmd->duplex; + tp->link_config.duplex = cmd->base.duplex; } tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; @@ -14094,8 +14106,6 @@ static int tg3_get_eee(struct net_device *dev, struct ethtool_eee *edata) } static const struct ethtool_ops tg3_ethtool_ops = { - .get_settings = tg3_get_settings, - .set_settings = tg3_set_settings, .get_drvinfo = tg3_get_drvinfo, .get_regs_len = tg3_get_regs_len, .get_regs = tg3_get_regs, @@ -14128,6 +14138,8 @@ static const struct ethtool_ops tg3_ethtool_ops = { .get_ts_info = tg3_get_ts_info, .get_eee = tg3_get_eee, .set_eee = tg3_set_eee, + .get_link_ksettings = tg3_get_link_ksettings, + .set_link_ksettings = tg3_set_link_ksettings, }; static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, From 8d58881b995904bf8b150dae69be0829f832e7be Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Sep 2016 14:20:54 -0700 Subject: [PATCH 0872/1050] scsi: Avoid that toggling 
use_blk_mq triggers a memory leak This patch avoids that the following memory leak is triggered if use_blk_mq is disabled after a SCSI host has been allocated by the ib_srp driver and before the same SCSI host is freed: unreferenced object 0xffff8803a168c568 (size 256): backtrace: [] kmemleak_alloc+0x45/0xa0 [] __kmalloc_node+0x1e4/0x400 [] blk_mq_alloc_tag_set+0xb4/0x230 [] scsi_mq_setup_tags+0xc7/0xd0 [] scsi_add_host_with_dma+0x216/0x2d0 [] srp_create_target+0xe55/0x13d0 [ib_srp] [] dev_attr_store+0x13/0x20 [] sysfs_kf_write+0x40/0x50 [] kernfs_fop_write+0x137/0x1c0 [] __vfs_write+0x23/0x140 [] vfs_write+0xb0/0x190 [] SyS_write+0x44/0xa0 [] entry_SYSCALL_64_fastpath+0x18/0xa8 Fixes: 9aa9cc4221f5 ("scsi: remove the disable_blk_mq host flag") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Martin K. Petersen Cc: Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 2 ++ drivers/scsi/scsi.c | 1 - drivers/scsi/scsi_priv.h | 1 + include/scsi/scsi_host.h | 5 +---- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index ba9af4a2bd2a..ec6381e57eb7 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -486,6 +486,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->dma_boundary = 0xffffffff; + shost->use_blk_mq = scsi_use_blk_mq; + device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 1f36aca44394..1deb6adc411f 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1160,7 +1160,6 @@ bool scsi_use_blk_mq = true; bool scsi_use_blk_mq = false; #endif module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO); -EXPORT_SYMBOL_GPL(scsi_use_blk_mq); static int __init init_scsi(void) { diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 57a4b9973320..85c8a51bc563 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -29,6 +29,7 @@ extern int scsi_init_hosts(void); extern void scsi_exit_hosts(void); /* scsi.c */ +extern bool scsi_use_blk_mq; extern int scsi_setup_command_freelist(struct Scsi_Host *shost); extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); #ifdef CONFIG_SCSI_LOGGING diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 0dee7afa93d6..7e4cd53139ed 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -771,12 +771,9 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost) shost->tmf_in_progress; } -extern bool scsi_use_blk_mq; - static inline bool shost_use_blk_mq(struct Scsi_Host *shost) { - return scsi_use_blk_mq; - + return shost->use_blk_mq; } extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); From 9fb6de1b0bf4ec11573b76059da3c3b39ac7f2ff Mon Sep 17 00:00:00 2001 From: Ville Ranki Date: Thu, 15 Sep 2016 15:31:27 -0700 Subject: [PATCH 0873/1050] Input: joydev - recognize devices with Z axis as joysticks Current implementation of joydev's input_device_id table recognizes only devices with ABS_X, ABS_WHEEL or ABS_THROTTLE axes as joysticks. There are joystick devices that do not have those axes, for example TRC Rudder device. The device in question has ABS_Z, ABS_RX and ABS_RY axes causing it not being detected as joystick. This patch adds ABS_Z to the input_device_id list allowing devices with ABS_Z axis to be detected correctly. 
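Whether a given device will now match can be checked from user space by reading its absolute-axis bitmap; a small standalone diagnostic sketch (illustrative, not part of the patch) that prints the four axes joydev matches on:

	/* absck.c: print the ABS_* capability bits of an evdev node.
	 * Build: cc -o absck absck.c   Run: ./absck /dev/input/event0 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/input.h>

	#define LBITS (8 * sizeof(long))
	#define TEST_BIT(b, a) (((a)[(b) / LBITS] >> ((b) % LBITS)) & 1)

	int main(int argc, char **argv)
	{
		unsigned long absbits[(ABS_MAX + LBITS) / LBITS] = { 0 };
		int fd = open(argc > 1 ? argv[1] : "/dev/input/event0", O_RDONLY);

		if (fd < 0 || ioctl(fd, EVIOCGBIT(EV_ABS, sizeof(absbits)), absbits) < 0) {
			perror("evdev");
			return 1;
		}
		printf("ABS_X=%d ABS_Z=%d ABS_WHEEL=%d ABS_THROTTLE=%d\n",
		       (int)TEST_BIT(ABS_X, absbits), (int)TEST_BIT(ABS_Z, absbits),
		       (int)TEST_BIT(ABS_WHEEL, absbits), (int)TEST_BIT(ABS_THROTTLE, absbits));
		return 0;
	}

A device such as the rudder above reports only ABS_Z out of these four, which is exactly the case the new table entry added below covers.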
Signed-off-by: Ville Ranki Signed-off-by: Dmitry Torokhov --- drivers/input/joydev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 5d11fea3c8ec..f3135ae22df4 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -946,6 +946,12 @@ static const struct input_device_id joydev_ids[] = { .evbit = { BIT_MASK(EV_ABS) }, .absbit = { BIT_MASK(ABS_X) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, + .evbit = { BIT_MASK(EV_ABS) }, + .absbit = { BIT_MASK(ABS_Z) }, + }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, From 2b064fff8527a1052c7060c65c22cae80a9343b9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Sat, 24 Sep 2016 02:10:04 +0530 Subject: [PATCH 0874/1050] bpf samples: fix compiler errors with sockex2 and sockex3 These samples fail to compile as 'struct flow_keys' conflicts with definition in net/flow_dissector.h. Fix the same by renaming the structure used in the sample. Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/sockex2_kern.c | 10 +++++----- samples/bpf/sockex3_kern.c | 8 ++++---- samples/bpf/sockex3_user.c | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index ba0e177ff561..44e5846c988f 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -14,7 +14,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -59,7 +59,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct flow_keys *flow) + struct bpf_flow_keys *flow) { __u64 verlen; @@ -83,7 +83,7 @@ static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto } static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct flow_keys *flow) + struct bpf_flow_keys *flow) { *ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -96,7 +96,7 @@ static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_pro return nhoff; } -static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow) +static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow) { __u64 nhoff = ETH_HLEN; __u64 ip_proto; @@ -198,7 +198,7 @@ struct bpf_map_def SEC("maps") hash_map = { SEC("socket2") int bpf_prog2(struct __sk_buff *skb) { - struct flow_keys flow; + struct bpf_flow_keys flow; struct pair *value; u32 key; diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 41ae2fd21b13..95907f8d2b17 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -61,7 +61,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -88,7 +88,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } struct globals { - struct flow_keys flow; + struct bpf_flow_keys flow; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -114,14 +114,14 @@ struct pair { struct bpf_map_def SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct flow_keys), + .key_size = sizeof(struct bpf_flow_keys), .value_size = sizeof(struct pair), .max_entries = 1024, }; static void update_stats(struct __sk_buff *skb, struct globals *g) { - struct 
flow_keys key = g->flow; + struct bpf_flow_keys key = g->flow; struct pair *value; value = bpf_map_lookup_elem(&hash_map, &key); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index d4184ab5f3ac..3fcfd8c4b2a3 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -7,7 +7,7 @@ #include #include -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -49,7 +49,7 @@ int main(int argc, char **argv) (void) f; for (i = 0; i < 5; i++) { - struct flow_keys key = {}, next_key; + struct bpf_flow_keys key = {}, next_key; struct pair value; sleep(1); From 973d94d8a87c32661f1308a118074972ac5d483a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Sat, 24 Sep 2016 02:10:05 +0530 Subject: [PATCH 0875/1050] bpf samples: update tracex5 sample to use __seccomp_filter seccomp_phase1() does not exist anymore. Instead, update sample to use __seccomp_filter(). While at it, set max locked memory to unlimited. Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tracex5_kern.c | 16 +++++++--------- samples/bpf/tracex5_user.c | 3 +++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index f95f232cbab9..fd12d7154d42 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -19,20 +19,18 @@ struct bpf_map_def SEC("maps") progs = { .max_entries = 1024, }; -SEC("kprobe/seccomp_phase1") +SEC("kprobe/__seccomp_filter") int bpf_prog1(struct pt_regs *ctx) { - struct seccomp_data sd; - - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + int sc_nr = (int)PT_REGS_PARM1(ctx); /* dispatch into next BPF program depending on syscall number */ - bpf_tail_call(ctx, &progs, sd.nr); + bpf_tail_call(ctx, &progs, sc_nr); /* fall through -> unknown syscall */ - if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) { + if (sc_nr >= __NR_getuid && sc_nr <= __NR_getsid) { char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n"; - bpf_trace_printk(fmt, sizeof(fmt), sd.nr); + bpf_trace_printk(fmt, sizeof(fmt), sc_nr); } return 0; } @@ -42,7 +40,7 @@ PROG(__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -55,7 +53,7 @@ PROG(__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index a04dd3cd4358..36b5925bb137 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -6,6 +6,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include /* install fake seccomp program to enable seccomp code path inside the kernel, * so that our kprobe attached to seccomp_phase1() can be triggered @@ -27,8 +28,10 @@ int main(int ac, char **argv) { FILE *f; char filename[256]; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); From 1955351da41caa1dbf4139191358fed84909d64b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Sat, 24 Sep 2016 20:01:50 +0200 Subject: [PATCH 0876/1050] bpf: Set register type according to is_valid_access() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents future potential pointer leaks when an unprivileged eBPF program reads a pointer value from its context. Even if is_valid_access() returns a pointer type, the eBPF verifier replaces it with UNKNOWN_VALUE. The register value that contains a kernel address is then allowed to leak. Moreover, this fix allows unprivileged eBPF programs to use functions with (legitimate) pointer arguments. Not an issue currently since reg_type is only set for PTR_TO_PACKET or PTR_TO_PACKET_END in XDP and TC programs that can only be loaded as privileged. For now, the only unprivileged eBPF program allowed is for socket filtering and all the types from its context are UNKNOWN_VALUE. However, this fix is important for future unprivileged eBPF programs which could use pointers in their context. Signed-off-by: Mickaël Salaün Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8c3f794c7028..7ada3152a556 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -749,9 +749,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, err = check_ctx_access(env, off, size, t, &reg_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value(state->regs, value_regno); - if (env->allow_ptr_leaks) - /* note that reg.[id|off|range] == 0 */ - state->regs[value_regno].type = reg_type; + /* note that reg.[id|off|range] == 0 */ + state->regs[value_regno].type = reg_type; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { From 653d37d8bca0be2c7c53c8578980b06840482427 Mon Sep 17 00:00:00 2001 From: Eric Nelson Date: Sat, 24 Sep 2016 07:42:17 -0700 Subject: [PATCH 0877/1050] net: fec: remove QUIRK_HAS_RACC from i.mx25 According to the i.MX25 reference manual, this SoC does not have support for the receive accelerator (RACC) register at offset 0x1C4. http://www.nxp.com/files/dsp/doc/ref_manual/IMX25RM.pdf Signed-off-by: Eric Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 01f7e811739b..3019f27b9ef8 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -89,7 +89,7 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC, + .driver_data = FEC_QUIRK_USE_GASKET, }, { .name = "imx27-fec", .driver_data = FEC_QUIRK_HAS_RACC, From 97dc499c1ae396bc0d529f5b1aef3138de1ae7de Mon Sep 17 00:00:00 2001 From: Eric Nelson Date: Sat, 24 Sep 2016 07:42:18 -0700 Subject: [PATCH 0878/1050] net: fec: remove QUIRK_HAS_RACC from i.mx27 According to the i.MX27 reference manual, this SoC does not have support for the receive accelerator (RACC) register at offset 0x1C4. http://cache.nxp.com/files/32bit/doc/ref_manual/MCIMX27RM.pdf Signed-off-by: Eric Nelson Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3019f27b9ef8..3fb11f929adc 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -92,7 +92,7 @@ static struct platform_device_id fec_devtype[] = { .driver_data = FEC_QUIRK_USE_GASKET, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_HAS_RACC, + .driver_data = 0, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | From 3ac72b7b63d57b231dda1e8f8d13872e0d7e8603 Mon Sep 17 00:00:00 2001 From: Eric Nelson Date: Sat, 24 Sep 2016 07:42:19 -0700 Subject: [PATCH 0879/1050] net: fec: align IP header in hardware The FEC receive accelerator (RACC) supports shifting the data payload of received packets by 16-bits, which aligns the payload (IP header) on a 4-byte boundary, which is, if not required, at least strongly suggested by the Linux networking layer. Without this patch, a huge number of alignment faults will be taken by the IP stack, as seen in /proc/cpu/alignment: ~/$ cat /proc/cpu/alignment User: 0 System: 72645 (inet_gro_receive+0x104/0x27c) Skipped: 0 Half: 0 Word: 0 DWord: 0 Multi: 72645 User faults: 3 (fixup+warn) This patch was suggested by Andrew Lunn in this message to linux-netdev: http://marc.info/?l=linux-arm-kernel&m=147465452108384&w=2 and adapted from a patch by Russell King from 2014: http://git.arm.linux.org.uk/cgit/linux-arm.git/commit/?id=70d8a8a Signed-off-by: Eric Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3fb11f929adc..692ee248e486 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -180,6 +180,7 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); /* FEC receive acceleration */ #define FEC_RACC_IPDIS (1 << 1) #define FEC_RACC_PRODIS (1 << 2) +#define FEC_RACC_SHIFT16 BIT(7) #define FEC_RACC_OPTIONS (FEC_RACC_IPDIS | FEC_RACC_PRODIS) /* @@ -945,9 +946,11 @@ fec_restart(struct net_device *ndev) #if !defined(CONFIG_M5272) if (fep->quirks & FEC_QUIRK_HAS_RACC) { - /* set RX checksum */ val = readl(fep->hwp + FEC_RACC); + /* align IP header */ + val |= FEC_RACC_SHIFT16; if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) + /* set RX checksum */ val |= FEC_RACC_OPTIONS; else val &= ~FEC_RACC_OPTIONS; @@ -1428,6 +1431,12 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) prefetch(skb->data - NET_IP_ALIGN); skb_put(skb, pkt_len - 4); data = skb->data; + +#if !defined(CONFIG_M5272) + if (fep->quirks & FEC_QUIRK_HAS_RACC) + data = skb_pull_inline(skb, 2); +#endif + if (!is_copybreak && need_swap) swap_buffer(data, pkt_len); From bf1a85a8381a0f749aa321d7881b456b36eb398d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 24 Sep 2016 12:58:30 -0700 Subject: [PATCH 0880/1050] Revert "net: ethernet: bcmgenet: use phydev from struct net_device" This reverts commit 62469c76007e ("net: ethernet: bcmgenet: use phydev from struct net_device") because it causes GENETv1/2/3 adapters to expose the following behavior after an ifconfig down/up sequence: PING fainelli-linux (10.112.156.244): 56 data bytes 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.352 ms 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.472 ms 
(DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.496 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.517 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.536 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.557 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=752.448 ms (DUP!) This was previously fixed by commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on") but the commit we are reverting was essentially making this previous commit void, here is why. Without commit 62469c76007e we would have the following scenario after an ifconfig down then up sequence: - bcmgenet_open() calls bcmgenet_power_up() to make sure the PHY is initialized *before* we get to initialize the UniMAC, this is critical to ensure the PHY is in a correct state, priv->phydev is valid, this code executes fine - second time from bcmgenet_mii_probe(), through the normal phy_init_hw() call (which arguably could be optimized out) Everything is fine in that case. With commit 62469c76007e, we would have the following scenario happen after an ifconfig down then up sequence: - bcmgenet_close() calls phy_disconnect() which makes dev->phydev become NULL - when bcmgenet_open() executes again and calls bcmgenet_mii_reset() from bcmgenet_power_up() to initialize the internal PHY, the NULL check becomes true, so we do not reset the PHY, yet we keep going on and initialize the UniMAC, causing MAC activity to occur - we call bcmgenet_mii_reset() from bcmgenet_mii_probe(), but this is too late, the PHY is botched, and causes the above bogus pings/packets transmission/reception to occur Reported-by: Jaedon Shin Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 45 ++++++++++--------- .../net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 24 +++++----- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 8d4f8495dbb3..541456398dfb 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -453,25 +453,29 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, static int bcmgenet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_gset(priv->phydev, cmd); } static int bcmgenet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_sset(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -937,7 +941,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(dev->phydev, e); + return phy_ethtool_get_eee(priv->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -954,7 +958,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret =
phy_init_eee(dev->phydev, 0); + ret = phy_init_eee(priv->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -964,12 +968,14 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return phy_ethtool_set_eee(dev->phydev, e); + return phy_ethtool_set_eee(priv->phydev, e); } static int bcmgenet_nway_reset(struct net_device *dev) { - return genphy_restart_aneg(dev->phydev); + struct bcmgenet_priv *priv = netdev_priv(dev); + + return genphy_restart_aneg(priv->phydev); } /* standard ethtool support functions. */ @@ -996,13 +1002,12 @@ static struct ethtool_ops bcmgenet_ethtool_ops = { static int bcmgenet_power_down(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode) { - struct net_device *ndev = priv->dev; int ret = 0; u32 reg; switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(ndev->phydev); + phy_detach(priv->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1063,6 +1068,7 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); int val = 0; if (!netif_running(dev)) @@ -1072,10 +1078,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) + if (!priv->phydev) val = -ENODEV; else - val = phy_mii_ioctl(dev->phydev, rq, cmd); + val = phy_mii_ioctl(priv->phydev, rq, cmd); break; default: @@ -2458,7 +2464,6 @@ static void bcmgenet_irq_task(struct work_struct *work) { struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); - struct net_device *ndev = priv->dev; netif_dbg(priv, intr, priv->dev, "%s\n", __func__); @@ -2471,7 +2476,7 @@ static void bcmgenet_irq_task(struct work_struct *work) /* Link UP/DOWN event */ if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { - phy_mac_interrupt(ndev->phydev, + phy_mac_interrupt(priv->phydev, !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; } @@ -2833,7 +2838,7 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(dev->phydev); + phy_start(priv->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2932,7 +2937,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - phy_stop(dev->phydev); + phy_stop(priv->phydev); bcmgenet_intr_disable(priv); bcmgenet_disable_rx_napi(priv); bcmgenet_disable_tx_napi(priv); @@ -2958,7 +2963,7 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(dev->phydev); + phy_disconnect(priv->phydev); /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); @@ -3517,7 +3522,7 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(dev->phydev); + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3581,7 +3586,7 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(dev->phydev); + phy_init_hw(priv->phydev); /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev); @@ -3614,7 +3619,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - 
phy_resume(dev->phydev); + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 0f0868c56f05..1e2dc34d331a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -597,6 +597,7 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; + struct phy_device *phydev; bool internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e907acd81da9..457c3bc8cfff 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev) if (GENET_IS_V4(priv)) return; - if (dev->phydev) { - phy_init_hw(dev->phydev); - phy_start_aneg(dev->phydev); + if (priv->phydev) { + phy_init_hw(priv->phydev); + phy_start_aneg(priv->phydev); } } @@ -236,7 +236,6 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev) static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { - struct net_device *ndev = priv->dev; u32 reg; /* Speed settings are set in bcmgenet_mii_setup() */ @@ -245,14 +244,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL); if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(ndev->phydev, + fixed_phy_set_link_update(priv->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -303,7 +302,7 @@ int bcmgenet_mii_config(struct net_device *dev) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. 
*/ - if ((phydev->supported & PHY_BASIC_FEATURES) == + if ((priv->phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -372,7 +371,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = dev->phydev; + phydev = priv->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -383,6 +382,8 @@ int bcmgenet_mii_probe(struct net_device *dev) } } + priv->phydev = phydev; + /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -390,7 +391,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev); if (ret) { - phy_disconnect(phydev); + phy_disconnect(priv->phydev); return ret; } @@ -400,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - phydev->irq = PHY_IGNORE_INTERRUPT; + priv->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -605,6 +606,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } + priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; From 62c8d3daada97dab481a8235f39f2283daf5c8ba Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Mon, 26 Sep 2016 22:31:55 +0200 Subject: [PATCH 0881/1050] Revert "net: ethernet: bcmgenet: use new api ethtool_{get|set}_link_ksettings" This reverts commit 6b352ebccbcf ("net: ethernet: broadcom: bcmgenet: use new api ethtool_{get|set}_link_ksettings"). We need to revert the commit 62469c76007e ("net: ethernet: bcmgenet: use phydev from struct net_device"), because this commit adds a regression. As the commit 6b352ebccbcf ("net: ethernet: broadcom: bcmgenet: use new api ethtool_{get|set}_link_ksettings") depends on the first one, we also need to revert it first. Signed-off-by: Philippe Reynes Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 7f478499b649..6d6f83b20654 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int bcmgenet_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -459,11 +459,11 @@ static int bcmgenet_get_link_ksettings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_get(dev->phydev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } -static int bcmgenet_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int bcmgenet_set_settings(struct net_device *dev, + struct ethtool_cmd *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -471,7 +471,7 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_set(dev->phydev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -977,6 +977,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, + .get_settings = bcmgenet_get_settings, + .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -988,8 +990,6 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, - .get_link_ksettings = bcmgenet_get_link_ksettings, - .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ From 0299b6acf977f2c5ce9feea8faed0e264d3f01d3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 26 Sep 2016 22:31:56 +0200 Subject: [PATCH 0882/1050] Revert "net: ethernet: bcmgenet: use phydev from struct net_device" This reverts commit 62469c76007e ("net: ethernet: bcmgenet: use phydev from struct net_device") because it causes GENETv1/2/3 adapters to expose the following behavior after an ifconfig down/up sequence: PING fainelli-linux (10.112.156.244): 56 data bytes 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.352 ms 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.472 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.496 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.517 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.536 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.557 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=752.448 ms (DUP!) This was previously fixed by commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on") but the commit we are reverting was essentially making this previous commit void, here is why. 
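To make the failure concrete, here is a condensed sketch of the problematic sequence (illustrative only; the guard shown is the one visible in the bcmgenet_mii_reset() hunk below):

	/* ifconfig down */
	phy_disconnect(dev->phydev);		/* also leaves dev->phydev == NULL */

	/* ifconfig up: early PHY re-init from bcmgenet_power_up() */
	if (dev->phydev) {			/* NULL now, so the re-init is */
		phy_init_hw(dev->phydev);	/* silently skipped */
		phy_start_aneg(dev->phydev);
	}

A pointer cached in bcmgenet_priv survives the down/up cycle, which is why the revert below reintroduces priv->phydev.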
Without commit 62469c76007e we would have the following scenario after an ifconfig down then up sequence: - bcmgenet_open() calls bcmgenet_power_up() to make sure the PHY is initialized *before* we get to initialize the UniMAC, this is critical to ensure the PHY is in a correct state, priv->phydev is valid, this code executes fine - second time from bcmgenet_mii_probe(), through the normal phy_init_hw() call (which arguably could be optimized out) Everything is fine in that case. With commit 62469c76007e, we would have the following scenario happen after an ifconfig down then up sequence: - bcmgenet_close() calls phy_disconnect() which makes dev->phydev become NULL - when bcmgenet_open() executes again and calls bcmgenet_mii_reset() from bcmgenet_power_up() to initialize the internal PHY, the NULL check becomes true, so we do not reset the PHY, yet we keep going on and initialize the UniMAC, causing MAC activity to occur - we call bcmgenet_mii_reset() from bcmgenet_mii_probe(), but this is too late, the PHY is botched, and causes the above bogus pings/packets transmission/reception to occur Reported-by: Jaedon Shin Signed-off-by: Florian Fainelli Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 45 ++++++++++--------- .../net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 24 +++++----- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 6d6f83b20654..55cbe2ed2929 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -453,25 +453,29 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, static int bcmgenet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_gset(priv->phydev, cmd); } static int bcmgenet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_sset(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -937,7 +941,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(dev->phydev, e); + return phy_ethtool_get_eee(priv->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -954,7 +958,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(dev->phydev, 0); + ret = phy_init_eee(priv->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -964,12 +968,14 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return phy_ethtool_set_eee(dev->phydev, e); + return phy_ethtool_set_eee(priv->phydev, e); } static int bcmgenet_nway_reset(struct net_device *dev) { - return genphy_restart_aneg(dev->phydev); + struct bcmgenet_priv
*priv = netdev_priv(dev); + + return genphy_restart_aneg(priv->phydev); } /* standard ethtool support functions. */ @@ -996,13 +1002,12 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { static int bcmgenet_power_down(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode) { - struct net_device *ndev = priv->dev; int ret = 0; u32 reg; switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(ndev->phydev); + phy_detach(priv->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1063,6 +1068,7 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); int val = 0; if (!netif_running(dev)) @@ -1072,10 +1078,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) + if (!priv->phydev) val = -ENODEV; else - val = phy_mii_ioctl(dev->phydev, rq, cmd); + val = phy_mii_ioctl(priv->phydev, rq, cmd); break; default: @@ -2458,7 +2464,6 @@ static void bcmgenet_irq_task(struct work_struct *work) { struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); - struct net_device *ndev = priv->dev; netif_dbg(priv, intr, priv->dev, "%s\n", __func__); @@ -2471,7 +2476,7 @@ static void bcmgenet_irq_task(struct work_struct *work) /* Link UP/DOWN event */ if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { - phy_mac_interrupt(ndev->phydev, + phy_mac_interrupt(priv->phydev, !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; } @@ -2711,7 +2716,7 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(dev->phydev); + phy_start(priv->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2810,7 +2815,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - phy_stop(dev->phydev); + phy_stop(priv->phydev); bcmgenet_intr_disable(priv); bcmgenet_disable_rx_napi(priv); bcmgenet_disable_tx_napi(priv); @@ -2836,7 +2841,7 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(dev->phydev); + phy_disconnect(priv->phydev); /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); @@ -3395,7 +3400,7 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(dev->phydev); + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3459,7 +3464,7 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(dev->phydev); + phy_init_hw(priv->phydev); /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev); @@ -3492,7 +3497,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(dev->phydev); + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 0f0868c56f05..1e2dc34d331a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -597,6 +597,7 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; + struct phy_device *phydev; bool 
internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e907acd81da9..457c3bc8cfff 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev) if (GENET_IS_V4(priv)) return; - if (dev->phydev) { - phy_init_hw(dev->phydev); - phy_start_aneg(dev->phydev); + if (priv->phydev) { + phy_init_hw(priv->phydev); + phy_start_aneg(priv->phydev); } } @@ -236,7 +236,6 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev) static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { - struct net_device *ndev = priv->dev; u32 reg; /* Speed settings are set in bcmgenet_mii_setup() */ @@ -245,14 +244,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL); if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(ndev->phydev, + fixed_phy_set_link_update(priv->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -303,7 +302,7 @@ int bcmgenet_mii_config(struct net_device *dev) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if ((phydev->supported & PHY_BASIC_FEATURES) == + if ((priv->phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -372,7 +371,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = dev->phydev; + phydev = priv->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -383,6 +382,8 @@ int bcmgenet_mii_probe(struct net_device *dev) } } + priv->phydev = phydev; + /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -390,7 +391,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev); if (ret) { - phy_disconnect(phydev); + phy_disconnect(priv->phydev); return ret; } @@ -400,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - phydev->irq = PHY_IGNORE_INTERRUPT; + priv->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -605,6 +606,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } + priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; From fa92bf04f583dddf275845838e686fd052f5b49a Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Mon, 26 Sep 2016 22:31:57 +0200 Subject: [PATCH 0883/1050] net: bcmgenet: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 55cbe2ed2929..4464bc5db934 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -461,11 +461,11 @@ static int bcmgenet_get_settings(struct net_device *dev, if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(priv->phydev, cmd); + return phy_ethtool_ksettings_get(priv->phydev, cmd); } -static int bcmgenet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -475,7 +475,7 @@ static int bcmgenet_set_settings(struct net_device *dev, if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(priv->phydev, cmd); + return phy_ethtool_ksettings_set(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -983,8 +983,6 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, - .get_settings = bcmgenet_get_settings, - .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -996,6 +994,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, + .get_link_ksettings = bcmgenet_get_link_ksettings, + .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ From 1190cfdb1a19d89561ae51cff7d9c2ead24b3ebe Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 26 Sep 2016 23:59:53 -0700 Subject: [PATCH 0884/1050] VSOCK: Don't dec ack backlog twice for rejected connections If a pending socket is marked as rejected, we will decrease the sk_ack_backlog twice. So don't decrement it for rejected sockets in vsock_pending_work(). Testing of the rejected socket path was done through code modifications. Reported-by: Stefan Hajnoczi Signed-off-by: Jorgen Hansen Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 17dbbe64cd73..8a398b3fb532 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -465,6 +465,8 @@ void vsock_pending_work(struct work_struct *work) if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); + + listener->sk_ack_backlog--; } else if (!vsk->rejected) { /* We are not on the pending list and accept() did not reject * us, so we must have been accepted by our user process. 
We @@ -475,8 +477,6 @@ void vsock_pending_work(struct work_struct *work) goto out; } - listener->sk_ack_backlog--; - /* We need to remove ourself from the global connected sockets list so * incoming packets can't find this socket, and to reduce the reference * count. @@ -2010,5 +2010,5 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.1.0-k"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); From e13dbead976d79968bd616b924f300cdaf15f852 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 26 Sep 2016 21:56:21 +0300 Subject: [PATCH 0885/1050] ath10k: spelling and miscellaneous neatening Correct some trivial comment typos. Remove unnecessary parentheses in a long line. Signed-off-by: Joe Perches [kvalo@qca.qualcomm.com: drop the change for return] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ce.c | 2 +- drivers/net/wireless/ath/ath10k/htt.h | 8 +++---- drivers/net/wireless/ath/ath10k/hw.c | 2 +- drivers/net/wireless/ath/ath10k/hw.h | 2 +- drivers/net/wireless/ath/ath10k/targaddrs.h | 2 +- drivers/net/wireless/ath/ath10k/wmi.h | 24 ++++++++++----------- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 65d8d714e917..e7205546fa6b 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -39,7 +39,7 @@ * chooses what to send (buffer address, length). The destination * side keeps a supply of "anonymous receive buffers" available and * it handles incoming data as it arrives (when the destination - * recieves an interrupt). + * receives an interrupt). * * The sender may send a simple buffer (address/length) or it may * send a small list of buffers. When a small list is sent, hardware diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 98c14247021b..0d2ed09f202b 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -595,7 +595,7 @@ enum htt_rx_mpdu_status { /* only accept EAPOL frames */ HTT_RX_IND_MPDU_STATUS_UNAUTH_PEER, HTT_RX_IND_MPDU_STATUS_OUT_OF_SYNC, - /* Non-data in promiscous mode */ + /* Non-data in promiscuous mode */ HTT_RX_IND_MPDU_STATUS_MGMT_CTRL, HTT_RX_IND_MPDU_STATUS_TKIP_MIC_ERR, HTT_RX_IND_MPDU_STATUS_DECRYPT_ERR, @@ -900,7 +900,7 @@ struct htt_rx_in_ord_ind { * Purpose: indicate how many 32-bit integers follow the message header * - NUM_CHARS * Bits 31:16 - * Purpose: indicate how many 8-bit charaters follow the series of integers + * Purpose: indicate how many 8-bit characters follow the series of integers */ struct htt_rx_test { u8 num_ints; @@ -1042,10 +1042,10 @@ struct htt_dbg_stats_wal_tx_stats { /* illegal rate phy errors */ __le32 illgl_rate_phy_err; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_cont_xretry; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_tx_timeout; /* wal pdev resets */ diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index c2ecb9bd824a..675e75d66db2 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -85,7 +85,7 @@ const struct ath10k_hw_regs qca99x0_regs = { .ce7_base_address = 0x0004bc00, /* Note: qca99x0 supports upto 12 Copy Engines. Other than address of * CE0 and CE1 no other copy engine is directly referred in the code. 
- * It is not really neccessary to assign address for newly supported + * It is not really necessary to assign address for newly supported * CEs in this address table. * Copy Engine Address * CE8 0x0004c000 diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 308e423d4b99..4a01bcff49a7 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -284,7 +284,7 @@ void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, #define QCA_REV_9377(ar) ((ar)->hw_rev == ATH10K_HW_QCA9377) #define QCA_REV_40XX(ar) ((ar)->hw_rev == ATH10K_HW_QCA4019) -/* Known pecularities: +/* Known peculiarities: * - raw appears in nwifi decap, raw and nwifi appear in ethernet decap * - raw have FCS, nwifi doesn't * - ethernet frames have 802.11 header decapped and parts (base hdr, cipher diff --git a/drivers/net/wireless/ath/ath10k/targaddrs.h b/drivers/net/wireless/ath/ath10k/targaddrs.h index aaf53a81e78b..a47cab44d9c8 100644 --- a/drivers/net/wireless/ath/ath10k/targaddrs.h +++ b/drivers/net/wireless/ath/ath10k/targaddrs.h @@ -405,7 +405,7 @@ Fw Mode/SubMode Mask * 1. target firmware would check magic number and if it's a match, firmware * would consider the bits[0:15] are valid and base on that to calculate * the end of DRAM. Early allocation would be located at that area and - * may be reclaimed when necesary + * may be reclaimed when necessary * 2. if no magic number is found, early allocation would happen at "_end" * symbol of ROM which is located before the app-data and might NOT be * re-claimable. If this is adopted, link script should keep this in diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 48e04b92e231..6a68817b90b8 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -55,7 +55,7 @@ * type. * * 6. Comment each parameter part of the WMI command/event structure by - * using the 2 stars at the begining of C comment instead of one star to + * using the 2 stars at the beginning of C comment instead of one star to * enable HTML document generation using Doxygen. * */ @@ -2087,7 +2087,7 @@ struct wmi_resource_config { * In offload mode target supports features like WOW, chatter and * other protocol offloads. In order to support them some * functionalities like reorder buffering, PN checking need to be - * done in target. This determines maximum number of peers suported + * done in target. This determines maximum number of peers supported * by target in offload mode */ __le32 num_offload_peers; @@ -2268,7 +2268,7 @@ struct wmi_resource_config { * Max. number of Tx fragments per MSDU * This parameter controls the max number of Tx fragments per MSDU. * This is sent by the target as part of the WMI_SERVICE_READY event - * and is overriden by the OS shim as required. + * and is overridden by the OS shim as required. */ __le32 max_frag_entries; } __packed; @@ -2450,7 +2450,7 @@ struct wmi_resource_config_10x { * Max. number of Tx fragments per MSDU * This parameter controls the max number of Tx fragments per MSDU. * This is sent by the target as part of the WMI_SERVICE_READY event - * and is overriden by the OS shim as required. + * and is overridden by the OS shim as required. 
*/ __le32 max_frag_entries; } __packed; @@ -2744,7 +2744,7 @@ struct wmi_init_cmd { struct wmi_host_mem_chunks mem_chunks; } __packed; -/* _10x stucture is from 10.X FW API */ +/* _10x structure is from 10.X FW API */ struct wmi_init_cmd_10x { struct wmi_resource_config_10x resource_config; struct wmi_host_mem_chunks mem_chunks; @@ -3967,7 +3967,7 @@ struct wmi_pdev_stats_tx { /* illegal rate phy errors */ __le32 illgl_rate_phy_err; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_cont_xretry; /* wal pdev continous xretry */ @@ -4461,9 +4461,9 @@ struct wmi_vdev_start_request_cmd { __le32 flags; /* ssid field. Only valid for AP/GO/IBSS/BTAmp VDEV type. */ struct wmi_ssid ssid; - /* beacon/probe reponse xmit rate. Applicable for SoftAP. */ + /* beacon/probe response xmit rate. Applicable for SoftAP. */ __le32 bcn_tx_rate; - /* beacon/probe reponse xmit power. Applicable for SoftAP. */ + /* beacon/probe response xmit power. Applicable for SoftAP. */ __le32 bcn_tx_power; /* number of p2p NOA descriptor(s) from scan entry */ __le32 num_noa_descriptors; @@ -4691,7 +4691,7 @@ enum wmi_vdev_param { WMI_VDEV_PARAM_BEACON_INTERVAL, /* Listen interval in TUs */ WMI_VDEV_PARAM_LISTEN_INTERVAL, - /* muticast rate in Mbps */ + /* multicast rate in Mbps */ WMI_VDEV_PARAM_MULTICAST_RATE, /* management frame rate in Mbps */ WMI_VDEV_PARAM_MGMT_TX_RATE, @@ -4822,7 +4822,7 @@ enum wmi_10x_vdev_param { WMI_10X_VDEV_PARAM_BEACON_INTERVAL, /* Listen interval in TUs */ WMI_10X_VDEV_PARAM_LISTEN_INTERVAL, - /* muticast rate in Mbps */ + /* multicast rate in Mbps */ WMI_10X_VDEV_PARAM_MULTICAST_RATE, /* management frame rate in Mbps */ WMI_10X_VDEV_PARAM_MGMT_TX_RATE, @@ -5067,7 +5067,7 @@ struct wmi_vdev_simple_event { } __packed; /* VDEV start response status codes */ -/* VDEV succesfully started */ +/* VDEV successfully started */ #define WMI_INIFIED_VDEV_START_RESPONSE_STATUS_SUCCESS 0x0 /* requested VDEV not found */ @@ -5383,7 +5383,7 @@ enum wmi_sta_ps_param_pspoll_count { #define WMI_UAPSD_AC_TYPE_TRIG 1 #define WMI_UAPSD_AC_BIT_MASK(ac, type) \ - ((type == WMI_UAPSD_AC_TYPE_DELI) ? (1 << (ac << 1)) : (1 << ((ac << 1) + 1))) + (type == WMI_UAPSD_AC_TYPE_DELI ? 1 << (ac << 1) : 1 << ((ac << 1) + 1)) enum wmi_sta_ps_param_uapsd { WMI_STA_PS_UAPSD_AC0_DELIVERY_EN = (1 << 0), From 828662753d60e5f95d082dd50dfc6ce1abe82095 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Sep 2016 21:56:22 +0300 Subject: [PATCH 0886/1050] ath10k: use devm_clk_get() instead of clk_get() Use the managed variant of clk_get() to simplify the failure path and the .remove callback. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 34 +++++---------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index b99ad5df383d..2de481b48e5d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -91,59 +91,37 @@ static int ath10k_ahb_clock_init(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); struct device *dev; - int ret; dev = &ar_ahb->pdev->dev; - ar_ahb->cmd_clk = clk_get(dev, "wifi_wcss_cmd"); + ar_ahb->cmd_clk = devm_clk_get(dev, "wifi_wcss_cmd"); if (IS_ERR_OR_NULL(ar_ahb->cmd_clk)) { ath10k_err(ar, "failed to get cmd clk: %ld\n", PTR_ERR(ar_ahb->cmd_clk)); - ret = ar_ahb->cmd_clk ? PTR_ERR(ar_ahb->cmd_clk) : -ENODEV; - goto out; + return ar_ahb->cmd_clk ? 
PTR_ERR(ar_ahb->cmd_clk) : -ENODEV; } - ar_ahb->ref_clk = clk_get(dev, "wifi_wcss_ref"); + ar_ahb->ref_clk = devm_clk_get(dev, "wifi_wcss_ref"); if (IS_ERR_OR_NULL(ar_ahb->ref_clk)) { ath10k_err(ar, "failed to get ref clk: %ld\n", PTR_ERR(ar_ahb->ref_clk)); - ret = ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV; - goto err_cmd_clk_put; + return ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV; } - ar_ahb->rtc_clk = clk_get(dev, "wifi_wcss_rtc"); + ar_ahb->rtc_clk = devm_clk_get(dev, "wifi_wcss_rtc"); if (IS_ERR_OR_NULL(ar_ahb->rtc_clk)) { ath10k_err(ar, "failed to get rtc clk: %ld\n", PTR_ERR(ar_ahb->rtc_clk)); - ret = ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV; - goto err_ref_clk_put; + return ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV; } return 0; - -err_ref_clk_put: - clk_put(ar_ahb->ref_clk); - -err_cmd_clk_put: - clk_put(ar_ahb->cmd_clk); - -out: - return ret; } static void ath10k_ahb_clock_deinit(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); - if (!IS_ERR_OR_NULL(ar_ahb->cmd_clk)) - clk_put(ar_ahb->cmd_clk); - - if (!IS_ERR_OR_NULL(ar_ahb->ref_clk)) - clk_put(ar_ahb->ref_clk); - - if (!IS_ERR_OR_NULL(ar_ahb->rtc_clk)) - clk_put(ar_ahb->rtc_clk); - ar_ahb->cmd_clk = NULL; ar_ahb->ref_clk = NULL; ar_ahb->rtc_clk = NULL; From c5d8a34675d9160493a990007ae85ced02241d29 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Sep 2016 21:56:23 +0300 Subject: [PATCH 0887/1050] ath10k: use devm_reset_control_get() instead of reset_control_get() Use the managed variant of reset_control_get() to simplify the failure path and the .remove callback. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 56 +++++---------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index 2de481b48e5d..751b7fed536d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -191,92 +191,56 @@ static int ath10k_ahb_rst_ctrl_init(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); struct device *dev; - int ret; dev = &ar_ahb->pdev->dev; - ar_ahb->core_cold_rst = reset_control_get(dev, "wifi_core_cold"); + ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold"); if (IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) { ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->core_cold_rst)); - ret = ar_ahb->core_cold_rst ? + return ar_ahb->core_cold_rst ? PTR_ERR(ar_ahb->core_cold_rst) : -ENODEV; - goto out; } - ar_ahb->radio_cold_rst = reset_control_get(dev, "wifi_radio_cold"); + ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold"); if (IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) { ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_cold_rst)); - ret = ar_ahb->radio_cold_rst ? + return ar_ahb->radio_cold_rst ? PTR_ERR(ar_ahb->radio_cold_rst) : -ENODEV; - goto err_core_cold_rst_put; } - ar_ahb->radio_warm_rst = reset_control_get(dev, "wifi_radio_warm"); + ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm"); if (IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) { ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_warm_rst)); - ret = ar_ahb->radio_warm_rst ? + return ar_ahb->radio_warm_rst ? 
PTR_ERR(ar_ahb->radio_warm_rst) : -ENODEV; - goto err_radio_cold_rst_put; } - ar_ahb->radio_srif_rst = reset_control_get(dev, "wifi_radio_srif"); + ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif"); if (IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) { ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_srif_rst)); - ret = ar_ahb->radio_srif_rst ? + return ar_ahb->radio_srif_rst ? PTR_ERR(ar_ahb->radio_srif_rst) : -ENODEV; - goto err_radio_warm_rst_put; } - ar_ahb->cpu_init_rst = reset_control_get(dev, "wifi_cpu_init"); + ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init"); if (IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) { ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n", PTR_ERR(ar_ahb->cpu_init_rst)); - ret = ar_ahb->cpu_init_rst ? + return ar_ahb->cpu_init_rst ? PTR_ERR(ar_ahb->cpu_init_rst) : -ENODEV; - goto err_radio_srif_rst_put; } return 0; - -err_radio_srif_rst_put: - reset_control_put(ar_ahb->radio_srif_rst); - -err_radio_warm_rst_put: - reset_control_put(ar_ahb->radio_warm_rst); - -err_radio_cold_rst_put: - reset_control_put(ar_ahb->radio_cold_rst); - -err_core_cold_rst_put: - reset_control_put(ar_ahb->core_cold_rst); - -out: - return ret; } static void ath10k_ahb_rst_ctrl_deinit(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); - if (!IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) - reset_control_put(ar_ahb->core_cold_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) - reset_control_put(ar_ahb->radio_cold_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) - reset_control_put(ar_ahb->radio_warm_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) - reset_control_put(ar_ahb->radio_srif_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) - reset_control_put(ar_ahb->cpu_init_rst); - ar_ahb->core_cold_rst = NULL; ar_ahb->radio_cold_rst = NULL; ar_ahb->radio_warm_rst = NULL; From 65901a9e70584afb840b1c013d582d07b7f61696 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Sep 2016 21:56:24 +0300 Subject: [PATCH 0888/1050] ath10k: do not check if reset is NULL Since reset_control_get() never returns NULL, we can use IS_ERR() instead of IS_ERR_OR_NULL(). The return statements can be simpler as well. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index 751b7fed536d..fe38b459544d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -195,43 +195,38 @@ static int ath10k_ahb_rst_ctrl_init(struct ath10k *ar) dev = &ar_ahb->pdev->dev; ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold"); - if (IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) { + if (IS_ERR(ar_ahb->core_cold_rst)) { ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->core_cold_rst)); - return ar_ahb->core_cold_rst ? - PTR_ERR(ar_ahb->core_cold_rst) : -ENODEV; + return PTR_ERR(ar_ahb->core_cold_rst); } ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold"); - if (IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) { + if (IS_ERR(ar_ahb->radio_cold_rst)) { ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_cold_rst)); - return ar_ahb->radio_cold_rst ? 
- PTR_ERR(ar_ahb->radio_cold_rst) : -ENODEV; + return PTR_ERR(ar_ahb->radio_cold_rst); } ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm"); - if (IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) { + if (IS_ERR(ar_ahb->radio_warm_rst)) { ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_warm_rst)); - return ar_ahb->radio_warm_rst ? - PTR_ERR(ar_ahb->radio_warm_rst) : -ENODEV; + return PTR_ERR(ar_ahb->radio_warm_rst); } ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif"); - if (IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) { + if (IS_ERR(ar_ahb->radio_srif_rst)) { ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_srif_rst)); - return ar_ahb->radio_srif_rst ? - PTR_ERR(ar_ahb->radio_srif_rst) : -ENODEV; + return PTR_ERR(ar_ahb->radio_srif_rst); } ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init"); - if (IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) { + if (IS_ERR(ar_ahb->cpu_init_rst)) { ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n", PTR_ERR(ar_ahb->cpu_init_rst)); - return ar_ahb->cpu_init_rst ? - PTR_ERR(ar_ahb->cpu_init_rst) : -ENODEV; + return PTR_ERR(ar_ahb->cpu_init_rst); } return 0; From 2f38c3c01de945234d23dd163e3528ccb413066d Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Mon, 26 Sep 2016 21:56:24 +0300 Subject: [PATCH 0889/1050] ath10k: fix rfc1042 header retrieval in QCA4019 with eth decap mode On chipsets from QCA99X0 onwards (QCA99X0, QCA9984, QCA4019 & future), rx_hdr_status is not padded to align on a 4-byte boundary. Define a new hw_params field to handle different alignment behaviour between different hw. This patch fixes improper retrieval of the rfc1042 header with QCA4019. This patch along with "ath10k: Properly remove padding from the start of rx payload" will fix traffic failure in ethernet decap mode for QCA4019.
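As a worked illustration (the header sizes are assumed for the example, not taken from the patch): for a QoS data frame with hdr_len = 26 and a CCMP crypto_len = 8, the old hard-coded alignment computed round_up(26, 4) + round_up(8, 4) = 28 + 8 = 36 bytes from the start of the 802.11 header, while the unpadded layout on these chips places the rfc1042 header at 26 + 8 = 34 bytes, leaving the lookup two bytes off. With decap_align_bytes = 1, round_up() becomes the identity and the computed offset matches the hardware layout; decap_align_bytes = 4 on the older chips preserves the previous behaviour.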
Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 12 ++++++++++++ drivers/net/wireless/ath/ath10k/htt_rx.c | 5 +++-- drivers/net/wireless/ath/ath10k/hw.h | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 3a8984ba9f74..98af0053d30d 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -68,6 +68,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA9887_HW_1_0_VERSION, @@ -87,6 +88,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_2_1_VERSION, @@ -105,6 +107,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_2_1_VERSION, @@ -123,6 +126,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_3_0_VERSION, @@ -141,6 +145,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_3_2_VERSION, @@ -160,6 +165,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, @@ -184,6 +190,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -208,6 +215,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -231,6 +239,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -249,6 +258,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA9377_HW_1_1_DEV_VERSION, @@ -267,6 +277,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA4019_HW_1_0_DEV_VERSION, @@ -292,6 +303,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, }; diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index a3785a9aa843..0b4c1562420f 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1103,6 +1103,7 @@ static void *ath10k_htt_rx_h_find_rfc1042(struct ath10k *ar, size_t hdr_len, crypto_len; void *rfc1042; bool is_first, is_last, is_amsdu; + int bytes_aligned = ar->hw_params.decap_align_bytes; rxd = (void *)msdu->data - sizeof(*rxd); hdr = (void 
*)rxd->rx_hdr_status; @@ -1119,8 +1120,8 @@ static void *ath10k_htt_rx_h_find_rfc1042(struct ath10k *ar, hdr_len = ieee80211_hdrlen(hdr->frame_control); crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype); - rfc1042 += round_up(hdr_len, 4) + - round_up(crypto_len, 4); + rfc1042 += round_up(hdr_len, bytes_aligned) + + round_up(crypto_len, bytes_aligned); } if (is_amsdu) diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 4a01bcff49a7..6038b7486f1d 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -408,6 +408,9 @@ struct ath10k_hw_params { bool sw_decrypt_mcast_mgmt; const struct ath10k_hw_ops *hw_ops; + + /* Number of bytes used for alignment in rx_hdr_status of rx desc. */ + int decap_align_bytes; }; struct htt_rx_desc; From aa66ba0c31c69d13e5a59096a67775b776dccbec Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 26 Sep 2016 21:56:25 +0300 Subject: [PATCH 0890/1050] ath10k: fix typo in logging message Signed-off-by: Ben Greear Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0a44dab5a287..76297d69f1ed 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2793,7 +2793,7 @@ static void ath10k_bss_disassoc(struct ieee80211_hw *hw, ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) - ath10k_warn(ar, "faield to down vdev %i: %d\n", + ath10k_warn(ar, "failed to down vdev %i: %d\n", arvif->vdev_id, ret); arvif->def_wep_key_idx = -1; From 15138fdf327da6132b6e00e3d8c083476eb9aea2 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 26 Sep 2016 21:56:26 +0300 Subject: [PATCH 0891/1050] ath10k: document cycle count related counters They are not necessarily named in an intuitive manner, so at least add some comments to help the next person. 
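Given the semantics the comments below assign (rx_clear_count as channel busy time, cycle_count as total on-channel time), a typical consumer-side calculation is channel utilisation from the deltas of two snapshots. A hedged userspace sketch, not ath10k code; the wrap-around handling is an assumption about the 32-bit firmware counters:

#include <stdint.h>
#include <stdio.h>

static unsigned int busy_percent(uint32_t clear_prev, uint32_t clear_now,
				 uint32_t cycle_prev, uint32_t cycle_now)
{
	/* Unsigned subtraction tolerates a single 32-bit counter wrap. */
	uint32_t clear_delta = clear_now - clear_prev;
	uint32_t cycle_delta = cycle_now - cycle_prev;

	if (!cycle_delta)
		return 0;
	return (unsigned int)((uint64_t)clear_delta * 100 / cycle_delta);
}

int main(void)
{
	/* Example snapshot values only. */
	printf("channel busy: %u%%\n", busy_percent(1000, 41000, 2000, 102000));
	return 0;
}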
Signed-off-by: Ben Greear Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 8 ++++---- drivers/net/wireless/ath/ath10k/wmi.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 6ec9495bcc04..dda49af1eb74 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -201,10 +201,10 @@ struct ath10k_fw_stats_pdev { /* PDEV stats */ s32 ch_noise_floor; - u32 tx_frame_count; - u32 rx_frame_count; - u32 rx_clear_count; - u32 cycle_count; + u32 tx_frame_count; /* Cycles spent transmitting frames */ + u32 rx_frame_count; /* Cycles spent receiving frames */ + u32 rx_clear_count; /* Total channel busy time, evidently */ + u32 cycle_count; /* Total on-channel time */ u32 phy_err_count; u32 chan_tx_power; u32 ack_rx_bad; diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 6a68817b90b8..1b243c899bef 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -4222,10 +4222,10 @@ struct wmi_10_2_stats_event { */ struct wmi_pdev_stats_base { __le32 chan_nf; - __le32 tx_frame_count; - __le32 rx_frame_count; - __le32 rx_clear_count; - __le32 cycle_count; + __le32 tx_frame_count; /* Cycles spent transmitting frames */ + __le32 rx_frame_count; /* Cycles spent receiving frames */ + __le32 rx_clear_count; /* Total channel busy time, evidently */ + __le32 cycle_count; /* Total on-channel time */ __le32 phy_err_count; __le32 chan_tx_pwr; } __packed; From 30d2049b3277cdf6964996f86626add08cf160df Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 26 Sep 2016 21:56:26 +0300 Subject: [PATCH 0892/1050] ath10k: support up to 64 vdevs The (1 << x) - 1 trick won't work when you are trying to fill up all 64 bits, so add a special case for that. Signed-off-by: Ben Greear [kvalo@qca.qualcomm.com: remove the sentence about moving limits] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 98af0053d30d..21ae8d663e67 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1972,7 +1972,10 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_hif_stop; } - ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; + if (ar->max_num_vdevs >= 64) + ar->free_vdev_map = 0xFFFFFFFFFFFFFFFFLL; + else + ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; INIT_LIST_HEAD(&ar->arvifs); From 6bf563d50af2de34024bde0de1733d4028f9400c Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Mon, 26 Sep 2016 14:33:49 +0800 Subject: [PATCH 0893/1050] net: ethernet: mediatek: add to stop PDMA while stopping the frame engine Stop the PDMA while the frame engine is being stopped. Signed-off-by: Nelson Chang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4cc50c03c12b..62de68dac036 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1784,6 +1784,7 @@ static int mtk_stop(struct net_device *dev) napi_disable(&eth->rx_napi); mtk_stop_dma(eth, MTK_QDMA_GLO_CFG); + mtk_stop_dma(eth, MTK_PDMA_GLO_CFG); mtk_dma_free(eth); From ca3ba106a9979dd21988cec296462b4f49b37ace Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Mon, 26 Sep 2016 14:33:50 +0800 Subject: [PATCH 0894/1050] net: ethernet: mediatek: bug fix to disable HW LRO (1) Modify the register settings for LRO relinquishments. (2) Jump out of the waiting loop once the LRO relinquishments are done. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 62de68dac036..ddf20a00654e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1374,6 +1374,7 @@ static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) msleep(20); continue; } + break; } /* invalidate lro rings */ diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7e194f795c9a..30031959d6de 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -102,8 +102,8 @@ #define MTK_LRO_EN BIT(0) #define MTK_L3_CKS_UPD_EN BIT(7) #define MTK_LRO_ALT_PKT_CNT_MODE BIT(21) -#define MTK_LRO_RING_RELINQUISH_REQ (0x3 << 26) -#define MTK_LRO_RING_RELINQUISH_DONE (0x3 << 29) +#define MTK_LRO_RING_RELINQUISH_REQ (0x7 << 26) +#define MTK_LRO_RING_RELINQUISH_DONE (0x7 << 29) #define MTK_PDMA_LRO_CTRL_DW1 0x984 #define MTK_PDMA_LRO_CTRL_DW2 0x988 From 4b1d488a285a446329825d5fe91f987b7880e6e5 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 26 Sep 2016 13:45:25 +0300 Subject: [PATCH 0895/1050] act_ife: Fix external mac header on encode On the ife encode side, the external mac header is copied from the original packet and may be overridden if the user requests. Before, the mac header copy was done from a memory region that might not be accessible anymore, as skb_cow_head might free it and copy the packet. This led to random values in the external mac header when the values were not set by the user. This fix takes the internal mac header from the packet, after the call to skb_cow_head. Fixes: ef6980b6becb ("net sched: introduce IFE action") Acked-by: Jamal Hadi Salim Signed-off-by: Yotam Gigi Signed-off-by: David S.
Miller --- net/sched/act_ife.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81315e1..88ac9a8c9bbd 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -740,8 +740,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - iethh = eth_hdr(skb); - err = skb_cow_head(skb, hdrm); if (unlikely(err)) { ife->tcf_qstats.drops++; @@ -752,6 +750,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) skb_push(skb, skb->dev->hard_header_len); + iethh = (struct ethhdr *)skb->data; __skb_push(skb, hdrm); memcpy(skb->data, iethh, skb->mac_len); skb_reset_mac_header(skb); From c006da0be033b6ddcd27ee603d0ee01491236642 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 26 Sep 2016 13:45:26 +0300 Subject: [PATCH 0896/1050] act_ife: Fix false encoding On the ife encode side, the action stores the different tlvs inside the ife header, where each tlv length field should refer to the length of the whole tlv (without additional padding) and not just the data length. On the ife decode side, the action iterates over the tlvs in the ife header and parses them one by one, where in each iteration the current pointer is advanced according to the tlv size. Before, the encoding included only the data length inside the tlv, which led to false parsing of the ife header. In addition, because the loop counter was unsigned, it could lead to an infinite parsing loop. This fix changes the loop counter to be signed and fixes the encoding to take the tlv type and size into account. Fixes: 28a10c426e81 ("net sched: fix encoding to use real length") Acked-by: Jamal Hadi Salim Signed-off-by: Yotam Gigi Signed-off-by: David S. Miller --- net/sched/act_ife.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 88ac9a8c9bbd..4a60cd5e1875 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) u32 *tlv = (u32 *)(skbdata); u16 totlen = nla_total_size(dlen); /*alignment + hdr */ char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | dlen; + u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN); *tlv = htonl(htlv); memset(dptr, 0, totlen - NLA_HDRLEN); @@ -627,7 +627,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data; - u16 ifehdrln = ifehdr->metalen; + int ifehdrln = (int)ifehdr->metalen; struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data); spin_lock(&ife->tcf_lock); From e96e0eded1335b9cfac71fcdd989d682eb3f8412 Mon Sep 17 00:00:00 2001 From: "jbaron@akamai.com" Date: Mon, 26 Sep 2016 11:00:44 -0400 Subject: [PATCH 0897/1050] bnx2x: free the mac filter group list before freeing the cmd The group list must be freed prior to freeing the command, otherwise we have a use-after-free. Signed-off-by: Jason Baron Cc: Yuval Mintz Cc: Ariel Elior Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 4947a9cbf0c1..cea6bdcde33f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2714,8 +2714,8 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, elem_group = (struct bnx2x_mcast_elem_group *) __get_free_page(GFP_ATOMIC | __GFP_ZERO); if (!elem_group) { - kfree(new_cmd); bnx2x_free_groups(&new_cmd->group_head); + kfree(new_cmd); return -ENOMEM; } total_elems -= MCAST_MAC_ELEMS_PER_PG; From ad5748893b27b9b27b2deb597443ad6702719c20 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 23 Sep 2016 11:27:19 -0700 Subject: [PATCH 0898/1050] rtlwifi: Add switch variable to 'switch case not processed' messages Help along debugging by showing what switch/case variable is not being processed in these messages. Signed-off-by: Joe Perches Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/pci.c | 3 ++- drivers/net/wireless/realtek/rtlwifi/ps.c | 2 +- .../wireless/realtek/rtlwifi/rtl8188ee/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8188ee/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8188ee/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8188ee/phy.c | 10 ++++++---- .../realtek/rtlwifi/rtl8192c/fw_common.c | 4 ++-- .../realtek/rtlwifi/rtl8192c/phy_common.c | 8 +++++--- .../wireless/realtek/rtlwifi/rtl8192ce/hw.c | 7 ++++--- .../wireless/realtek/rtlwifi/rtl8192ce/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192ce/phy.c | 7 ++----- .../wireless/realtek/rtlwifi/rtl8192cu/hw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192cu/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192cu/phy.c | 7 ++----- .../wireless/realtek/rtlwifi/rtl8192de/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192de/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8192de/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192de/phy.c | 15 +++++++------- .../wireless/realtek/rtlwifi/rtl8192ee/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192ee/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8192ee/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192ee/phy.c | 10 ++++++---- .../wireless/realtek/rtlwifi/rtl8192se/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8192se/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8192se/phy.c | 5 +++-- .../wireless/realtek/rtlwifi/rtl8723ae/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723ae/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8723ae/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723ae/phy.c | 10 ++++++---- .../wireless/realtek/rtlwifi/rtl8723be/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723be/hw.c | 10 +++++----- .../wireless/realtek/rtlwifi/rtl8723be/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8723be/phy.c | 12 ++++++----- .../wireless/realtek/rtlwifi/rtl8821ae/fw.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8821ae/hw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8821ae/led.c | 4 ++-- .../wireless/realtek/rtlwifi/rtl8821ae/phy.c | 20 ++++++------------- 38 files changed, 128 insertions(+), 123 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 7aee5ebb147d..f95760c13c56 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -765,7 +765,8 @@ static int 
rtl_op_config(struct ieee80211_hw *hw, u32 changed) mac->bw_40 = false; mac->bw_80 = false; RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + channel_type); break; } } diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index d12586d4f845..0dfa9eac3926 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -179,7 +179,8 @@ static void _rtl_pci_update_default_setting(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + rtlpci->const_support_pciaspm); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 9a64f9b703e5..18d979affc18 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -151,7 +151,7 @@ static bool rtl_ps_set_rf_state(struct ieee80211_hw *hw, default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", state_toset); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c index 629125658b87..5360d5332359 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c @@ -334,7 +334,7 @@ static void _rtl88e_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } isfw_read = _rtl88e_check_fw_read_last_h2c(hw, boxnum); @@ -405,7 +405,7 @@ static void _rtl88e_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 4ab6201daf1a..3285117845f5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -357,7 +357,7 @@ void rtl88ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -571,7 +571,8 @@ void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -735,7 +736,7 @@ void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -2352,7 +2353,7 @@ void rtl88ee_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c index b504bd092fc4..f05c2c674165 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c @@ -62,7 +62,7 @@ void rtl88ee_sw_led_on(struct 
ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -100,7 +100,7 @@ void rtl88ee_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c index 7498a1218cba..fffaa92eda81 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c @@ -1346,7 +1346,8 @@ static bool _rtl88e_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -2128,7 +2129,7 @@ bool rtl88e_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -2166,7 +2167,8 @@ static void rtl88e_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2319,7 +2321,7 @@ static bool _rtl88ee_phy_set_rf_power_state(struct ieee80211_hw *hw, } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c index 43fcb25c885f..7d152466152b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c @@ -352,7 +352,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -456,7 +456,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c index 60ab2ec4f4ef..27e3d5f9ca34 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c @@ -910,7 +910,8 @@ bool _rtl92c_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -1567,7 +1568,7 @@ bool rtl92c_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -1605,7 +1606,8 @@ void rtl92c_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } 
rtlphy->set_io_inprogress = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index 244607951e28..6d308f9b7ff9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -143,7 +143,7 @@ void rtl92ce_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -367,7 +367,8 @@ void rtl92ce_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -2154,7 +2155,7 @@ void rtl92ce_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c index 8283e9b27639..24e483ba3fa4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c @@ -62,7 +62,7 @@ void rtl92ce_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -97,7 +97,7 @@ void rtl92ce_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c index 1ee5a6ae9960..46d0d945f283 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c @@ -300,12 +300,9 @@ bool rtl92c_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; default: break; @@ -554,7 +551,7 @@ static bool _rtl92ce_phy_set_rf_power_state(struct ieee80211_hw *hw, } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index 8789752f8143..ae8f055483fa 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -1560,7 +1560,7 @@ void rtl92cu_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -1931,7 +1931,7 @@ void rtl92cu_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c 
b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c index 75a2deb23af1..8514ab652520 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c @@ -62,7 +62,7 @@ void rtl92cu_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -95,7 +95,7 @@ void rtl92cu_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c index c972fa50926d..4b2976465905 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c @@ -277,12 +277,9 @@ bool rtl92cu_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; default: break; @@ -517,7 +514,7 @@ static bool _rtl92cu_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c index 62ef8209718f..8de29cc3ced0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c @@ -435,7 +435,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", boxnum); break; } isfw_read = _rtl92d_check_fw_read_last_h2c(hw, boxnum); @@ -512,7 +512,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", cmd_len); break; } bwrite_success = true; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index 57205514801c..5369011914bb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -166,7 +166,7 @@ void rtl92de_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -361,7 +361,8 @@ void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -502,7 +503,7 @@ void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -2171,7 +2172,7 @@ void rtl92de_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, 
DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c index 76a57ae4af3e..811ba57eb9bb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c @@ -71,7 +71,7 @@ void rtl92de_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -106,7 +106,7 @@ void rtl92de_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c index 2a4810d32182..2a1edfd21b96 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c @@ -836,12 +836,9 @@ bool rtl92d_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -2850,7 +2847,8 @@ static bool _rtl92d_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } break; @@ -2963,7 +2961,8 @@ static void rtl92d_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2994,7 +2993,7 @@ bool rtl92d_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -3182,7 +3181,7 @@ bool rtl92d_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c index 0708eedd9671..b3f6a9ed15d4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c @@ -344,7 +344,7 @@ static void _rtl92ee_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -433,7 +433,7 @@ static void _rtl92ee_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index b07af8d15273..47bb6d8c8912 100644 --- 
a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -340,7 +340,7 @@ void rtl92ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -566,7 +566,8 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -685,7 +686,7 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -2463,7 +2464,7 @@ void rtl92ee_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c index 8388e371c8e2..47da05dd3076 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c @@ -61,7 +61,7 @@ void rtl92ee_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -91,7 +91,7 @@ void rtl92ee_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c index beafc9a10ad8..5ad7e753c357 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c @@ -1927,7 +1927,8 @@ static bool _rtl92ee_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -3001,7 +3002,7 @@ bool rtl92ee_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -3041,7 +3042,8 @@ static void rtl92ee_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -3187,7 +3189,7 @@ static bool _rtl92ee_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index ddfa0aee5bf8..5bad9c9ef609 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -79,7 +79,7 @@ void rtl92se_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 
*val) } default: { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -297,7 +297,8 @@ void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -433,7 +434,7 @@ void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } @@ -2465,7 +2466,7 @@ void rtl92se_set_key(struct ieee80211_hw *hw, u32 key_index, u8 *p_macaddr, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c index 44949b5cbb87..9849cb988186 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c @@ -68,7 +68,7 @@ void rtl92se_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -104,7 +104,7 @@ void rtl92se_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c index 881821f4e243..4bb75581ab38 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c @@ -442,7 +442,8 @@ static bool _rtl92s_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -648,7 +649,7 @@ bool rtl92s_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c index b7c0d38ee5b5..1186755e55b8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c @@ -124,7 +124,7 @@ static void _rtl8723e_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -230,7 +230,7 @@ static void _rtl8723e_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index ba30efc2d195..2bf603b4e12c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -143,7 +143,7 @@ void rtl8723e_get_hw_reg(struct 
ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", variable); break; } } @@ -366,7 +366,8 @@ void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -546,7 +547,7 @@ void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", variable); break; } } @@ -2225,7 +2226,7 @@ void rtl8723e_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c index 13173351cbfd..c7be9342136c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c @@ -63,7 +63,7 @@ void rtl8723e_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -105,7 +105,7 @@ void rtl8723e_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c index 601b78efedfb..17b58cb32d55 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c @@ -1023,7 +1023,8 @@ static bool _rtl8723e_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -1499,7 +1500,7 @@ bool rtl8723e_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -1536,7 +1537,8 @@ static void rtl8723e_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -1682,7 +1684,7 @@ static bool _rtl8723e_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c index d5da0f3c1217..8c5c27ce8e05 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c @@ -122,7 +122,7 @@ static void _rtl8723be_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -195,7 +195,7 @@ static void 
_rtl8723be_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 82e4476cab23..999c1ac3ee57 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -350,7 +350,7 @@ void rtl8723be_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -607,7 +607,8 @@ void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -723,8 +724,7 @@ void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", - variable); + "switch case %#x not processed\n", variable); break; } } @@ -2565,7 +2565,7 @@ void rtl8723be_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c index 4196efb723a2..497913eb3b37 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c @@ -58,7 +58,7 @@ void rtl8723be_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -100,7 +100,7 @@ void rtl8723be_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c index 285818df149b..3cc2232f25ca 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c @@ -837,7 +837,7 @@ bool rtl8723be_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -1507,7 +1507,8 @@ static bool _rtl8723be_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -2515,7 +2516,7 @@ bool rtl8723be_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -2553,7 +2554,8 @@ static void rtl8723be_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } 
rtlphy->set_io_inprogress = false; @@ -2705,7 +2707,7 @@ static bool _rtl8723be_phy_set_rf_power_state(struct ieee80211_hw *hw, default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c index a4fc70e8c9c0..b665446351a4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c @@ -392,7 +392,7 @@ static void _rtl8821ae_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -481,7 +481,7 @@ static void _rtl8821ae_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 0cddf1ad0fff..1281ebe0c30a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -480,7 +480,7 @@ void rtl8821ae_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -671,7 +671,8 @@ void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -800,7 +801,7 @@ void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -3934,7 +3935,7 @@ void rtl8821ae_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c index ba1946a0280e..fcb3b28c6b8f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c @@ -60,7 +60,7 @@ void rtl8821ae_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -133,7 +133,7 @@ void rtl8821ae_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c index a71bfe38e7e1..5dad402171c2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c @@ -2063,12 +2063,9 @@ bool rtl8812ae_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; 
case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -2133,16 +2130,10 @@ bool rtl8821ae_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, break; case RF90_PATH_B: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -4670,7 +4661,7 @@ bool rtl8821ae_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -4714,7 +4705,8 @@ static void rtl8821ae_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -4820,7 +4812,7 @@ static bool _rtl8821ae_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } From 1cc49a5b5466e30a26700af3ac9d85c8ebb1b936 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 24 Sep 2016 11:57:18 -0500 Subject: [PATCH 0899/1050] rtlwifi: Add HAL_DEF_WOWLAN case to *_get_hw() routines Only rtl8821ae implements WOWLAN; however, the other drivers may receive a call requesting information about this mode. The other drivers need to ignore the request rather than logging that the default branch of the switch statement has been reached. 
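A condensed, self-contained sketch of the pattern each hunk below applies; the enum values and the stderr logging stand in for the driver's real variables and RT_TRACE:

#include <stdio.h>

enum hw_variables { HW_VAR_ETHER_ADDR, HAL_DEF_WOWLAN, HW_VAR_BOGUS = 0x7f };

static void get_hw_reg(enum hw_variables variable)
{
	switch (variable) {
	case HW_VAR_ETHER_ADDR:
		/* ...variables the chip actually reports... */
		break;
	case HAL_DEF_WOWLAN:
		/* Recognised request, but nothing to report on non-WOWLAN
		 * chips: swallow it silently instead of hitting default. */
		break;
	default:
		fprintf(stderr, "switch case %#x not processed\n",
			(unsigned int)variable);
		break;
	}
}

int main(void)
{
	get_hw_reg(HAL_DEF_WOWLAN);	/* now silent */
	get_hw_reg(HW_VAR_BOGUS);	/* still logged */
	return 0;
}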
Reported by: Jean Delvare Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 2 ++ 7 files changed, 14 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 3285117845f5..37d6efc3d240 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -355,6 +355,8 @@ void rtl88ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index 6d308f9b7ff9..a47be73a0980 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -141,6 +141,8 @@ void rtl92ce_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index 5369011914bb..d91f8bbfe7a0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -164,6 +164,8 @@ void rtl92de_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) case HW_VAR_INT_AC: *((bool *)(val)) = rtlpriv->dm.disable_tx_int; break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index 47bb6d8c8912..ebf663e1a81a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -338,6 +338,8 @@ void rtl92ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; } break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index 5bad9c9ef609..52e4430edb54 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -77,6 +77,8 @@ void rtl92se_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((bool *)(val)) = rtlpriv->dm.current_mrc_switch; break; } + case HAL_DEF_WOWLAN: + break; default: { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index 2bf603b4e12c..f8be0bd7e326 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -141,6 +141,8 @@ void rtl8723e_get_hw_reg(struct ieee80211_hw *hw, u8 
variable, u8 *val) break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, "switch case %#x not processed\n", variable); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 999c1ac3ee57..aba60c3145c5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -348,6 +348,8 @@ void rtl8723be_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; } break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, "switch case %#x not processed\n", variable); From 8334ffdc8290949bd838e0d83e200a87f2c0efc4 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 24 Sep 2016 11:57:19 -0500 Subject: [PATCH 0900/1050] rtlwifi: Add explicit values to hw_variables enum The entries in this enum may be referenced in debug output. Adding explicit values simplifies the lookup. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/wifi.h | 198 ++++++++++---------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index c5086c2229aa..595f7d5d091a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -394,110 +394,110 @@ enum io_type { }; enum hw_variables { - HW_VAR_ETHER_ADDR, - HW_VAR_MULTICAST_REG, - HW_VAR_BASIC_RATE, - HW_VAR_BSSID, - HW_VAR_MEDIA_STATUS, - HW_VAR_SECURITY_CONF, - HW_VAR_BEACON_INTERVAL, - HW_VAR_ATIM_WINDOW, - HW_VAR_LISTEN_INTERVAL, - HW_VAR_CS_COUNTER, - HW_VAR_DEFAULTKEY0, - HW_VAR_DEFAULTKEY1, - HW_VAR_DEFAULTKEY2, - HW_VAR_DEFAULTKEY3, - HW_VAR_SIFS, - HW_VAR_R2T_SIFS, - HW_VAR_DIFS, - HW_VAR_EIFS, - HW_VAR_SLOT_TIME, - HW_VAR_ACK_PREAMBLE, - HW_VAR_CW_CONFIG, - HW_VAR_CW_VALUES, - HW_VAR_RATE_FALLBACK_CONTROL, - HW_VAR_CONTENTION_WINDOW, - HW_VAR_RETRY_COUNT, - HW_VAR_TR_SWITCH, - HW_VAR_COMMAND, - HW_VAR_WPA_CONFIG, - HW_VAR_AMPDU_MIN_SPACE, - HW_VAR_SHORTGI_DENSITY, - HW_VAR_AMPDU_FACTOR, - HW_VAR_MCS_RATE_AVAILABLE, - HW_VAR_AC_PARAM, - HW_VAR_ACM_CTRL, - HW_VAR_DIS_Req_Qsize, - HW_VAR_CCX_CHNL_LOAD, - HW_VAR_CCX_NOISE_HISTOGRAM, - HW_VAR_CCX_CLM_NHM, - HW_VAR_TxOPLimit, - HW_VAR_TURBO_MODE, - HW_VAR_RF_STATE, - HW_VAR_RF_OFF_BY_HW, - HW_VAR_BUS_SPEED, - HW_VAR_SET_DEV_POWER, + HW_VAR_ETHER_ADDR = 0x0, + HW_VAR_MULTICAST_REG = 0x1, + HW_VAR_BASIC_RATE = 0x2, + HW_VAR_BSSID = 0x3, + HW_VAR_MEDIA_STATUS = 0x4, + HW_VAR_SECURITY_CONF = 0x5, + HW_VAR_BEACON_INTERVAL = 0x6, + HW_VAR_ATIM_WINDOW = 0x7, + HW_VAR_LISTEN_INTERVAL = 0x8, + HW_VAR_CS_COUNTER = 0x9, + HW_VAR_DEFAULTKEY0 = 0xa, + HW_VAR_DEFAULTKEY1 = 0xb, + HW_VAR_DEFAULTKEY2 = 0xc, + HW_VAR_DEFAULTKEY3 = 0xd, + HW_VAR_SIFS = 0xe, + HW_VAR_R2T_SIFS = 0xf, + HW_VAR_DIFS = 0x10, + HW_VAR_EIFS = 0x11, + HW_VAR_SLOT_TIME = 0x12, + HW_VAR_ACK_PREAMBLE = 0x13, + HW_VAR_CW_CONFIG = 0x14, + HW_VAR_CW_VALUES = 0x15, + HW_VAR_RATE_FALLBACK_CONTROL = 0x16, + HW_VAR_CONTENTION_WINDOW = 0x17, + HW_VAR_RETRY_COUNT = 0x18, + HW_VAR_TR_SWITCH = 0x19, + HW_VAR_COMMAND = 0x1a, + HW_VAR_WPA_CONFIG = 0x1b, + HW_VAR_AMPDU_MIN_SPACE = 0x1c, + HW_VAR_SHORTGI_DENSITY = 0x1d, + HW_VAR_AMPDU_FACTOR = 0x1e, + HW_VAR_MCS_RATE_AVAILABLE = 0x1f, + HW_VAR_AC_PARAM = 0x20, + HW_VAR_ACM_CTRL = 0x21, + HW_VAR_DIS_Req_Qsize = 0x22, + HW_VAR_CCX_CHNL_LOAD = 0x23, + HW_VAR_CCX_NOISE_HISTOGRAM = 0x24, + HW_VAR_CCX_CLM_NHM = 0x25, + HW_VAR_TxOPLimit = 
0x26, + HW_VAR_TURBO_MODE = 0x27, + HW_VAR_RF_STATE = 0x28, + HW_VAR_RF_OFF_BY_HW = 0x29, + HW_VAR_BUS_SPEED = 0x2a, + HW_VAR_SET_DEV_POWER = 0x2b, - HW_VAR_RCR, - HW_VAR_RATR_0, - HW_VAR_RRSR, - HW_VAR_CPU_RST, - HW_VAR_CHECK_BSSID, - HW_VAR_LBK_MODE, - HW_VAR_AES_11N_FIX, - HW_VAR_USB_RX_AGGR, - HW_VAR_USER_CONTROL_TURBO_MODE, - HW_VAR_RETRY_LIMIT, - HW_VAR_INIT_TX_RATE, - HW_VAR_TX_RATE_REG, - HW_VAR_EFUSE_USAGE, - HW_VAR_EFUSE_BYTES, - HW_VAR_AUTOLOAD_STATUS, - HW_VAR_RF_2R_DISABLE, - HW_VAR_SET_RPWM, - HW_VAR_H2C_FW_PWRMODE, - HW_VAR_H2C_FW_JOINBSSRPT, - HW_VAR_H2C_FW_MEDIASTATUSRPT, - HW_VAR_H2C_FW_P2P_PS_OFFLOAD, - HW_VAR_FW_PSMODE_STATUS, - HW_VAR_INIT_RTS_RATE, - HW_VAR_RESUME_CLK_ON, - HW_VAR_FW_LPS_ACTION, - HW_VAR_1X1_RECV_COMBINE, - HW_VAR_STOP_SEND_BEACON, - HW_VAR_TSF_TIMER, - HW_VAR_IO_CMD, + HW_VAR_RCR = 0x2c, + HW_VAR_RATR_0 = 0x2d, + HW_VAR_RRSR = 0x2e, + HW_VAR_CPU_RST = 0x2f, + HW_VAR_CHECK_BSSID = 0x30, + HW_VAR_LBK_MODE = 0x31, + HW_VAR_AES_11N_FIX = 0x32, + HW_VAR_USB_RX_AGGR = 0x33, + HW_VAR_USER_CONTROL_TURBO_MODE = 0x34, + HW_VAR_RETRY_LIMIT = 0x35, + HW_VAR_INIT_TX_RATE = 0x36, + HW_VAR_TX_RATE_REG = 0x37, + HW_VAR_EFUSE_USAGE = 0x38, + HW_VAR_EFUSE_BYTES = 0x39, + HW_VAR_AUTOLOAD_STATUS = 0x3a, + HW_VAR_RF_2R_DISABLE = 0x3b, + HW_VAR_SET_RPWM = 0x3c, + HW_VAR_H2C_FW_PWRMODE = 0x3d, + HW_VAR_H2C_FW_JOINBSSRPT = 0x3e, + HW_VAR_H2C_FW_MEDIASTATUSRPT = 0x3f, + HW_VAR_H2C_FW_P2P_PS_OFFLOAD = 0x40, + HW_VAR_FW_PSMODE_STATUS = 0x41, + HW_VAR_INIT_RTS_RATE = 0x42, + HW_VAR_RESUME_CLK_ON = 0x43, + HW_VAR_FW_LPS_ACTION = 0x44, + HW_VAR_1X1_RECV_COMBINE = 0x45, + HW_VAR_STOP_SEND_BEACON = 0x46, + HW_VAR_TSF_TIMER = 0x47, + HW_VAR_IO_CMD = 0x48, - HW_VAR_RF_RECOVERY, - HW_VAR_H2C_FW_UPDATE_GTK, - HW_VAR_WF_MASK, - HW_VAR_WF_CRC, - HW_VAR_WF_IS_MAC_ADDR, - HW_VAR_H2C_FW_OFFLOAD, - HW_VAR_RESET_WFCRC, + HW_VAR_RF_RECOVERY = 0x49, + HW_VAR_H2C_FW_UPDATE_GTK = 0x4a, + HW_VAR_WF_MASK = 0x4b, + HW_VAR_WF_CRC = 0x4c, + HW_VAR_WF_IS_MAC_ADDR = 0x4d, + HW_VAR_H2C_FW_OFFLOAD = 0x4e, + HW_VAR_RESET_WFCRC = 0x4f, - HW_VAR_HANDLE_FW_C2H, - HW_VAR_DL_FW_RSVD_PAGE, - HW_VAR_AID, - HW_VAR_HW_SEQ_ENABLE, - HW_VAR_CORRECT_TSF, - HW_VAR_BCN_VALID, - HW_VAR_FWLPS_RF_ON, - HW_VAR_DUAL_TSF_RST, - HW_VAR_SWITCH_EPHY_WoWLAN, - HW_VAR_INT_MIGRATION, - HW_VAR_INT_AC, - HW_VAR_RF_TIMING, + HW_VAR_HANDLE_FW_C2H = 0x50, + HW_VAR_DL_FW_RSVD_PAGE = 0x51, + HW_VAR_AID = 0x52, + HW_VAR_HW_SEQ_ENABLE = 0x53, + HW_VAR_CORRECT_TSF = 0x54, + HW_VAR_BCN_VALID = 0x55, + HW_VAR_FWLPS_RF_ON = 0x56, + HW_VAR_DUAL_TSF_RST = 0x57, + HW_VAR_SWITCH_EPHY_WoWLAN = 0x58, + HW_VAR_INT_MIGRATION = 0x59, + HW_VAR_INT_AC = 0x5a, + HW_VAR_RF_TIMING = 0x5b, - HAL_DEF_WOWLAN, - HW_VAR_MRC, - HW_VAR_KEEP_ALIVE, - HW_VAR_NAV_UPPER, + HAL_DEF_WOWLAN = 0x5c, + HW_VAR_MRC = 0x5d, + HW_VAR_KEEP_ALIVE = 0x5e, + HW_VAR_NAV_UPPER = 0x5f, - HW_VAR_MGT_FILTER, - HW_VAR_CTRL_FILTER, - HW_VAR_DATA_FILTER, + HW_VAR_MGT_FILTER = 0x60, + HW_VAR_CTRL_FILTER = 0x61, + HW_VAR_DATA_FILTER = 0x62, }; enum rt_media_status { From 2f0e56fa37cce60a5ac5d451bcadec51cd711436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 27 Sep 2016 12:12:24 +0200 Subject: [PATCH 0901/1050] brcmfmac: replace WARNING on timeout with a simple error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even with timeout increased to 950 ms we get WARNINGs from time to time. It mostly happens on A-MPDU stalls (e.g. when station goes out of range). 
It may take up to 5-10 seconds for the firmware to recover and for that time it doesn't process packets. It's still useful to have a message on timeout as it may indicate some firmware problem and incorrect key update. Raising a WARNING, however, wasn't really necessary: it doesn't point to any driver bug anymore and the backtrace wasn't very useful. Signed-off-by: Rafał Miłecki Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 17152805d96f..5eaac13e2317 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -1155,7 +1155,8 @@ int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp) !brcmf_get_pend_8021x_cnt(ifp), MAX_WAIT_FOR_8021X_TX); - WARN_ON(!err); + if (!err) + brcmf_err("Timed out waiting for no pending 802.1x packets\n"); return !err; } From 7f00ee2bbc630900ba16fc2690473f3e2db0e264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 27 Sep 2016 14:11:04 +0200 Subject: [PATCH 0902/1050] brcmfmac: use correct skb freeing helper when deleting flowring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flowrings contain skbs waiting for transmission that were passed to us by netif. It means we checked every one of them looking for the 802.1x Ethernet type. When deleting a flowring we have to use a freeing function that will check for the 802.1x type as well. Freeing skbs without a proper check was leading to the counter not being properly decreased. This was triggering a WARNING every time brcmf_netdev_wait_pend8021x was called. Signed-off-by: Rafał Miłecki Acked-by: Arend van Spriel Cc: stable@vger.kernel.org # 4.5+ Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/flowring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c index b16b367b0569..d0b738da2458 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c @@ -234,13 +234,20 @@ static void brcmf_flowring_block(struct brcmf_flowring *flow, u16 flowid, void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid) { + struct brcmf_bus *bus_if = dev_get_drvdata(flow->dev); struct brcmf_flowring_ring *ring; + struct brcmf_if *ifp; u16 hash_idx; + u8 ifidx; struct sk_buff *skb; ring = flow->rings[flowid]; if (!ring) return; + + ifidx = brcmf_flowring_ifidx_get(flow, flowid); + ifp = brcmf_get_ifp(bus_if->drvr, ifidx); + brcmf_flowring_block(flow, flowid, false); hash_idx = ring->hash_id; flow->hash[hash_idx].ifidx = BRCMF_FLOWRING_INVALID_IFIDX; @@ -249,7 +256,7 @@ void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid) skb = skb_dequeue(&ring->skblist); while (skb) { - brcmu_pkt_buf_free_skb(skb); + brcmf_txfinalize(ifp, skb, false); skb = skb_dequeue(&ring->skblist); } From a951ed85abd4615e98e36b536e3b3b07b22a88ac Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 25 Sep 2016 23:34:48 +0300 Subject: [PATCH 0903/1050] drm/amdgpu: disable CRTCs before teardown Some code called by drm_crtc_force_disable_all() wants to wait for all fences, so only do fence teardown after CRTCs are disabled. 
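Put differently, anything that may still wait on fences has to run while the fence driver is alive. An abbreviated sketch of the resulting teardown order, using only the amdgpu calls quoted in the diff below (error handling and the remaining fini steps are omitted):

    void amdgpu_device_fini_sketch(struct amdgpu_device *adev)
    {
            adev->shutdown = true;
            /* Disable CRTCs first: code reached from
             * drm_crtc_force_disable_all() may wait for all fences. */
            drm_crtc_force_disable_all(adev->ddev);
            amdgpu_bo_evict_vram(adev);
            amdgpu_ib_pool_fini(adev);
            /* Only now is it safe to tear down fence handling. */
            amdgpu_fence_driver_fini(adev);
    }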
Fixes: 84b89bdcedf8 ("drm/amdgpu: Turn off CRTCs on driver unload") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Grazvydas Ignotas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index df7ab2458e50..39c01b942ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1708,11 +1708,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: finishing device.\n"); adev->shutdown = true; + drm_crtc_force_disable_all(adev->ddev); /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); - drm_crtc_force_disable_all(adev->ddev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); kfree(adev->ip_block_status); From 670bb4fd21c966d0d2a59ad4a99bb4889f9a2987 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 26 Sep 2016 15:32:50 -0400 Subject: [PATCH 0904/1050] drm/radeon/si/dpm: add workaround for Jet parts Add clock quirks for Jet parts. Reviewed-by: Sonny Jiang Tested-by: Sonny Jiang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index e6abc09b67e3..1f78ec2548ec 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3015,6 +3015,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (rdev->pdev->device == 0x6811 && rdev->pdev->revision == 0x81) max_mclk = 120000; + /* limit sclk/mclk on Jet parts for stability */ + if (rdev->pdev->device == 0x6665 && + rdev->pdev->revision == 0xc3) { + max_sclk = 75000; + max_mclk = 80000; + } if (rps->vce_active) { rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; From 7a823471ad42cba6c3b658494d8437ca5c166292 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Aug 2016 15:08:09 +0000 Subject: [PATCH 0905/1050] igb: fix non static symbol warning Fixes the following sparse warning: drivers/net/ethernet/intel/igb/igb_ethtool.c:2707:5: warning: symbol 'igb_rxnfc_write_vlan_prio_filter' was not declared. Should it be static? Signed-off-by: Wei Yongjun Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 0c33eca7c832..737b664d004c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2704,8 +2704,8 @@ static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter, return 0; } -int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, - struct igb_nfc_filter *input) +static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input) { struct e1000_hw *hw = &adapter->hw; u8 vlan_priority; From 3d05fd0a99a4a8b49d486116c01c7ac089b64670 Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Wed, 24 Aug 2016 08:38:46 -0700 Subject: [PATCH 0906/1050] igbvf: bump version to igbvf-2.4.0 Bump the version to match other igbvf drivers. 
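For context, igbvf keeps DRV_VERSION as a plain string literal, while igb (bumped in the following patch) assembles it from MAJ/MIN/BUILD at preprocess time. A standalone sketch of that stringification idiom; the two-level macro pair mirrors the kernel's include/linux/stringify.h:

    #include <stdio.h>

    #define __stringify_1(x...) #x
    #define __stringify(x...)   __stringify_1(x)

    #define MAJ 5
    #define MIN 4
    #define BUILD 0
    #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
            __stringify(BUILD) "-k"

    int main(void)
    {
            printf("%s\n", DRV_VERSION); /* prints "5.4.0-k" */
            return 0;
    }

The extra expansion level matters: it lets MAJ expand to 5 before # stringifies it, whereas a single-level macro would produce "MAJ.MIN.BUILD-k".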
Signed-off-by: Todd Fujinaka Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igbvf/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index b0778ba65083..12bb877df860 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -47,7 +47,7 @@ #include "igbvf.h" -#define DRV_VERSION "2.0.2-k" +#define DRV_VERSION "2.4.0-k" char igbvf_driver_name[] = "igbvf"; const char igbvf_driver_version[] = DRV_VERSION; static const char igbvf_driver_string[] = From 0742337c1984c7cdbac9465cdda004cbdc69d41c Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Wed, 24 Aug 2016 08:40:23 -0700 Subject: [PATCH 0907/1050] igb: bump version to igb-5.4.0 Bump igb version to match other igb drivers. Signed-off-by: Todd Fujinaka Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a83aa13a5bf4..edc9a6ac5169 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -58,7 +58,7 @@ #include "igb.h" #define MAJ 5 -#define MIN 3 +#define MIN 4 #define BUILD 0 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" From ac28b41aac35e1000712aaa3aee19bf30fd9a312 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 9 Sep 2016 09:10:51 -0700 Subject: [PATCH 0908/1050] igb: restore PPS signal on igb_ptp_reset When a reset occurs, the PPS SYS_WRAP interrupt was not re-enabled which resulted in disabling of the PPS signaling. Fix this by recording when the interrupt is on and ensuring that we re-enable it every time we reset. Signed-off-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 1 + drivers/net/ethernet/intel/igb/igb_ptp.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 03fbe4b7663b..d11093dce1b9 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -489,6 +489,7 @@ struct igb_adapter { struct timecounter tc; u32 tx_hwtstamp_timeouts; u32 rx_hwtstamp_cleared; + bool pps_sys_wrap_on; struct ptp_pin_desc sdp_config[IGB_N_SDP]; struct { diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 1dd14e166dc8..a7895c4cbcc3 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -591,6 +591,7 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, tsim |= TSINTR_SYS_WRAP; else tsim &= ~TSINTR_SYS_WRAP; + igb->pps_sys_wrap_on = !!on; wr32(E1000_TSIM, tsim); spin_unlock_irqrestore(&igb->tmreg_lock, flags); return 0; @@ -1235,7 +1236,9 @@ void igb_ptp_reset(struct igb_adapter *adapter) case e1000_i211: wr32(E1000_TSAUXC, 0x0); wr32(E1000_TSSDP, 0x0); - wr32(E1000_TSIM, TSYNC_INTERRUPTS); + wr32(E1000_TSIM, + TSYNC_INTERRUPTS | + (adapter->pps_sys_wrap_on ? 
TSINTR_SYS_WRAP : 0)); wr32(E1000_IMS, E1000_IMS_TS); break; default: From b761fe226be61eba6ae20a424b288559b8a606b5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 27 Sep 2016 08:42:41 -0700 Subject: [PATCH 0909/1050] bpf: clean up put_cpu_var usage put_cpu_var takes the percpu data, not the data returned from get_cpu_var. This doesn't change the behavior. Cc: Tejun Heo Cc: Alexei Starovoitov Signed-off-by: Shaohua Li Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Signed-off-by: David S. Miller --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7b7baaed9ed4..aa6d98154106 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1031,7 +1031,7 @@ BPF_CALL_0(bpf_user_rnd_u32) state = &get_cpu_var(bpf_user_rnd_state); res = prandom_u32_state(state); - put_cpu_var(state); + put_cpu_var(bpf_user_rnd_state); return res; } From 3796c3cbfb91ad1e3269aa0d15060f888063924b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 27 Sep 2016 08:42:42 -0700 Subject: [PATCH 0910/1050] lib: clean up put_cpu_var usage put_cpu_var takes the percpu data, not the data returned from get_cpu_var. This doesn't change the behavior. Cc: Tejun Heo Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: David S. Miller --- lib/random32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/random32.c b/lib/random32.c index 69ed593aab07..915982b304bb 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -81,7 +81,7 @@ u32 prandom_u32(void) u32 res; res = prandom_u32_state(state); - put_cpu_var(state); + put_cpu_var(net_rand_state); return res; } @@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes) struct rnd_state *state = &get_cpu_var(net_rand_state); prandom_bytes_state(state, buf, bytes); - put_cpu_var(state); + put_cpu_var(net_rand_state); } EXPORT_SYMBOL(prandom_bytes); From 90fd68dcf9a763f7e575c8467415bd8a66d073f4 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 23 Sep 2016 12:36:02 +0200 Subject: [PATCH 0911/1050] drm/udl: fix line iterator in damage handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The udl damage handler is supposed to render 'height' lines, but its iterator has an obvious typo that makes it miss most lines if the rectangle does not cover 0/0. Fix the damage handler to correctly render all lines. 
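Concretely, the old loop ran i from y up to height, so a rectangle with y > 0 lost its bottom y lines, and one starting at or below row 'height' rendered nothing at all. A standalone sketch contrasting the two iterations (the numbers are illustrative, not from a real framebuffer):

    #include <stdio.h>

    int main(void)
    {
            int y = 100, height = 50, i, lines;

            lines = 0;
            for (i = y; i < height; i++) /* broken: y already exceeds height */
                    lines++;
            printf("broken loop: %d lines\n", lines); /* 0 */

            lines = 0;
            for (i = y; i < y + height; i++) /* fixed: walk the damaged rows */
                    lines++;
            printf("fixed loop: %d lines\n", lines); /* 50 */
            return 0;
    }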
This is a fallout from: commit e375882406d0cc24030746638592004755ed4ae0 Author: Noralf Trønnes Date: Thu Apr 28 17:18:37 2016 +0200 drm/udl: Use drm_fb_helper deferred_io support Tested-by: poma Cc: stable@vger.kernel.org # 4.7+ Reviewed-by: Daniel Vetter Signed-off-by: David Herrmann Reviewed-by: Eric Engestrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/udl/udl_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 9688bfa92ccd..611b6b9bb3cb 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -122,7 +122,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, return 0; cmd = urb->transfer_buffer; - for (i = y; i < height ; i++) { + for (i = y; i < y + height ; i++) { const int line_offset = fb->base.pitches[0] * i; const int byte_offset = line_offset + (x * bpp); const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp); From eb523f42d77a43f80bb9c57a34fbdc8406c7b075 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Tue, 27 Sep 2016 11:21:18 +0300 Subject: [PATCH 0912/1050] net/sched: cls_flower: Use a proper mask value for enc key id parameter The current code uses the encapsulation key id value as the mask of that parameter, which is wrong. Fix that by using a full mask. Fixes: bc3103f1ed40 ('net/sched: cls_flower: Classify packet in ip tunnels') Signed-off-by: Hadar Hen Zion Acked-by: Amir Vadai Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2af09c872a1a..f6f40fba599b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -481,7 +481,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, } fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, - &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id.keyid)); return 0; @@ -919,7 +919,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, - &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id))) goto nla_put_failure; From b90eb754949931b2e4481b1df9a03f84d4be66ba Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:29 +0200 Subject: [PATCH 0913/1050] fib: introduce FIB notification infrastructure This allows passing information about added/deleted FIB entries/rules to whoever is interested. This is done in a very similar way to how devinet notifies address additions/removals. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 34 ++++++++++++++++++++-- net/ipv4/fib_frontend.c | 16 +++++------ net/ipv4/fib_rules.c | 10 +++++++ net/ipv4/fib_trie.c | 62 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 108 insertions(+), 14 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 7d4a72e75f33..116a9c0eb455 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -185,6 +186,33 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? 
: \ FIB_RES_SADDR(net, res)) +struct fib_notifier_info { + struct net *net; +}; + +struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 tb_id; + u32 nlflags; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, +}; + +int register_fib_notifier(struct notifier_block *nb); +int unregister_fib_notifier(struct notifier_block *nb); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + struct fib_table { struct hlist_node tb_hlist; u32 tb_id; @@ -196,11 +224,11 @@ struct fib_table { int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); -int fib_table_insert(struct fib_table *, struct fib_config *); -int fib_table_delete(struct fib_table *, struct fib_config *); +int fib_table_insert(struct net *, struct fib_table *, struct fib_config *); +int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); -int fib_table_flush(struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e56a4c20a3c..86c43dc9a60e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -182,7 +182,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(tb); + flushed += fib_table_flush(net, tb); } if (flushed) @@ -590,13 +590,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); else err = -ENOBUFS; } @@ -719,7 +719,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); errout: return err; } @@ -741,7 +741,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); errout: return err; } @@ -828,9 +828,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - fib_table_insert(tb, &cfg); + fib_table_insert(net, tb, &cfg); else - fib_table_delete(tb, &cfg); + fib_table_delete(net, tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -1254,7 +1254,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(tb); + fib_table_flush(net, tb); fib_free_table(tb); } } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 770bebed6b28..ebadf6b99499 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -164,6 +164,14 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int call_fib_rule_notifiers(struct net *net, + enum 
fib_event_type event_type) +{ + struct fib_notifier_info info; + + return call_fib_notifiers(net, event_type, &info); +} + static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, @@ -221,6 +229,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, net->ipv4.fib_has_custom_rules = true; fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; errout: @@ -243,6 +252,7 @@ static int fib4_rule_delete(struct fib_rule *rule) #endif net->ipv4.fib_has_custom_rules = true; fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 241f27bbd7ad..51a4537eb145 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,44 @@ #include #include "fib_lookup.h" +static BLOCKING_NOTIFIER_HEAD(fib_chain); + +int register_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&fib_chain, nb); +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return blocking_notifier_call_chain(&fib_chain, event_type, info); +} + +static int call_fib_entry_notifiers(struct net *net, + enum fib_event_type event_type, u32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id, u32 nlflags) +{ + struct fib_entry_notifier_info info = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .tb_id = tb_id, + .nlflags = nlflags, + }; + return call_fib_notifiers(net, event_type, &info.info); +} + #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) @@ -1076,7 +1115,8 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. */ -int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; @@ -1193,6 +1233,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); + + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, + key, plen, fi, + new_fa->fa_tos, cfg->fc_type, + tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1245,6 +1290,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, + cfg->fc_type, tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1490,7 +1537,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. 
*/ -int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *fa_to_delete; @@ -1546,6 +1594,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, cfg->fc_type, tb->tb_id); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, + fa_to_delete->fa_info, tos, cfg->fc_type, + tb->tb_id, 0); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1809,7 +1860,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1861,6 +1912,11 @@ int fib_table_flush(struct fib_table *tb) switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, tb->tb_id); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, + n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id, 0); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); From c98501879b1b1af90c7325574f2672e9efca592c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:30 +0200 Subject: [PATCH 0914/1050] fib: introduce FIB info offload flag helpers These helpers are to be used in case someone offloads the FIB entry. The result is that if the entry is offloaded to at least one device, the offload flag is set. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/ip_fib.h | 13 +++++++++++++ net/switchdev/switchdev.c | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 116a9c0eb455..ffccf1787914 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -123,6 +123,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif + unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -174,6 +175,18 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +static inline void fib_info_offload_inc(struct fib_info *fi) +{ + fi->fib_offload_cnt++; + fi->fib_flags |= RTNH_F_OFFLOAD; +} + +static inline void fib_info_offload_dec(struct fib_info *fi) +{ + if (--fi->fib_offload_cnt == 0) + fi->fib_flags &= ~RTNH_F_OFFLOAD; +} + #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 10b819308439..abd8d2a38a7d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1216,7 +1216,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags |= RTNH_F_OFFLOAD; + fib_info_offload_inc(fi); return err == -EOPNOTSUPP ? 0 : err; } @@ -1260,7 +1260,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags &= ~RTNH_F_OFFLOAD; + fib_info_offload_dec(fi); return err == -EOPNOTSUPP ? 
0 : err; } From b45f64d16d456fde027a220cc62d6b4cc315f97b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:31 +0200 Subject: [PATCH 0915/1050] mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls Until now, in order to offload a FIB entry to HW we used a switchdev op. However, that has limits, mainly in case we need to make the HW aware of all route prefixes configured in the kernel. The HW needs to know those in order to properly trap appropriate packets and pass them to the kernel to do the forwarding. The abort mechanism is now handled within the mlxsw driver. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 9 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 441 ++++++++++-------- .../mellanox/mlxsw/spectrum_switchdev.c | 9 - 3 files changed, 262 insertions(+), 197 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 73cae211a5ce..9b22863a924b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include "port.h" #include "core.h" @@ -257,6 +257,7 @@ struct mlxsw_sp_router { #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ struct list_head nexthop_group_list; struct list_head nexthop_neighs_list; + bool aborted; }; struct mlxsw_sp { @@ -296,6 +297,7 @@ struct mlxsw_sp { struct mlxsw_sp_span_entry *entries; int entries_count; } span; + struct notifier_block fib_nb; }; static inline struct mlxsw_sp_upper * @@ -584,11 +586,6 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans); -int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4); int mlxsw_sp_router_neigh_construct(struct net_device *dev, struct neighbour *n); void mlxsw_sp_router_neigh_destroy(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cc653ac2d7d6..48d50efec5e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "spectrum.h" #include "core.h" @@ -122,17 +123,20 @@ struct mlxsw_sp_nexthop_group; struct mlxsw_sp_fib_entry { struct rhash_head ht_node; + struct list_head list; struct mlxsw_sp_fib_key key; enum mlxsw_sp_fib_entry_type type; unsigned int ref_count; u16 rif; /* used for action local */ struct mlxsw_sp_vr *vr; + struct fib_info *fi; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; }; struct mlxsw_sp_fib { struct rhashtable ht; + struct list_head entry_list; unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; }; @@ -154,6 +158,7 @@ static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib, mlxsw_sp_fib_ht_params); if (err) return err; + list_add_tail(&fib_entry->list, &fib->entry_list); if (fib->prefix_ref_count[prefix_len]++ == 0) mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); return 0; @@ -166,6 +171,7 @@ static void mlxsw_sp_fib_entry_remove(struct 
mlxsw_sp_fib *fib, if (--fib->prefix_ref_count[prefix_len] == 0) mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); + list_del(&fib_entry->list); rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node, mlxsw_sp_fib_ht_params); } @@ -216,6 +222,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); if (err) goto err_rhashtable_init; + INIT_LIST_HEAD(&fib->entry_list); return fib; err_rhashtable_init: @@ -1520,85 +1527,6 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); } -static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_resources *resources; - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - int err; - - resources = mlxsw_core_resources_get(mlxsw_sp->core); - if (!resources->max_rif_valid) - return -EIO; - - mlxsw_sp->rifs = kcalloc(resources->max_rif, - sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); - if (!mlxsw_sp->rifs) - return -ENOMEM; - - mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - if (err) - goto err_rgcr_fail; - - return 0; - -err_rgcr_fail: - kfree(mlxsw_sp->rifs); - return err; -} - -static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_resources *resources; - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - int i; - - mlxsw_reg_rgcr_pack(rgcr_pl, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_rif; i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); - - kfree(mlxsw_sp->rifs); -} - -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - int err; - - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); - err = __mlxsw_sp_router_init(mlxsw_sp); - if (err) - return err; - - mlxsw_sp_lpm_init(mlxsw_sp); - err = mlxsw_sp_vrs_init(mlxsw_sp); - if (err) - goto err_vrs_init; - - err = mlxsw_sp_neigh_init(mlxsw_sp); - if (err) - goto err_neigh_init; - - return 0; - -err_neigh_init: - mlxsw_sp_vrs_fini(mlxsw_sp); -err_vrs_init: - __mlxsw_sp_router_fini(mlxsw_sp); - return err; -} - -void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - mlxsw_sp_neigh_fini(mlxsw_sp); - mlxsw_sp_vrs_fini(mlxsw_sp); - __mlxsw_sp_router_fini(mlxsw_sp); -} - static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -1706,94 +1634,98 @@ static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, MLXSW_REG_RALUE_OP_WRITE_DELETE); } -struct mlxsw_sp_router_fib4_add_info { - struct switchdev_trans_item tritem; - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_fib_entry *fib_entry; -}; - -static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) -{ - const struct mlxsw_sp_router_fib4_add_info *info = data; - struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; - struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; - struct mlxsw_sp_vr *vr = fib_entry->vr; - - mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, vr); - kfree(info); -} - static int mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4, + const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { - struct fib_info *fi = fib4->fi; + struct fib_info *fi = fen_info->fi; + struct mlxsw_sp_rif *r; + int nhsel; + int err; - if 
(fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) { + if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; } - if (fib4->type != RTN_UNICAST) + if (fen_info->type != RTN_UNICAST) return -EINVAL; - if (fi->fib_scope != RT_SCOPE_UNIVERSE) { - struct mlxsw_sp_rif *r; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev); - if (!r) - return -EINVAL; - fib_entry->rif = r->rif; - return 0; + if (!nh->nh_dev) + continue; + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev); + if (!r) { + /* In case router interface is not found for + * at least one of the nexthops, that means + * the nexthop points to some device unrelated + * to us. Set trap and pass the packets for + * this prefix to kernel. + */ + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + } } - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; - return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); + + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + fib_entry->rif = r->rif; + } else { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); + if (err) + return err; + } + fib_info_offload_inc(fen_info->fi); + return 0; } static void mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) - return; - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) + fib_info_offload_dec(fib_entry->fi); + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); } static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4) + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_entry *fib_entry; - struct fib_info *fi = fib4->fi; + struct fib_info *fi = fen_info->fi; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id, + vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr)) return ERR_CAST(vr); - fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, - sizeof(fib4->dst), - fib4->dst_len, fi->fib_dev); + fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, fi->fib_dev); if (fib_entry) { /* Already exists, just take a reference */ fib_entry->ref_count++; return fib_entry; } - fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst, - sizeof(fib4->dst), - fib4->dst_len, fi->fib_dev); + fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, fi->fib_dev); if (!fib_entry) { err = -ENOMEM; goto err_fib_entry_create; } fib_entry->vr = vr; + fib_entry->fi = fi; fib_entry->ref_count = 1; - err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry); + err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_init; @@ -1809,17 +1741,19 @@ err_fib_entry_create: static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4) + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_vr *vr; - vr = 
mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id, + MLXSW_SP_L3_PROTO_IPV4); if (!vr) return NULL; - return mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, - sizeof(fib4->dst), fib4->dst_len, - fib4->fi->fib_dev); + return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, + fen_info->fi->fib_dev); } static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, @@ -1834,60 +1768,43 @@ static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(mlxsw_sp, vr); } -static int -mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_router_fib4_add_info *info; - struct mlxsw_sp_fib_entry *fib_entry; - int err; + unsigned int last_ref_count; - fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fib4); - if (IS_ERR(fib_entry)) - return PTR_ERR(fib_entry); - - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - err = -ENOMEM; - goto err_alloc_info; - } - info->mlxsw_sp = mlxsw_sp; - info->fib_entry = fib_entry; - switchdev_trans_item_enqueue(trans, info, - mlxsw_sp_router_fib4_add_info_destroy, - &info->tritem); - return 0; - -err_alloc_info: - mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); - return err; + do { + last_ref_count = fib_entry->ref_count; + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); + } while (last_ref_count != 1); } -static int -mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_router_fib4_add_info *info; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_vr *vr; int err; - info = switchdev_trans_item_dequeue(trans); - fib_entry = info->fib_entry; - kfree(info); + if (mlxsw_sp->router.aborted) + return 0; + + fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info); + if (IS_ERR(fib_entry)) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n"); + return PTR_ERR(fib_entry); + } if (fib_entry->ref_count != 1) return 0; vr = fib_entry->vr; err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry); - if (err) + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n"); goto err_fib_entry_insert; - err = mlxsw_sp_fib_entry_update(mlxsw_sp_port->mlxsw_sp, fib_entry); + } + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) goto err_fib_entry_add; return 0; @@ -1899,24 +1816,15 @@ err_fib_entry_insert: return err; } -int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { - if (switchdev_trans_ph_prepare(trans)) - return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port, - fib4, trans); - return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port, - fib4, trans); -} - -int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_fib_entry 
*fib_entry; - fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fib4); + if (mlxsw_sp->router.aborted) + return 0; + + fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info); if (!fib_entry) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); return -ENOENT; @@ -1930,3 +1838,172 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); return 0; } + +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + char ralst_pl[MLXSW_REG_RALST_LEN]; + char raltb_pl[MLXSW_REG_RALTB_LEN]; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + int err; + + mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + if (err) + return err; + + mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); + if (err) + return err; + + mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); + if (err) + return err; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, + MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_entry *tmp; + struct mlxsw_sp_vr *vr; + int i; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) + continue; + + list_for_each_entry_safe(fib_entry, tmp, + &vr->fib->entry_list, list) { + bool do_break = &tmp->list == &vr->fib->entry_list; + + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, + fib_entry); + mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry); + if (do_break) + break; + } + } + mlxsw_sp->router.aborted = true; + err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_rif_valid) + return -EIO; + + mlxsw_sp->rifs = kcalloc(resources->max_rif, + sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; + + mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); +} + +static int mlxsw_sp_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlxsw_sp *mlxsw_sp = 
container_of(nb, struct mlxsw_sp, fib_nb); + struct fib_entry_notifier_info *fen_info = ptr; + int err; + + switch (event) { + case FIB_EVENT_ENTRY_ADD: + err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info); + if (err) + mlxsw_sp_router_fib4_abort(mlxsw_sp); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib4_del(mlxsw_sp, fen_info); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + mlxsw_sp_router_fib4_abort(mlxsw_sp); + break; + } + return NOTIFY_DONE; +} + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); + err = __mlxsw_sp_router_init(mlxsw_sp); + if (err) + return err; + + mlxsw_sp_lpm_init(mlxsw_sp); + err = mlxsw_sp_vrs_init(mlxsw_sp); + if (err) + goto err_vrs_init; + + err = mlxsw_sp_neigh_init(mlxsw_sp); + if (err) + goto err_neigh_init; + + mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event; + register_fib_notifier(&mlxsw_sp->fib_nb); + return 0; + +err_neigh_init: + mlxsw_sp_vrs_fini(mlxsw_sp); +err_vrs_init: + __mlxsw_sp_router_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + unregister_fib_notifier(&mlxsw_sp->fib_nb); + mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_vrs_fini(mlxsw_sp); + __mlxsw_sp_router_fini(mlxsw_sp); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 2b04b76b503e..5e00c79e8133 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1044,11 +1044,6 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = mlxsw_sp_router_fib4_add(mlxsw_sp_port, - SWITCHDEV_OBJ_IPV4_FIB(obj), - trans); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -1181,10 +1176,6 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = mlxsw_sp_router_fib4_del(mlxsw_sp_port, - SWITCHDEV_OBJ_IPV4_FIB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj)); From 936bd486564aa3edb52313aa7c2e7381e0bcaba3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:32 +0200 Subject: [PATCH 0916/1050] rocker: use FIB notifications instead of switchdev calls Until now, in order to offload a FIB entry to HW we used a switchdev op. Use the newly introduced FIB notifier infrastructure to process FIB entries and offload them in HW. The abort mechanism is now handled within the rocker driver. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/rocker/rocker.h | 15 ++- drivers/net/ethernet/rocker/rocker_main.c | 120 +++++++++++++++------ drivers/net/ethernet/rocker/rocker_ofdpa.c | 115 +++++++++++++++----- 3 files changed, 185 insertions(+), 65 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 1ab995f7146b..2eb9b49569d5 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,9 @@ struct rocker_port { struct rocker_dma_ring_info rx_ring; }; +struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker); + struct rocker_world_ops; struct rocker { @@ -66,6 +70,7 @@ struct rocker { spinlock_t cmd_ring_lock; /* for cmd ring accesses */ struct rocker_dma_ring_info cmd_ring; struct rocker_dma_ring_info event_ring; + struct notifier_block fib_nb; struct rocker_world_ops *wops; void *wpriv; }; @@ -117,11 +122,6 @@ struct rocker_world_ops { int (*port_obj_vlan_dump)(const struct rocker_port *rocker_port, struct switchdev_obj_port_vlan *vlan, switchdev_obj_dump_cb_t *cb); - int (*port_obj_fib4_add)(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans); - int (*port_obj_fib4_del)(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4); int (*port_obj_fdb_add)(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans); @@ -141,6 +141,11 @@ struct rocker_world_ops { int (*port_ev_mac_vlan_seen)(struct rocker_port *rocker_port, const unsigned char *addr, __be16 vlan_id); + int (*fib4_add)(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info); + int (*fib4_del)(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info); + void (*fib4_abort)(struct rocker *rocker); }; extern struct rocker_world_ops rocker_ofdpa_ops; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 1f0c08602eba..5424fb341613 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1624,29 +1624,6 @@ rocker_world_port_obj_vlan_dump(const struct rocker_port *rocker_port, return wops->port_obj_vlan_dump(rocker_port, vlan, cb); } -static int -rocker_world_port_obj_fib4_add(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_obj_fib4_add) - return -EOPNOTSUPP; - return wops->port_obj_fib4_add(rocker_port, fib4, trans); -} - -static int -rocker_world_port_obj_fib4_del(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_obj_fib4_del) - return -EOPNOTSUPP; - return wops->port_obj_fib4_del(rocker_port, fib4); -} - static int rocker_world_port_obj_fdb_add(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, @@ -1733,6 +1710,34 @@ static int rocker_world_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, return wops->port_ev_mac_vlan_seen(rocker_port, addr, vlan_id); } +static int rocker_world_fib4_add(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (!wops->fib4_add) + return 0; + return wops->fib4_add(rocker, fen_info); +} + +static 
int rocker_world_fib4_del(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (!wops->fib4_del) + return 0; + return wops->fib4_del(rocker, fen_info); +} + +static void rocker_world_fib4_abort(struct rocker *rocker) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (wops->fib4_abort) + wops->fib4_abort(rocker); +} + /***************** * Net device ops *****************/ @@ -2096,11 +2101,6 @@ static int rocker_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = rocker_world_port_obj_fib4_add(rocker_port, - SWITCHDEV_OBJ_IPV4_FIB(obj), - trans); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_world_port_obj_fdb_add(rocker_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -2125,10 +2125,6 @@ static int rocker_port_obj_del(struct net_device *dev, err = rocker_world_port_obj_vlan_del(rocker_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = rocker_world_port_obj_fib4_del(rocker_port, - SWITCHDEV_OBJ_IPV4_FIB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_world_port_obj_fdb_del(rocker_port, SWITCHDEV_OBJ_PORT_FDB(obj)); @@ -2175,6 +2171,31 @@ static const struct switchdev_ops rocker_port_switchdev_ops = { .switchdev_port_obj_dump = rocker_port_obj_dump, }; +static int rocker_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct rocker *rocker = container_of(nb, struct rocker, fib_nb); + struct fib_entry_notifier_info *fen_info = ptr; + int err; + + switch (event) { + case FIB_EVENT_ENTRY_ADD: + err = rocker_world_fib4_add(rocker, fen_info); + if (err) + rocker_world_fib4_abort(rocker); + else + break; + case FIB_EVENT_ENTRY_DEL: + rocker_world_fib4_del(rocker, fen_info); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rocker_world_fib4_abort(rocker); + break; + } + return NOTIFY_DONE; +} + /******************** * ethtool interface ********************/ @@ -2740,6 +2761,9 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_probe_ports; } + rocker->fib_nb.notifier_call = rocker_router_fib_event; + register_fib_notifier(&rocker->fib_nb); + dev_info(&pdev->dev, "Rocker switch with id %*phN\n", (int)sizeof(rocker->hw.id), &rocker->hw.id); @@ -2771,6 +2795,7 @@ static void rocker_remove(struct pci_dev *pdev) { struct rocker *rocker = pci_get_drvdata(pdev); + unregister_fib_notifier(&rocker->fib_nb); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); rocker_remove_ports(rocker); free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); @@ -2799,6 +2824,37 @@ static bool rocker_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &rocker_port_netdev_ops; } +static bool rocker_port_dev_check_under(const struct net_device *dev, + struct rocker *rocker) +{ + struct rocker_port *rocker_port; + + if (!rocker_port_dev_check(dev)) + return false; + + rocker_port = netdev_priv(dev); + if (rocker_port->rocker != rocker) + return false; + + return true; +} + +struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (rocker_port_dev_check_under(dev, rocker)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev(dev, lower_dev, iter) { + if (rocker_port_dev_check_under(lower_dev, rocker)) + return netdev_priv(lower_dev); + } + return NULL; +} + static int 
rocker_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index fcad907baecf..431a60804272 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -99,6 +99,7 @@ struct ofdpa_flow_tbl_entry { struct ofdpa_flow_tbl_key key; size_t key_len; u32 key_crc32; /* key */ + struct fib_info *fi; }; struct ofdpa_group_tbl_entry { @@ -189,6 +190,7 @@ struct ofdpa { spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */ u32 neigh_tbl_next_index; unsigned long ageing_time; + bool fib_aborted; }; struct ofdpa_port { @@ -1043,7 +1045,8 @@ static int ofdpa_flow_tbl_ucast4_routing(struct ofdpa_port *ofdpa_port, __be16 eth_type, __be32 dst, __be32 dst_mask, u32 priority, enum rocker_of_dpa_table_id goto_tbl, - u32 group_id, int flags) + u32 group_id, struct fib_info *fi, + int flags) { struct ofdpa_flow_tbl_entry *entry; @@ -1060,6 +1063,7 @@ static int ofdpa_flow_tbl_ucast4_routing(struct ofdpa_port *ofdpa_port, entry->key.ucast_routing.group_id = group_id; entry->key_len = offsetof(struct ofdpa_flow_tbl_key, ucast_routing.group_id); + entry->fi = fi; return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); } @@ -1425,7 +1429,7 @@ static int ofdpa_port_ipv4_neigh(struct ofdpa_port *ofdpa_port, eth_type, ip_addr, inet_make_mask(32), priority, goto_tbl, - group_id, flags); + group_id, NULL, flags); if (err) netdev_err(ofdpa_port->dev, "Error (%d) /32 unicast route %pI4 group 0x%08x\n", @@ -2390,7 +2394,7 @@ found: static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, struct switchdev_trans *trans, __be32 dst, - int dst_len, const struct fib_info *fi, + int dst_len, struct fib_info *fi, u32 tb_id, int flags) { const struct fib_nh *nh; @@ -2426,7 +2430,7 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, err = ofdpa_flow_tbl_ucast4_routing(ofdpa_port, trans, eth_type, dst, dst_mask, priority, goto_tbl, - group_id, flags); + group_id, fi, flags); if (err) netdev_err(ofdpa_port->dev, "Error (%d) IPv4 route %pI4\n", err, &dst); @@ -2718,28 +2722,6 @@ static int ofdpa_port_obj_vlan_dump(const struct rocker_port *rocker_port, return err; } -static int ofdpa_port_obj_fib4_add(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) -{ - struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - return ofdpa_port_fib_ipv4(ofdpa_port, trans, - htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, 0); -} - -static int ofdpa_port_obj_fib4_del(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - return ofdpa_port_fib_ipv4(ofdpa_port, NULL, - htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, - OFDPA_OP_FLAG_REMOVE); -} - static int ofdpa_port_obj_fdb_add(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) @@ -2922,6 +2904,82 @@ static int ofdpa_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, return ofdpa_port_fdb(ofdpa_port, NULL, addr, vlan_id, flags); } +static struct ofdpa_port *ofdpa_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker) +{ + struct rocker_port *rocker_port; + + rocker_port = rocker_port_dev_lower_find(dev, rocker); + return rocker_port ? 
rocker_port->wpriv : NULL; +} + +static int ofdpa_fib4_add(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + int err; + + if (ofdpa->fib_aborted) + return 0; + ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + if (!ofdpa_port) + return 0; + err = ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst), + fen_info->dst_len, fen_info->fi, + fen_info->tb_id, 0); + if (err) + return err; + fib_info_offload_inc(fen_info->fi); + return 0; +} + +static int ofdpa_fib4_del(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + + if (ofdpa->fib_aborted) + return 0; + ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + if (!ofdpa_port) + return 0; + fib_info_offload_dec(fen_info->fi); + return ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst), + fen_info->dst_len, fen_info->fi, + fen_info->tb_id, OFDPA_OP_FLAG_REMOVE); +} + +static void ofdpa_fib4_abort(struct rocker *rocker) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + struct ofdpa_flow_tbl_entry *flow_entry; + struct hlist_node *tmp; + unsigned long flags; + int bkt; + + if (ofdpa->fib_aborted) + return; + + spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); + hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) { + if (flow_entry->key.tbl_id != + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) + continue; + ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev, + rocker); + if (!ofdpa_port) + continue; + fib_info_offload_dec(flow_entry->fi); + ofdpa_flow_tbl_del(ofdpa_port, NULL, OFDPA_OP_FLAG_REMOVE, + flow_entry); + } + spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags); + ofdpa->fib_aborted = true; +} + struct rocker_world_ops rocker_ofdpa_ops = { .kind = "ofdpa", .priv_size = sizeof(struct ofdpa), @@ -2941,8 +2999,6 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_obj_vlan_add = ofdpa_port_obj_vlan_add, .port_obj_vlan_del = ofdpa_port_obj_vlan_del, .port_obj_vlan_dump = ofdpa_port_obj_vlan_dump, - .port_obj_fib4_add = ofdpa_port_obj_fib4_add, - .port_obj_fib4_del = ofdpa_port_obj_fib4_del, .port_obj_fdb_add = ofdpa_port_obj_fdb_add, .port_obj_fdb_del = ofdpa_port_obj_fdb_del, .port_obj_fdb_dump = ofdpa_port_obj_fdb_dump, @@ -2951,4 +3007,7 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_neigh_update = ofdpa_port_neigh_update, .port_neigh_destroy = ofdpa_port_neigh_destroy, .port_ev_mac_vlan_seen = ofdpa_port_ev_mac_vlan_seen, + .fib4_add = ofdpa_fib4_add, + .fib4_del = ofdpa_fib4_del, + .fib4_abort = ofdpa_fib4_abort, }; From 347e3b28c1ba24c1ae2f30290d8247480ab9ce14 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:33 +0200 Subject: [PATCH 0917/1050] switchdev: remove FIB offload infrastructure Since this is now taken care of by FIB notifier, remove the code, with all unused dependencies. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 2 - include/net/switchdev.h | 40 --------- net/ipv4/fib_frontend.c | 13 --- net/ipv4/fib_rules.c | 2 - net/ipv4/fib_trie.c | 104 +--------------------- net/switchdev/switchdev.c | 181 -------------------------------------- 6 files changed, 1 insertion(+), 341 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ffccf1787914..b9314b48e39f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,7 +243,6 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); -void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -356,7 +355,6 @@ static inline int fib_num_tclassid_users(struct net *net) } #endif int fib_unmerge(struct net *net); -void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 729fe1534160..eba80c4fc56f 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,7 +68,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_IPV4_FIB, SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -92,21 +91,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_IPV4_FIB */ -struct switchdev_obj_ipv4_fib { - struct switchdev_obj obj; - u32 dst; - int dst_len; - struct fib_info *fi; - u8 tos; - u8 type; - u32 nlflags; - u32 tb_id; -}; - -#define SWITCHDEV_OBJ_IPV4_FIB(obj) \ - container_of(obj, struct switchdev_obj_ipv4_fib, obj) - /* SWITCHDEV_OBJ_ID_PORT_FDB */ struct switchdev_obj_port_fdb { struct switchdev_obj obj; @@ -209,11 +193,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void switchdev_fib_ipv4_abort(struct fib_info *fi); int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlm_flags); @@ -304,25 +283,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) -{ - return 0; -} - -static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - return 0; -} - -static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ -} - static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86c43dc9a60e..c3b80478226e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -189,19 +189,6 @@ static void fib_flush(struct net *net) rt_cache_flush(net); } -void fib_flush_external(struct net *net) -{ - struct fib_table *tb; - struct 
hlist_head *head; - unsigned int h; - - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) - fib_table_flush_external(tb); - } -} - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ebadf6b99499..2e50062f642d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -228,7 +228,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; @@ -251,7 +250,6 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 51a4537eb145..31cef3602585 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,7 +81,6 @@ #include #include #include -#include #include #include "fib_lookup.h" @@ -1215,17 +1214,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - err = switchdev_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - kmem_cache_free(fn_alias_kmem, new_fa); - goto out; - } - hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1273,18 +1261,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - /* (Optionally) offload fib entry to switch hardware. */ - err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, - cfg->fc_nlflags, tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - goto out_free_new_fa; - } - /* Insert new entry to the list. 
*/ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_sw_fib_del; + goto out_free_new_fa; if (!plen) tb->tb_num_default++; @@ -1297,8 +1277,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, succeeded: return 0; -out_sw_fib_del: - switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1591,9 +1569,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!fa_to_delete) return -ESRCH; - switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, fa_to_delete->fa_info, tos, cfg->fc_type, tb->tb_id, 0); @@ -1785,80 +1760,6 @@ out: return NULL; } -/* Caller must hold RTNL */ -void fib_table_flush_external(struct fib_table *tb) -{ - struct trie *t = (struct trie *)tb->tb_data; - struct key_vector *pn = t->kv; - unsigned long cindex = 1; - struct hlist_node *tmp; - struct fib_alias *fa; - - /* walk trie in reverse order */ - for (;;) { - unsigned char slen = 0; - struct key_vector *n; - - if (!(cindex--)) { - t_key pkey = pn->key; - - /* cannot resize the trie vector */ - if (IS_TRIE(pn)) - break; - - /* resize completed node */ - pn = resize(t, pn); - cindex = get_index(pkey, pn); - - continue; - } - - /* grab the next available node */ - n = get_child(pn, cindex); - if (!n) - continue; - - if (IS_TNODE(n)) { - /* record pn and cindex for leaf walking */ - pn = n; - cindex = 1ul << n->bits; - - continue; - } - - hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { - struct fib_info *fi = fa->fa_info; - - /* if alias was cloned to local then we just - * need to remove the local copy from main - */ - if (tb->tb_id != fa->tb_id) { - hlist_del_rcu(&fa->fa_list); - alias_free_mem_rcu(fa); - continue; - } - - /* record local slen */ - slen = fa->fa_slen; - - if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) - continue; - - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); - } - - /* update leaf slen */ - n->slen = slen; - - if (hlist_empty(&n->leaf)) { - put_child_root(pn, n->key, NULL); - node_free(n); - } - } -} - /* Caller must hold RTNL. 
*/ int fib_table_flush(struct net *net, struct fib_table *tb) { @@ -1909,9 +1810,6 @@ int fib_table_flush(struct net *net, struct fib_table *tb) continue; } - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, n->key, KEYLENGTH - fa->fa_slen, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index abd8d2a38a7d..02beb35f577f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -21,7 +21,6 @@ #include #include #include -#include #include /** @@ -344,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: return sizeof(struct switchdev_obj_port_vlan); - case SWITCHDEV_OBJ_ID_IPV4_FIB: - return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); case SWITCHDEV_OBJ_ID_PORT_MDB: @@ -1108,184 +1105,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct net_device *port_dev; - struct list_head *iter; - - /* Recusively search down until we find a sw port dev. - * (A sw port dev supports switchdev_port_attr_get). - */ - - if (ops && ops->switchdev_port_attr_get) - return dev; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = switchdev_get_lowest_dev(lower_dev); - if (port_dev) - return port_dev; - } - - return NULL; -} - -static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) -{ - struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - struct switchdev_attr prev_attr; - struct net_device *dev = NULL; - int nhsel; - - ASSERT_RTNL(); - - /* For this route, all nexthop devs must be on the same switch. */ - - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (!nh->nh_dev) - return NULL; - - dev = switchdev_get_lowest_dev(nh->nh_dev); - if (!dev) - return NULL; - - attr.orig_dev = dev; - if (switchdev_port_attr_get(dev, &attr)) - return NULL; - - if (nhsel > 0 && - !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) - return NULL; - - prev_attr = attr; - } - - return dev; -} - -/** - * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @nlflags: netlink flags passed in (NLM_F_*) - * @tb_id: route table ID - * - * Add/modify switch IPv4 route entry. - */ -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = nlflags, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - /* Don't offload route if using custom ip rules or if - * IPv4 FIB offloading has been disabled completely. 
- */ - -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (fi->fib_net->ipv4.fib_has_custom_rules) - return 0; -#endif - - if (fi->fib_net->ipv4.fib_offload_disabled) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_add(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_inc(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); - -/** - * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @tb_id: route table ID - * - * Delete IPv4 route entry from switch device. - */ -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = 0, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_del(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_dec(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); - -/** - * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation - * - * @fi: route FIB info structure - */ -void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ - /* There was a problem installing this route to the offload - * device. For now, until we come up with more refined - * policy handling, abruptly end IPv4 fib offloading for - * for entire net by flushing offload device(s) of all - * IPv4 routes, and mark IPv4 fib offloading broken from - * this point forward. - */ - - fib_flush_external(fi->fib_net); - fi->fib_net->ipv4.fib_offload_disabled = true; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); - bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { From fd41b0eaa06a8a0516f9e0b0a5889035bf423784 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:34 +0200 Subject: [PATCH 0918/1050] doc: update switchdev L3 section This is to reflect the change of FIB offload infrastructure from switchdev objects to FIB notifier. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 27 +++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 44235e83799b..2bbac05ab9e2 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -314,30 +314,29 @@ the kernel, with the device doing the FIB lookup and forwarding. The device does a longest prefix match (LPM) on FIB entries matching route prefix and forwards the packet to the matching FIB entry's nexthop(s) egress ports. -To program the device, the driver implements support for -SWITCHDEV_OBJ_IPV[4|6]_FIB object using switchdev_port_obj_xxx ops. -switchdev_port_obj_add is used for both adding a new FIB entry to the device, -or modifying an existing entry on the device. +To program the device, the driver has to register a FIB notifier handler +using register_fib_notifier. 
The following events are available: +FIB_EVENT_ENTRY_ADD: used for both adding a new FIB entry to the device, + or modifying an existing entry on the device. +FIB_EVENT_ENTRY_DEL: used for removing a FIB entry +FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL: used to propagate FIB rule changes -XXX: Currently, only SWITCHDEV_OBJ_ID_IPV4_FIB objects are supported. +FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass: -SWITCHDEV_OBJ_ID_IPV4_FIB object passes: - - struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ u32 dst; int dst_len; struct fib_info *fi; u8 tos; u8 type; - u32 nlflags; u32 tb_id; - } ipv4_fib; + u32 nlflags; + }; to add/modify/delete IPv4 dst/dest_len prefix on table tb_id. The *fi structure holds details on the route and route's nexthops. *dev is one of the -port netdevs mentioned in the routes next hop list. If the output port netdevs -referenced in the route's nexthop list don't all have the same switch ID, the -driver is not called to add/modify/delete the FIB entry. +port netdevs mentioned in the route's next hop list. Routes offloaded to the device are labeled with "offload" in the ip route listing: @@ -355,6 +354,8 @@ listing: 12.0.0.4 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.15 +The "offload" flag is set in case at least one device offloads the FIB entry. + XXX: add/mod/del IPv6 FIB API Nexthop Resolution From c099ff3cdb9dd2281dee3dc4ef89fec1c516df22 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 27 Sep 2016 01:23:26 +0300 Subject: [PATCH 0919/1050] sh_eth: add R8A7743/5 support Add support for the first two members of the Renesas RZ/G family, RZ/G1M/E (also known as R8A7743/5). The Ether core is the same as in the R-Car gen2 SoCs, so will share the code/data with them... Signed-off-by: Sergei Shtylyov Acked-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/sh_eth.txt | 2 ++ drivers/net/ethernet/renesas/Kconfig | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index 2f6ec85fda8e..0115c85a2425 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -5,6 +5,8 @@ interface contains. Required properties: - compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC. + "renesas,ether-r8a7743" if the device is a part of R8A7743 SoC. + "renesas,ether-r8a7745" if the device is a part of R8A7745 SoC. "renesas,ether-r8a7778" if the device is a part of R8A7778 SoC. "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC. "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC. diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 4f132cf177cd..85ec447c2d18 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -27,7 +27,7 @@ config SH_ETH Renesas SuperH Ethernet device driver. This driver supporting CPUs are: - SH7619, SH7710, SH7712, SH7724, SH7734, SH7763, SH7757, - R8A7740, R8A777x and R8A779x. + R8A7740, R8A774x, R8A777x and R8A779x. 
config RAVB tristate "Renesas Ethernet AVB support" diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 440ae272161c..05b0dc55de77 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2959,6 +2959,8 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data }, + { .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data }, + { .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data }, { .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data }, From fa5effe7664b1606dfd639dff58686f6dbb119d7 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Tue, 27 Sep 2016 11:09:51 +0300 Subject: [PATCH 0920/1050] net/sched: pkt_cls: change tc actions order to be as the user sets Currently, the created tc actions list is in the reverse of the order set by the user. Change the actions list order to be the same as was set by the user. This patch doesn't affect the dump actions behavior: for dumping, the action->order parameter is used, so the list order doesn't matter. Signed-off-by: Hadar Hen Zion Acked-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 5ccaa4be7d96..767b03a3fe67 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -123,7 +123,7 @@ static inline void tcf_exts_to_list(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - list_add(&a->list, actions); + list_add_tail(&a->list, actions); } #endif } From 931eb6b7fee31f63d33fd5e1e62a60375fa6fb32 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 16 Sep 2016 13:05:35 +0000 Subject: [PATCH 0921/1050] ath10k: fix error return code in ahb Return a negative error code from the error handling cases instead of 0, as done elsewhere in the functions ath10k_ahb_probe() and ath10k_ahb_resource_init(). Signed-off-by: Wei Yongjun Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index fe38b459544d..766c63bf05c4 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -509,6 +509,7 @@ static int ath10k_ahb_resource_init(struct ath10k *ar) ar_ahb->irq = platform_get_irq_byname(pdev, "legacy"); if (ar_ahb->irq < 0) { ath10k_err(ar, "failed to get irq number: %d\n", ar_ahb->irq); + ret = ar_ahb->irq; goto err_clock_deinit; } @@ -787,6 +788,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) chip_id = ath10k_ahb_soc_read32(ar, SOC_CHIP_ID_ADDRESS); if (chip_id == 0xffffffff) { ath10k_err(ar, "failed to get chip id\n"); + ret = -ENODEV; goto err_halt_device; } From 06421a7821f9040467e0db2702309b72aa2b7af4 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Tue, 20 Sep 2016 13:52:07 +0530 Subject: [PATCH 0922/1050] ath10k: Ignore SWBA event for a vif if it's marked for no beacon Ignore further processing of a SWBA event scheduled for a vif if mac80211 has marked the particular vif to stop beaconing and brought the vdev down in 'ath10k_control_beaconing'. 
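In other words, the per-vdev processing in the SWBA handler gains an early bail-out before any beacon work is done. A minimal sketch of the idea (simplified, not the exact handler code):

	/* inside the SWBA handler's per-vdev loop */
	arvif = ath10k_get_arvif(ar, vdev_id);
	if (!arvif)
		continue;	/* unknown vdev id */
	if (!arvif->is_up)
		continue;	/* mac80211 already brought the vdev down */
	/* ...CSA bookkeeping and beacon submission continue here... */
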
This should potentially avoid ath10k warning/error messages while running continuous wifi down/up with the maximum number of vaps configured. This change was found during a code walkthrough while going through other beacon-configuration-related functions in ath10k. Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 38993d72f5e6..54df425bb0fc 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3514,6 +3514,12 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) continue; } + /* mac80211 would have already asked us to stop beaconing and + * bring the vdev down, so continue in that case + */ + if (!arvif->is_up) + continue; + /* There are no completions for beacons so wait for next SWBA * before telling mac80211 to decrement CSA counter * From 0628467f97b5227755428bac10a68257322f7e34 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 21 Sep 2016 16:28:06 +0530 Subject: [PATCH 0923/1050] ath10k: fix copy engine 5 destination ring stuck Firmware runs a watchdog timer to track the copy engine ring index and write index. Whenever both indices are stuck at the same location for a given duration, the watchdog triggers and asserts the target. While updating the copy engine destination ring write index, the driver ensures that the write index will not be the same as the read index by finding the delta between these two indices (CE_RING_DELTA). The HTT target-to-host copy engine (CE5) is a special case where ring buffers are reused and the delta check is not applied while updating the write index. In a rare scenario, when the CE5 ring is full, both indices refer to the same location, causing the CE ring stuck issue explained above. This issue was originally reported on IPQ4019 during long-hour stress testing and Veriwave max-clients test suites; the same issue has also been observed on other chips. Fix this by ensuring that the write index stays one less than the read index, which means the full ring is available for receiving data. Cc: stable@vger.kernel.org Tested-by: Tamizh chelvam Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ce.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index e7205546fa6b..0b4d79659884 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -433,6 +433,13 @@ void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries) unsigned int nentries_mask = dest_ring->nentries_mask; unsigned int write_index = dest_ring->write_index; u32 ctrl_addr = pipe->ctrl_addr; + u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr); + + /* Prevent CE ring stuck issue that will occur when ring is full. + * Make sure that write index is 1 less than read index. + */ + if ((cur_write_idx + nentries) == dest_ring->sw_index) + nentries -= 1; write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries); ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index); From 739ccd76b40ed7f2e851b55ae16bdb655cc2ba1d Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 9 Aug 2016 15:02:27 +0800 Subject: [PATCH 0924/1050] ath9k: disable RNG by default The ath9k RNG would dominate all the noise sources from the real HW RNG, so disable it by default. 
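Systems that do want the ADC output feeding the entropy pool can still opt in explicitly; an illustrative config fragment, assuming a built-in driver:

	CONFIG_ATH9K=y
	CONFIG_HW_RANDOM=y
	CONFIG_ATH9K_HWRNG=y
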
But we strongly recommend enabling it if the system is without a HW RNG, especially on embedded systems. Signed-off-by: Miaoqing Pan Acked-by: Stephan Mueller Reviewed-by: Jason Cooper Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index f68cb00450e0..8f231c67dd51 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -180,7 +180,7 @@ config ATH9K_HTC_DEBUGFS config ATH9K_HWRNG bool "Random number generator support" depends on ATH9K && (HW_RANDOM = y || HW_RANDOM = ATH9K) - default y + default n ---help--- This option incorporates the ADC register output as a source of randomness into Linux entropy pool (/dev/urandom and /dev/random) From b93015057e31933e1ab600290e014779efe5b5a3 Mon Sep 17 00:00:00 2001 From: Chaehyun Lim Date: Sun, 18 Sep 2016 15:30:24 +0900 Subject: [PATCH 0925/1050] ath6kl: fix return value in ath6kl_wmi_set_pvb_cmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with W=1, we get the following warning: drivers/net/wireless/ath/ath6kl/wmi.c:3509:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] At the end of ath6kl_wmi_set_pvb_cmd(), 0 is returned regardless of the return value of ath6kl_wmi_cmd_send(). This patch changes the return value from 0 to ret, which holds the result of the ath6kl_wmi_cmd_send() call. Signed-off-by: Chaehyun Lim Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath6kl/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index b8cf04d11975..3fd1cc98fd2f 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -3520,7 +3520,7 @@ int ath6kl_wmi_set_pvb_cmd(struct wmi *wmi, u8 if_idx, u16 aid, ret = ath6kl_wmi_cmd_send(wmi, if_idx, skb, WMI_AP_SET_PVB_CMDID, NO_SYNC_WMIFLAG); - return 0; + return ret; } int ath6kl_wmi_set_rx_frame_format_cmd(struct wmi *wmi, u8 if_idx, From ffd74aca44b70a2564da86e8878c1dd296009ffb Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 27 Sep 2016 18:42:58 +0300 Subject: [PATCH 0926/1050] MAINTAINERS: hostap: Mark the Host AP driver obsolete This is old code for old hardware and it is not really accurate to claim it to be maintained anymore. Change the status to "Obsolete" to make it clearer that minor cleanups and other unnecessary changes from automated tools are not necessarily beneficial and have a larger risk of breaking something without it being quickly noticed, due to lack of testing. In addition, remove the old mailing list that does not work anymore and point the web page to a more accurate URL. 
Signed-off-by: Jouni Malinen Signed-off-by: Kalle Valo --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ce80b36aab69..463daa52b73a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5592,10 +5592,9 @@ F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt HOST AP DRIVER M: Jouni Malinen -L: hostap@shmoo.com (subscribers-only) L: linux-wireless@vger.kernel.org -W: http://hostap.epitest.fi/ -S: Maintained +W: http://w1.fi/hostap-driver.html +S: Obsolete F: drivers/net/wireless/intersil/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER From 3acf3ec3f4b0fd4263989f2e4227bbd1c42b5fe1 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Tue, 27 Sep 2016 19:03:37 -0700 Subject: [PATCH 0927/1050] tcp: Change txhash on every SYN and RTO retransmit The current code changes txhash (flowlabels) on every retransmitted SYN/ACK, but only after the 2nd retransmitted SYN and only after tcp_retries1 RTO retransmits. With this patch: 1) txhash is changed with every SYN retransmit 2) txhash is changed with every RTO. The result is that we can start re-routing around failed (or very congested) paths as soon as possible. Otherwise application health checks may fail and the connection may be terminated before we start to change txhash. v4: Removed sysctl, txhash is changed for all RTOs v3: Removed text saying default value of sysctl is 0 (it is 100) v2: Added sysctl documentation and cleaned code Tested with packetdrill tests Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f712b411f6ed..3ea1cf804748 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -192,6 +192,8 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_data && icsk->icsk_retransmits == 1) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } else if (!tp->syn_data && !tp->syn_fastopen) { + sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; @@ -213,6 +215,8 @@ static int tcp_write_timeout(struct sock *sk) tcp_mtu_probing(icsk, sk); dst_negative_advice(sk); + } else { + sk_rethink_txhash(sk); } retry_until = net->ipv4.sysctl_tcp_retries2; From bdf2f59e64eb9cd9e9dd90f990a9577640470c8a Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sat, 6 Aug 2016 00:31:48 -0400 Subject: [PATCH 0928/1050] sparc64: fix section mismatch in find_numa_latencies_for_group To fix: WARNING: vmlinux.o(.text.unlikely+0x580): Section mismatch in reference from the function find_numa_latencies_for_group() to the function .init.text:find_mlgroup() The function find_numa_latencies_for_group() references the function __init find_mlgroup(). This is often because find_numa_latencies_for_group lacks a __init annotation or the annotation of find_mlgroup is wrong. It turns out find_numa_latencies_for_group is only called from: static int __init numa_parse_mdesc(void) and hence we can tag find_numa_latencies_for_group with __init. In doing so we see that find_best_numa_node_for_mlgroup is only called from within __init and hence can also be marked with __init. Cc: "David S. Miller" Cc: Nitin Gupta Cc: Chris Hyser Cc: Santosh Shilimkar Cc: sparclinux@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- arch/sparc/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 65457c9f1365..80031fc2e2d6 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1160,7 +1160,7 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } -static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) +static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; @@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) return i; } -static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp, - int index) +static void __init find_numa_latencies_for_group(struct mdesc_handle *md, + u64 grp, int index) { u64 arc; From 1e953d846ac015fbfcf09c857e8f893924cb629c Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 31 Aug 2016 13:48:19 -0700 Subject: [PATCH 0929/1050] sparc64 mm: Fix more TSB sizing issues Commit af1b1a9b36b8 ("sparc64 mm: Fix base TSB sizing when hugetlb pages are used") addressed the difference between hugetlb and THP pages when computing TSB sizes. The following additional issues were also discovered while working with the code. In order to save memory, THP makes use of a huge zero page. This huge zero page does not count against a task's RSS, but it does consume TSB entries. This is similar to hugetlb pages. Therefore, count huge zero page entries in hugetlb_pte_count. Accounting of THP pages is done in the routine set_pmd_at(). Unfortunately, this does not catch the case where a THP page is split. To handle this case, decrement the count in pmdp_invalidate(). pmdp_invalidate is only called when splitting a THP. However, 'sanity checks' are added in case it is ever called for other purposes. A more general issue exists with HPAGE_SIZE accounting. hugetlb_pte_count tracks the number of HPAGE_SIZE (8M) pages. This value is used to size the TSB for HPAGE_SIZE pages. However, each HPAGE_SIZE page consists of two REAL_HPAGE_SIZE (4M) pages. The TSB contains an entry for each REAL_HPAGE_SIZE page. Therefore, the number of REAL_HPAGE_SIZE pages should be used to size the huge page TSB. A new compile time constant REAL_HPAGE_PER_HPAGE is used to multiply hugetlb_pte_count before sizing the TSB. Changes from V1 - Fixed build issue if hugetlb or THP not configured Signed-off-by: Mike Kravetz Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/page_64.h | 1 + arch/sparc/mm/fault_64.c | 1 + arch/sparc/mm/tlb.c | 35 ++++++++++++++++++++++++++++---- arch/sparc/mm/tsb.c | 18 ++++++++++------ 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 8c2a8c937540..c1263fc390db 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,6 +25,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index e16fdd28a931..3f291d8c57f7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -484,6 +484,7 @@ good_area: tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + mm_rss *= REAL_HPAGE_PER_HPAGE; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 3659d37b4d81..c56a195c9071 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return; if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { - if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.thp_pte_count++; - else - mm->context.thp_pte_count--; + /* + * Note that this routine only sets pmds for THP pages. + * Hugetlb pages are handled elsewhere. We need to check + * for huge zero page. Huge zero pages are like hugetlb + * pages in that there is no RSS, but there is the need + * for TSB entries. So, huge zero page counts go into + * hugetlb_pte_count. + */ + if (pmd_val(pmd) & _PAGE_PMD_HUGE) { + if (is_huge_zero_page(pmd_page(pmd))) + mm->context.hugetlb_pte_count++; + else + mm->context.thp_pte_count++; + } else { + if (is_huge_zero_page(pmd_page(orig))) + mm->context.hugetlb_pte_count--; + else + mm->context.thp_pte_count--; + } /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +/* + * This routine is only called when splitting a THP + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + /* + * set_pmd_at() will not be called in a way to decrement + * thp_pte_count when splitting a THP, so do it now. + * Sanity check pmd before doing the actual decrement. 
+ */ + if ((pmd_val(entry) & _PAGE_PMD_HUGE) && + !is_huge_zero_page(pmd_page(entry))) + (vma->vm_mm)->context.thp_pte_count--; } void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 6725ed45580e..f2b77112e9d8 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -469,8 +469,10 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long mm_rss = get_mm_rss(mm); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long total_huge_pte_count; + unsigned long saved_hugetlb_pte_count; + unsigned long saved_thp_pte_count; #endif unsigned int i; @@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * will re-increment the counters as the parent PTEs are * copied into the child address space. */ - total_huge_pte_count = mm->context.hugetlb_pte_count + - mm->context.thp_pte_count; + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count; + saved_thp_pte_count = mm->context.thp_pte_count; mm->context.hugetlb_pte_count = 0; mm->context.thp_pte_count = 0; + + mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE); #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* If this is fork, inherit the parent's TSB size. We would * grow it to that size on the first page fault anyways. */ - tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(total_huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); + if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, + (saved_hugetlb_pte_count + saved_thp_pte_count) * + REAL_HPAGE_PER_HPAGE); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) From 9b2f753ec23710aa32c0d837d2499db92fe9115b Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 15 Sep 2016 14:54:40 -0600 Subject: [PATCH 0930/1050] sparc64: Fix cpu_possible_mask if nr_cpus is set If kernel boot parameter nr_cpus is set, it should define the number of CPUs that can ever be available in the system i.e. cpu_possible_mask. setup_nr_cpu_ids() overrides the nr_cpu_ids based on the cpu_possible_mask during kernel initialization. If cpu_possible_mask is not set based on the nr_cpus value, earlier part of the kernel would be initialized using nr_cpus value leading to a kernel crash. Set cpu_possible_mask based on nr_cpus value. Thus setup_nr_cpu_ids() becomes redundant and does not corrupt nr_cpu_ids value. Signed-off-by: Atish Patra Reviewed-by: Bob Picco Reviewed-by: Vijay Kumar Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/smp_64.h | 1 + arch/sparc/kernel/setup_64.c | 1 + arch/sparc/kernel/smp_64.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+) diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 26d9e7726867..87b05751955a 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -43,6 +43,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask); int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) +void smp_fill_in_cpu_possible_map(void); void smp_fill_in_sib_core_maps(void); void cpu_play_dead(void); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 599f1207eed2..f32cfe83dfd1 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -650,6 +650,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); init_sparc64_elf_hwcap(); + smp_fill_in_cpu_possible_map(); } extern int stop_a_enabled; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8a6151a628ce..d3035ba6cd31 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1227,6 +1227,20 @@ void __init smp_setup_processor_id(void) xcall_deliver_impl = hypervisor_xcall_deliver; } +void __init smp_fill_in_cpu_possible_map(void) +{ + int possible_cpus = num_possible_cpus(); + int i; + + if (possible_cpus > nr_cpu_ids) + possible_cpus = nr_cpu_ids; + + for (i = 0; i < possible_cpus; i++) + set_cpu_possible(i, true); + for (; i < NR_CPUS; i++) + set_cpu_possible(i, false); +} + void smp_fill_in_sib_core_maps(void) { unsigned int i; From ebb99a4c12e4daabe1940ae936e8e7e97ae68c6f Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 15 Sep 2016 14:54:41 -0600 Subject: [PATCH 0931/1050] sparc64: Fix irq stack bootmem allocation. Currently, irq stack bootmem is allocated for all possible cpus before nr_cpus value changes the list of possible cpus. As a result, there is unnecessary wastage of bootmemory. Move the irq stack bootmem allocation so that it happens after possible cpu list is modified based on nr_cpus value. Signed-off-by: Atish Patra Reviewed-by: Bob Picco Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/kernel/setup_64.c | 25 +++++++++++++++++++++++++ arch/sparc/mm/init_64.c | 16 ---------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index f32cfe83dfd1..6b7331d198e9 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,8 @@ #include #include #include +#include +#include #ifdef CONFIG_IP_PNP #include @@ -590,6 +593,22 @@ static void __init init_sparc64_elf_hwcap(void) pause_patch(); } +void __init alloc_irqstack_bootmem(void) +{ + unsigned int i, node; + + for_each_possible_cpu(i) { + node = cpu_to_node(i); + + softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + } +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -651,6 +670,12 @@ void __init setup_arch(char **cmdline_p) paging_init(); init_sparc64_elf_hwcap(); smp_fill_in_cpu_possible_map(); + /* + * Once the OF device tree and MDESC have been setup and nr_cpus has + * been parsed, we know the list of possible cpus. Therefore we can + * allocate the IRQ stacks. 
+ */ alloc_irqstack_bootmem(); } extern int stop_a_enabled; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 80031fc2e2d6..7ac6b62fb7c1 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2081,7 +2081,6 @@ void __init paging_init(void) { unsigned long end_pfn, shift, phys_base; unsigned long real_end, i; - int node; setup_page_offset(); @@ -2250,21 +2249,6 @@ void __init paging_init(void) /* Setup bootmem... */ last_valid_pfn = end_pfn = bootmem_init(phys_base); - /* Once the OF device tree and MDESC have been setup, we know - * the list of possible cpus. Therefore we can allocate the - * IRQ stacks. - */ - for_each_possible_cpu(i) { - node = cpu_to_node(i); - - softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - } - kernel_physical_mapping_init(); { From 0a966fa8914aafeb430f9e8489227e86d8468625 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 28 Sep 2016 12:51:04 -0700 Subject: [PATCH 0932/1050] MAINTAINERS: Update my e-mail I will be starting employment at Versity next week and would like to update my MAINTAINERS e-mail to reflect that change. My Versity e-mail is already activated so I shouldn't get any bounces on the new one. My ability to help with Ocfs2 kernel maintenance won't change as a result of the new job. Signed-off-by: Mark Fasheh Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 01bff8ea28d8..b003d0ca6238 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8745,7 +8745,7 @@ F: drivers/oprofile/ F: include/linux/oprofile.h ORACLE CLUSTER FILESYSTEM 2 (OCFS2) -M: Mark Fasheh +M: Mark Fasheh M: Joel Becker L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers) W: http://ocfs2.wiki.kernel.org From 5b398e416e880159fe55eefd93c6588fa072cd66 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 28 Sep 2016 15:22:30 -0700 Subject: [PATCH 0933/1050] mm,ksm: fix endless looping in allocating memory when ksm is enabled I hit the following hung task when running an OOM LTP test case with a 4.1 kernel. Call trace: [] __switch_to+0x74/0x8c [] __schedule+0x23c/0x7bc [] schedule+0x3c/0x94 [] rwsem_down_write_failed+0x214/0x350 [] down_write+0x64/0x80 [] __ksm_exit+0x90/0x19c [] mmput+0x118/0x11c [] do_exit+0x2dc/0xa74 [] do_group_exit+0x4c/0xe4 [] get_signal+0x444/0x5e0 [] do_signal+0x1d8/0x450 [] do_notify_resume+0x70/0x78 The oom victim cannot terminate because it needs to take mmap_sem for write while the lock is held by ksmd for read, which loops in the page allocator: ksm_do_scan -> scan_get_next_rmap_item -> down_read / get_next_rmap_item -> alloc_rmap_item (here ksmd will loop permanently). There is no way forward because the oom victim cannot release any memory in a 4.1-based kernel. Since 4.6 we have the oom reaper, which would solve this problem because it would release the memory asynchronously. Nevertheless we can relax the alloc_rmap_item requirements and use __GFP_NORETRY because the allocation failure is acceptable, as ksm_do_scan would just retry later after the lock got dropped. Such a patch would also be easy to backport to older stable kernels which do not have the oom_reaper. While we are at it, add __GFP_NOWARN so the admin doesn't have to be alarmed by the allocation failure. 
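The allocation pattern itself is a common one; a minimal, generic sketch of the idea (generic names, not the exact hunk below):

	/* Opportunistic allocation: fail fast instead of looping in the
	 * page allocator while holding KSM locks; the scan simply retries
	 * on a later pass, and no warning is printed on failure.
	 */
	item = kmem_cache_zalloc(cache, GFP_KERNEL |
				 __GFP_NORETRY | __GFP_NOWARN);
	if (!item)
		return NULL;
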
Link: http://lkml.kernel.org/r/1474165570-44398-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Suggested-by: Hugh Dickins Suggested-by: Michal Hocko Acked-by: Michal Hocko Acked-by: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/ksm.c b/mm/ksm.c index 73d43bafd9fb..5048083b60f2 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rmap_item(void) { struct rmap_item *rmap_item; - rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); + rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL | + __GFP_NORETRY | __GFP_NOWARN); if (rmap_item) ksm_rmap_items++; return rmap_item; From 2481366afd71a0c0b7cd725e6750c04cf589673b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 28 Sep 2016 15:22:33 -0700 Subject: [PATCH 0934/1050] dma-mapping.h: preserve unmap info for CONFIG_DMA_API_DEBUG When CONFIG_DMA_API_DEBUG is enabled we need to preserve the unmapping address even if "unmap" is a no-op for our architecture, because we need debug_dma_unmap_page() to correctly clean up all of the debug bookkeeping. Failing to do so results in false-positive warnings about previously mapped areas never being unmapped. Link: http://lkml.kernel.org/r/1474387125-3713-1-git-send-email-andrew.smirnov@gmail.com Signed-off-by: Andrey Smirnov Reviewed-by: Robin Murphy Cc: Joerg Roedel Cc: Will Deacon Cc: Zhen Lei Cc: "Luis R. Rodriguez" Cc: Christian Borntraeger Cc: Geliang Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 66533e18276c..dc69df04abc1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -718,7 +718,7 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_mmap_writecombine dma_mmap_wc #endif -#ifdef CONFIG_NEED_DMA_MAP_STATE +#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG) #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME #define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) From e436fd61a8f62cb7a16310a42b95ab076ff72eff Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 28 Sep 2016 15:22:36 -0700 Subject: [PATCH 0935/1050] scripts/recordmcount.c: account for .softirqentry.text be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections") added the .softirqentry.text section, but it was not added to recordmcount. So functions in that section are untraceable. Add the section to scripts/recordmcount.c and scripts/recordmcount.pl. 
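Once recordmcount accounts for the section, its functions become visible to the function tracer again. A quick way to verify after boot (illustrative; __do_softirq is one function placed in .softirqentry.text):

	# grep __do_softirq /sys/kernel/debug/tracing/available_filter_functions
	__do_softirq
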
Fixes: be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections") Link: http://lkml.kernel.org/r/1474902626-73468-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Steve Rostedt Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/recordmcount.c | 1 + scripts/recordmcount.pl | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 42396a74405d..a68f03133df9 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -363,6 +363,7 @@ is_mcounted_section_name(char const *const txtname) strcmp(".sched.text", txtname) == 0 || strcmp(".spinlock.text", txtname) == 0 || strcmp(".irqentry.text", txtname) == 0 || + strcmp(".softirqentry.text", txtname) == 0 || strcmp(".kprobes.text", txtname) == 0 || strcmp(".text.unlikely", txtname) == 0; } diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 96e2486a6fc4..2d48011bc362 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -134,6 +134,7 @@ my %text_sections = ( ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, + ".softirqentry.text" => 1, ".kprobes.text" => 1, ".text.unlikely" => 1, ); From 231e97e2b8ec9a1556ced5d8a89cda03a480b179 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 28 Sep 2016 15:22:38 -0700 Subject: [PATCH 0936/1050] mem-hotplug: use nodes that contain memory as mask in new_node_page() 9bb627be47a5 ("mem-hotplug: don't clear the only node in new_node_page()") prevents allocating from an empty nodemask but, as David points out, it is still wrong: node_online_map may include memoryless nodes, and allocating from those nodes is meaningless. This patch uses the node_states[N_MEMORY] mask to prevent the above case.
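In outline, the node-selection logic in new_node_page() after this patch (the diff below is authoritative; the comments are added, and the zonelist argument of the final fallback call, which the hunk cuts off, is shown as in mm/memory_hotplug.c) reads as: start from the nodes that actually contain memory, drop the node being offlined, and fall back to it only if no other node remains:

        nodemask_t nmask = node_states[N_MEMORY];       /* nodes with memory */
        struct page *new_page = NULL;

        node_clear(nid, nmask);                         /* prefer other nodes */

        if (!nodes_empty(nmask))
                new_page = __alloc_pages_nodemask(gfp_mask, 0,
                                                  node_zonelist(nid, gfp_mask),
                                                  &nmask);
        if (!new_page)                                  /* last resort: nid itself */
                new_page = __alloc_pages(gfp_mask, 0,
                                         node_zonelist(nid, gfp_mask));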
Fixes: 9bb627be47a5 ("mem-hotplug: don't clear the only node in new_node_page()") Fixes: 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") Link: http://lkml.kernel.org/r/1474447117.28370.6.camel@TP420 Signed-off-by: Li Zhong Suggested-by: David Rientjes Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: John Allen Cc: Xishi Qiu Cc: Joonsoo Kim Cc: Naoya Horiguchi Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b58906b6215c..9d29ba0f7192 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1555,8 +1555,8 @@ static struct page *new_node_page(struct page *page, unsigned long private, { gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; int nid = page_to_nid(page); - nodemask_t nmask = node_online_map; - struct page *new_page; + nodemask_t nmask = node_states[N_MEMORY]; + struct page *new_page = NULL; /* * TODO: allocate a destination hugepage from a nearest neighbor node, @@ -1567,14 +1567,14 @@ static struct page *new_node_page(struct page *page, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(nid, nmask)); - if (nid != next_node_in(nid, nmask)) - node_clear(nid, nmask); + node_clear(nid, nmask); if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages_nodemask(gfp_mask, 0, + if (!nodes_empty(nmask)) + new_page = __alloc_pages_nodemask(gfp_mask, 0, node_zonelist(nid, gfp_mask), &nmask); if (!new_page) new_page = __alloc_pages(gfp_mask, 0, From 7836667cec5e02ed2ae3eb09b88047b5b5f2343a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Sep 2016 08:41:16 -0700 Subject: [PATCH 0937/1050] net: do not export sk_stream_write_space Since commit 900f65d361d3 ("tcp: move duplicate code from tcp_v4_init_sock()/tcp_v6_init_sock()") we no longer need to export sk_stream_write_space() From: Eric Dumazet Cc: Neal Cardwell Signed-off-by: David S. Miller --- net/core/stream.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/stream.c b/net/core/stream.c index 159516a11b7e..1086c8b280a8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk) rcu_read_unlock(); } } -EXPORT_SYMBOL(sk_stream_write_space); /** * sk_stream_wait_connect - Wait for a socket to get into the connected state From 2a0100d7beebd2fb2c436852648f75396253e9f8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2016 20:40:52 -0400 Subject: [PATCH 0938/1050] sparc64: Fix non-SMP build. Need to provide a dummy smp_fill_in_cpu_possible_map. Fixes: 9b2f753ec237 ("sparc64: Fix cpu_possible_mask if nr_cpus is set") Reported-by: kbuild test robot Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/smp_64.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 87b05751955a..ce2233f7e662 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -73,6 +73,7 @@ void __cpu_die(unsigned int cpu); #define smp_fill_in_sib_core_maps() do { } while (0) #define smp_fetch_global_regs() do { } while (0) #define smp_fetch_global_pmu() do { } while (0) +#define smp_fill_in_cpu_possible_map() do { } while (0) #endif /* !(CONFIG_SMP) */ From 484611357c19f9e19ef742ebef4505a07d243cc9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 28 Sep 2016 10:54:32 -0400 Subject: [PATCH 0939/1050] bpf: allow access into map value arrays Suppose you have a map array value that is something like this struct foo { unsigned iter; int array[SOME_CONSTANT]; }; You can easily insert this into an array, but you cannot modify the contents of foo->array[] after the fact. This is because we have no way to verify we won't go off the end of the array at verification time. This patch provides a start for this work. We accomplish this by keeping track of a minimum and maximum value a register could be while we're checking the code. Then at the time we try to do an access into a MAP_VALUE we verify that the maximum offset into that region is a valid access into that memory region. So in practice, code such as this unsigned index = 0; if (foo->iter >= SOME_CONSTANT) foo->iter = index; else index = foo->iter++; foo->array[index] = bar; would be allowed, as we can verify that index will always be between 0 and SOME_CONSTANT-1. If you wish to use signed values you'll have to have an extra check to make sure the index isn't less than 0, or do something like index %= SOME_CONSTANT. Signed-off-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 + include/linux/bpf_verifier.h | 12 ++ kernel/bpf/verifier.c | 329 +++++++++++++++++++++++++++++++++-- samples/bpf/libbpf.h | 8 + samples/bpf/test_verifier.c | 243 +++++++++++++++++++++++++- 5 files changed, 577 insertions(+), 22 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5691fdc83819..c201017b5730 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -139,6 +139,13 @@ enum bpf_reg_type { */ PTR_TO_PACKET, PTR_TO_PACKET_END, /* skb->data + headlen */ + + /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map + * elem value. We only allow this if we can statically verify that + * access from this register are going to fall within the size of the + * map element. + */ + PTR_TO_MAP_VALUE_ADJ, }; struct bpf_prog; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c5cb661712c9..7035b997aaa5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -10,8 +10,19 @@ #include /* for enum bpf_reg_type */ #include /* for MAX_BPF_STACK */ + /* Just some arbitrary values so we can safely do math without overflowing and + * are obviously wrong for any sort of memory access. + */ +#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) + struct bpf_reg_state { enum bpf_reg_type type; + /* + * Used to determine if any memory access using this register will + * result in a bad access. 
+ */ + u64 min_value, max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; @@ -81,6 +92,7 @@ struct bpf_verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; + bool varlen_map_value_access; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7ada3152a556..99a7e5b388f2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -182,6 +182,7 @@ static const char * const reg_type_str[] = { [CONST_PTR_TO_MAP] = "map_ptr", [PTR_TO_MAP_VALUE] = "map_value", [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", + [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj", [FRAME_PTR] = "fp", [PTR_TO_STACK] = "fp", [CONST_IMM] = "imm", @@ -209,10 +210,17 @@ static void print_verifier_state(struct bpf_verifier_state *state) else if (t == UNKNOWN_VALUE && reg->imm) verbose("%lld", reg->imm); else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || - t == PTR_TO_MAP_VALUE_OR_NULL) + t == PTR_TO_MAP_VALUE_OR_NULL || + t == PTR_TO_MAP_VALUE_ADJ) verbose("(ks=%d,vs=%d)", reg->map_ptr->key_size, reg->map_ptr->value_size); + if (reg->min_value != BPF_REGISTER_MIN_RANGE) + verbose(",min_value=%llu", + (unsigned long long)reg->min_value); + if (reg->max_value != BPF_REGISTER_MAX_RANGE) + verbose(",max_value=%llu", + (unsigned long long)reg->max_value); } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) @@ -424,6 +432,8 @@ static void init_reg_state(struct bpf_reg_state *regs) for (i = 0; i < MAX_BPF_REG; i++) { regs[i].type = NOT_INIT; regs[i].imm = 0; + regs[i].min_value = BPF_REGISTER_MIN_RANGE; + regs[i].max_value = BPF_REGISTER_MAX_RANGE; } /* frame pointer */ @@ -440,6 +450,12 @@ static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) regs[regno].imm = 0; } +static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) +{ + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + enum reg_arg_type { SRC_OP, /* register is used as source operand */ DST_OP, /* register is used as destination operand */ @@ -665,7 +681,7 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) static int check_ptr_alignment(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int off, int size) { - if (reg->type != PTR_TO_PACKET) { + if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) { if (off % size != 0) { verbose("misaligned access off %d size %d\n", off, size); @@ -675,16 +691,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, } } - switch (env->prog->type) { - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_SCHED_ACT: - case BPF_PROG_TYPE_XDP: - break; - default: - verbose("verifier is misconfigured\n"); - return -EACCES; - } - if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) /* misaligned access to packet is ok on x86,arm,arm64 */ return 0; @@ -695,7 +701,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, } /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg->off + off) % size != 0) { + if (reg->type == PTR_TO_PACKET && + (NET_IP_ALIGN + reg->off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", NET_IP_ALIGN, reg->off, off, size); return -EACCES; @@ -728,12 +735,52 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (err) return err; - if (reg->type == PTR_TO_MAP_VALUE) { + if 
(reg->type == PTR_TO_MAP_VALUE || + reg->type == PTR_TO_MAP_VALUE_ADJ) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into map\n", value_regno); return -EACCES; } + + /* If we adjusted the register to this map value at all then we + * need to change off and size to min_value and max_value + * respectively to make sure our theoretical access will be + * safe. + */ + if (reg->type == PTR_TO_MAP_VALUE_ADJ) { + if (log_level) + print_verifier_state(state); + env->varlen_map_value_access = true; + /* The minimum value is only important with signed + * comparisons where we can't assume the floor of a + * value is 0. If we are using signed variables for our + * index'es we need to make sure that whatever we use + * will have a set floor within our range. + */ + if ((s64)reg->min_value < 0) { + verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + err = check_map_access(env, regno, reg->min_value + off, + size); + if (err) { + verbose("R%d min value is outside of the array range\n", + regno); + return err; + } + + /* If we haven't set a max value then we need to bail + * since we can't be sure we won't do bad things. + */ + if (reg->max_value == BPF_REGISTER_MAX_RANGE) { + verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n", + regno); + return -EACCES; + } + off += reg->max_value; + } err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); @@ -1195,6 +1242,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0; /* remember map_ptr, so that check_map_access() * can check 'value_size' boundary of memory access * to map element returned from bpf_map_lookup_elem() @@ -1416,6 +1464,106 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, return 0; } +static void check_reg_overflow(struct bpf_reg_state *reg) +{ + if (reg->max_value > BPF_REGISTER_MAX_RANGE) + reg->max_value = BPF_REGISTER_MAX_RANGE; + if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE) + reg->min_value = BPF_REGISTER_MIN_RANGE; +} + +static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; + u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE; + bool min_set = false, max_set = false; + u8 opcode = BPF_OP(insn->code); + + dst_reg = ®s[insn->dst_reg]; + if (BPF_SRC(insn->code) == BPF_X) { + check_reg_overflow(®s[insn->src_reg]); + min_val = regs[insn->src_reg].min_value; + max_val = regs[insn->src_reg].max_value; + + /* If the source register is a random pointer then the + * min_value/max_value values represent the range of the known + * accesses into that value, not the actual min/max value of the + * register itself. In this case we have to reset the reg range + * values so we know it is not safe to look at. 
+ */ + if (regs[insn->src_reg].type != CONST_IMM && + regs[insn->src_reg].type != UNKNOWN_VALUE) { + min_val = BPF_REGISTER_MIN_RANGE; + max_val = BPF_REGISTER_MAX_RANGE; + } + } else if (insn->imm < BPF_REGISTER_MAX_RANGE && + (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { + min_val = max_val = insn->imm; + min_set = max_set = true; + } + + /* We don't know anything about what was done to this register, mark it + * as unknown. + */ + if (min_val == BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) { + reset_reg_range_values(regs, insn->dst_reg); + return; + } + + switch (opcode) { + case BPF_ADD: + dst_reg->min_value += min_val; + dst_reg->max_value += max_val; + break; + case BPF_SUB: + dst_reg->min_value -= min_val; + dst_reg->max_value -= max_val; + break; + case BPF_MUL: + dst_reg->min_value *= min_val; + dst_reg->max_value *= max_val; + break; + case BPF_AND: + /* & is special since it could end up with 0 bits set. */ + dst_reg->min_value &= min_val; + dst_reg->max_value = max_val; + break; + case BPF_LSH: + /* Gotta have special overflow logic here, if we're shifting + * more than MAX_RANGE then just assume we have an invalid + * range. + */ + if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + else + dst_reg->min_value <<= min_val; + + if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + else + dst_reg->max_value <<= max_val; + break; + case BPF_RSH: + dst_reg->min_value >>= min_val; + dst_reg->max_value >>= max_val; + break; + case BPF_MOD: + /* % is special since it is an unsigned modulus, so the floor + * will always be 0. + */ + dst_reg->min_value = 0; + dst_reg->max_value = max_val - 1; + break; + default: + reset_reg_range_values(regs, insn->dst_reg); + break; + } + + check_reg_overflow(dst_reg); +} + /* check validity of 32-bit and 64-bit arithmetic operations */ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1479,6 +1627,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; + /* we are setting our register to something new, we need to + * reset its range values. + */ + reset_reg_range_values(regs, insn->dst_reg); + if (BPF_SRC(insn->code) == BPF_X) { if (BPF_CLASS(insn->code) == BPF_ALU64) { /* case: R1 = R2 @@ -1500,6 +1653,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) */ regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = insn->imm; + regs[insn->dst_reg].max_value = insn->imm; + regs[insn->dst_reg].min_value = insn->imm; } } else if (opcode > BPF_END) { @@ -1552,6 +1707,9 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) dst_reg = ®s[insn->dst_reg]; + /* first we want to adjust our ranges. */ + adjust_reg_min_max_vals(env, insn); + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) { @@ -1586,8 +1744,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } - /* mark dest operand */ - mark_reg_unknown_value(regs, insn->dst_reg); + /* If we did pointer math on a map value then just set it to our + * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or + * loads to this register appropriately, otherwise just mark the + * register as unknown. 
+ */ + if (env->allow_ptr_leaks && + (dst_reg->type == PTR_TO_MAP_VALUE || + dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) + dst_reg->type = PTR_TO_MAP_VALUE_ADJ; + else + mark_reg_unknown_value(regs, insn->dst_reg); } return 0; @@ -1642,6 +1809,104 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, } } +/* Adjusts the register min/max values in the case that the dst_reg is the + * variable register that we are working on, and src_reg is a constant or we're + * simply doing a BPF_K check. + */ +static void reg_set_min_max(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) +{ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; + break; + case BPF_JGT: + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + case BPF_JSGT: + /* If this is false then we know the maximum val is val, + * otherwise we know the min val is val+1. + */ + false_reg->max_value = val; + true_reg->min_value = val + 1; + break; + case BPF_JGE: + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + case BPF_JSGE: + /* If this is false then we know the maximum value is val - 1, + * otherwise we know the mimimum value is val. + */ + false_reg->max_value = val - 1; + true_reg->min_value = val; + break; + default: + break; + } + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); +} + +/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg + * is the variable reg. + */ +static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) +{ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; + break; + case BPF_JGT: + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + case BPF_JSGT: + /* + * If this is false, then the val is <= the register, if it is + * true the register <= to the val. + */ + false_reg->min_value = val; + true_reg->max_value = val - 1; + break; + case BPF_JGE: + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + case BPF_JSGE: + /* If this is false then constant < register, if it is true then + * the register < constant. + */ + false_reg->min_value = val + 1; + true_reg->max_value = val; + break; + default: + break; + } + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); +} + static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -1708,6 +1973,23 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (!other_branch) return -EFAULT; + /* detect if we are comparing against a constant value so we can adjust + * our min/max values for our dst register. 
+ */ + if (BPF_SRC(insn->code) == BPF_X) { + if (regs[insn->src_reg].type == CONST_IMM) + reg_set_min_max(&other_branch->regs[insn->dst_reg], + dst_reg, regs[insn->src_reg].imm, + opcode); + else if (dst_reg->type == CONST_IMM) + reg_set_min_max_inv(&other_branch->regs[insn->src_reg], + ®s[insn->src_reg], dst_reg->imm, + opcode); + } else { + reg_set_min_max(&other_branch->regs[insn->dst_reg], + dst_reg, insn->imm, opcode); + } + /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && @@ -2144,7 +2426,8 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old, * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool states_equal(struct bpf_verifier_state *old, +static bool states_equal(struct bpf_verifier_env *env, + struct bpf_verifier_state *old, struct bpf_verifier_state *cur) { struct bpf_reg_state *rold, *rcur; @@ -2157,6 +2440,13 @@ static bool states_equal(struct bpf_verifier_state *old, if (memcmp(rold, rcur, sizeof(*rold)) == 0) continue; + /* If the ranges were not the same, but everything else was and + * we didn't do a variable access into a map then we are a-ok. + */ + if (!env->varlen_map_value_access && + rold->type == rcur->type && rold->imm == rcur->imm) + continue; + if (rold->type == NOT_INIT || (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT)) continue; @@ -2213,7 +2503,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; while (sl != STATE_LIST_MARK) { - if (states_equal(&sl->state, &env->cur_state)) + if (states_equal(env, &sl->state, &env->cur_state)) /* reached equivalent register/stack state, * prune the search */ @@ -2259,6 +2549,7 @@ static int do_check(struct bpf_verifier_env *env) init_reg_state(regs); insn_idx = 0; + env->varlen_map_value_access = false; for (;;) { struct bpf_insn *insn; u8 class; @@ -2339,6 +2630,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + reset_reg_range_values(regs, insn->dst_reg); if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; @@ -2509,6 +2801,7 @@ process_bpf_exit: verbose("invalid BPF_LD mode\n"); return -EINVAL; } + reset_reg_range_values(regs, insn->dst_reg); } else { verbose("unknown insn class %d\n", class); return -EINVAL; diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 364582b77888..ac6edb61b64a 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -85,6 +85,14 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = IMM }) +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index ac590d4b7f02..369ffaad3799 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -29,6 +29,7 @@ struct bpf_test { struct bpf_insn insns[MAX_INSNS]; int fixup[MAX_FIXUPS]; int prog_array_fixup[MAX_FIXUPS]; + int test_val_map_fixup[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; enum { @@ -39,6 +40,19 @@ struct bpf_test { enum bpf_prog_type prog_type; }; +/* Note we want this to be 64 bit aligned so that the end of our array is + * actually the end of the structure. 
+ */ +#define MAX_ENTRIES 11 +struct test_val { + unsigned index; + int foo[MAX_ENTRIES]; +}; + +struct other_val { + unsigned int action[32]; +}; + static struct bpf_test tests[] = { { "add+sub+mul", @@ -2163,6 +2177,212 @@ static struct bpf_test tests[] = { .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "valid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a signed variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "invalid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2, 
+ offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .result = REJECT, + }, + { + "invalid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is outside of the array range", + .result = REJECT, + }, + { + "invalid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, + { + "invalid map access into an array with no floor check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, + { + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .result = REJECT, + }, + { + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + 
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3, 11}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -2176,12 +2396,12 @@ static int probe_filter_length(struct bpf_insn *fp) return len + 1; } -static int create_map(void) +static int create_map(size_t val_size, int num) { int map_fd; map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, - sizeof(long long), sizeof(long long), 1024, 0); + sizeof(long long), val_size, num, 0); if (map_fd < 0) printf("failed to create map '%s'\n", strerror(errno)); @@ -2211,12 +2431,13 @@ static int test(void) int prog_len = probe_filter_length(prog); int *fixup = tests[i].fixup; int *prog_array_fixup = tests[i].prog_array_fixup; + int *test_val_map_fixup = tests[i].test_val_map_fixup; int expected_result; const char *expected_errstr; - int map_fd = -1, prog_array_fd = -1; + int map_fd = -1, prog_array_fd = -1, test_val_map_fd = -1; if (*fixup) { - map_fd = create_map(); + map_fd = create_map(sizeof(long long), 1024); do { prog[*fixup].imm = map_fd; @@ -2231,6 +2452,18 @@ static int test(void) prog_array_fixup++; } while (*prog_array_fixup); } + if (*test_val_map_fixup) { + /* Unprivileged can't create a hash map.*/ + if (unpriv) + continue; + test_val_map_fd = create_map(sizeof(struct test_val), + 256); + do { + prog[*test_val_map_fixup].imm = test_val_map_fd; + test_val_map_fixup++; + } while (*test_val_map_fixup); + } + printf("#%d %s ", i, tests[i].descr); prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, @@ -2277,6 +2510,8 @@ fail: close(map_fd); if (prog_array_fd >= 0) close(prog_array_fd); + if (test_val_map_fd >= 0) + close(test_val_map_fd); close(prog_fd); } From 54e19bc74f3380d414681762ceed9f7245bc6a6e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 28 Sep 2016 11:58:42 -0500 Subject: [PATCH 0940/1050] net: qcom/emac: do not use devm on internal phy pdev The platform_device returned by of_find_device_by_node() is not automatically released when the driver unprobes. Therefore, managed calls like devm_ioremap_resource() should not be used. Instead, we manually allocate the resources and then free them on driver release. Signed-off-by: Timur Tabi Signed-off-by: David S. 
Miller --- .../net/ethernet/qualcomm/emac/emac-sgmii.c | 42 +++++++++++++++---- drivers/net/ethernet/qualcomm/emac/emac.c | 4 ++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 6ab0a3c96431..ad0e4209d948 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -681,6 +681,7 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) struct resource *res; const struct of_device_id *match; struct device_node *np; + int ret; np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); if (!np) { @@ -697,25 +698,48 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); if (!match) { dev_err(&pdev->dev, "unrecognized internal phy node\n"); - return -ENODEV; + ret = -ENODEV; + goto error_put_device; } phy->initialize = (emac_sgmii_initialize)match->data; /* Base address is the first address */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); - phy->base = devm_ioremap_resource(&sgmii_pdev->dev, res); - if (IS_ERR(phy->base)) - return PTR_ERR(phy->base); + phy->base = ioremap(res->start, resource_size(res)); + if (IS_ERR(phy->base)) { + ret = PTR_ERR(phy->base); + goto error_put_device; + } /* v2 SGMII has a per-lane digital digital, so parse it if it exists */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1); if (res) { - phy->digital = devm_ioremap_resource(&sgmii_pdev->dev, res); - if (IS_ERR(phy->base)) - return PTR_ERR(phy->base); - + phy->digital = ioremap(res->start, resource_size(res)); + if (IS_ERR(phy->digital)) { + ret = PTR_ERR(phy->digital); + goto error_unmap_base; + } } - return phy->initialize(adpt); + ret = phy->initialize(adpt); + if (ret) + goto error; + + /* We've remapped the addresses, so we don't need the device any + * more. of_find_device_by_node() says we should release it. + */ + put_device(&sgmii_pdev->dev); + + return 0; + +error: + if (phy->digital) + iounmap(phy->digital); +error_unmap_base: + iounmap(phy->base); +error_put_device: + put_device(&sgmii_pdev->dev); + + return ret; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index e47d38701d6c..429b4cb59ac4 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -723,6 +723,10 @@ static int emac_remove(struct platform_device *pdev) mdiobus_unregister(adpt->mii_bus); free_netdev(netdev); + if (adpt->phy.digital) + iounmap(adpt->phy.digital); + iounmap(adpt->phy.base); + return 0; } From 0de709acbc0d0001dc4b0953aebcfb0f030b9b0e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 28 Sep 2016 11:58:43 -0500 Subject: [PATCH 0941/1050] net: qcom/emac: use device_get_mac_address Replace the DT-specific of_get_mac_address() function with device_get_mac_address, which works on both DT and ACPI platforms. This change makes it easier to add ACPI support. Signed-off-by: Timur Tabi Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 429b4cb59ac4..551df1c52620 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -531,18 +531,16 @@ static void emac_clks_teardown(struct emac_adapter *adpt) static int emac_probe_resources(struct platform_device *pdev, struct emac_adapter *adpt) { - struct device_node *node = pdev->dev.of_node; struct net_device *netdev = adpt->netdev; struct resource *res; - const void *maddr; + char maddr[ETH_ALEN]; int ret = 0; /* get mac address */ - maddr = of_get_mac_address(node); - if (!maddr) - eth_hw_addr_random(netdev); - else + if (device_get_mac_address(&pdev->dev, maddr, ETH_ALEN)) ether_addr_copy(netdev->dev_addr, maddr); + else + eth_hw_addr_random(netdev); /* Core 0 interrupt */ ret = platform_get_irq(pdev, 0); From 5f3d38078c84f7bc12739a3b79fc59797cf0262d Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 28 Sep 2016 11:58:44 -0500 Subject: [PATCH 0942/1050] net: qcom/emac: initial ACPI support Add support for reading addresses, interrupts, and _DSD properties from ACPI tables, just like with device tree. The HID for the EMAC device itself is QCOM8070. The internal PHY is represented by a child node with a HID of QCOM8071. The EMAC also has some complex clock initialization requirements that are not represented by this patch. This will be addressed in a future patch. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-phy.c | 37 ++++++++-- .../net/ethernet/qualcomm/emac/emac-sgmii.c | 72 ++++++++++++++----- drivers/net/ethernet/qualcomm/emac/emac.c | 12 ++++ 3 files changed, 95 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c index c412ba9a27e7..da4e90db4d98 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "emac.h" #include "emac-mac.h" #include "emac-phy.h" @@ -167,7 +168,6 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) { struct device_node *np = pdev->dev.of_node; - struct device_node *phy_np; struct mii_bus *mii_bus; int ret; @@ -183,14 +183,37 @@ int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) mii_bus->parent = &pdev->dev; mii_bus->priv = adpt; - ret = of_mdiobus_register(mii_bus, np); - if (ret) { - dev_err(&pdev->dev, "could not register mdio bus\n"); - return ret; + if (has_acpi_companion(&pdev->dev)) { + u32 phy_addr; + + ret = mdiobus_register(mii_bus); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + ret = device_property_read_u32(&pdev->dev, "phy-channel", + &phy_addr); + if (ret) + /* If we can't read a valid phy address, then assume + * that there is only one phy on this mdio bus. 
+ */ + adpt->phydev = phy_find_first(mii_bus); + else + adpt->phydev = mdiobus_get_phy(mii_bus, phy_addr); + + } else { + struct device_node *phy_np; + + ret = of_mdiobus_register(mii_bus, np); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + + phy_np = of_parse_phandle(np, "phy-handle", 0); + adpt->phydev = of_phy_find_device(phy_np); } - phy_np = of_parse_phandle(np, "phy-handle", 0); - adpt->phydev = of_phy_find_device(phy_np); if (!adpt->phydev) { dev_err(&pdev->dev, "could not find external phy\n"); mdiobus_unregister(mii_bus); diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index ad0e4209d948..3d2c05a40d2c 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -14,6 +14,7 @@ */ #include +#include #include #include "emac.h" #include "emac-mac.h" @@ -662,6 +663,24 @@ void emac_sgmii_reset(struct emac_adapter *adpt) clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); } +static int emac_sgmii_acpi_match(struct device *dev, void *data) +{ + static const struct acpi_device_id match_table[] = { + { + .id = "QCOM8071", + .driver_data = (kernel_ulong_t)emac_sgmii_init_v2, + }, + {} + }; + const struct acpi_device_id *id = acpi_match_device(match_table, dev); + emac_sgmii_initialize *initialize = data; + + if (id) + *initialize = (emac_sgmii_initialize)id->driver_data; + + return !!id; +} + static const struct of_device_id emac_sgmii_dt_match[] = { { .compatible = "qcom,fsm9900-emac-sgmii", @@ -679,30 +698,45 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) struct platform_device *sgmii_pdev = NULL; struct emac_phy *phy = &adpt->phy; struct resource *res; - const struct of_device_id *match; - struct device_node *np; int ret; - np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); - if (!np) { - dev_err(&pdev->dev, "missing internal-phy property\n"); - return -ENODEV; - } + if (has_acpi_companion(&pdev->dev)) { + struct device *dev; - sgmii_pdev = of_find_device_by_node(np); - if (!sgmii_pdev) { - dev_err(&pdev->dev, "invalid internal-phy property\n"); - return -ENODEV; - } + dev = device_find_child(&pdev->dev, &phy->initialize, + emac_sgmii_acpi_match); - match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); - if (!match) { - dev_err(&pdev->dev, "unrecognized internal phy node\n"); - ret = -ENODEV; - goto error_put_device; - } + if (!dev) { + dev_err(&pdev->dev, "cannot find internal phy node\n"); + return -ENODEV; + } - phy->initialize = (emac_sgmii_initialize)match->data; + sgmii_pdev = to_platform_device(dev); + } else { + const struct of_device_id *match; + struct device_node *np; + + np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); + if (!np) { + dev_err(&pdev->dev, "missing internal-phy property\n"); + return -ENODEV; + } + + sgmii_pdev = of_find_device_by_node(np); + if (!sgmii_pdev) { + dev_err(&pdev->dev, "invalid internal-phy property\n"); + return -ENODEV; + } + + match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); + if (!match) { + dev_err(&pdev->dev, "unrecognized internal phy node\n"); + ret = -ENODEV; + goto error_put_device; + } + + phy->initialize = (emac_sgmii_initialize)match->data; + } /* Base address is the first address */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 551df1c52620..9bf3b2b82e95 100644 --- 
a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "emac.h" #include "emac-mac.h" #include "emac-phy.h" @@ -575,6 +576,16 @@ static const struct of_device_id emac_dt_match[] = { {} }; +#if IS_ENABLED(CONFIG_ACPI) +static const struct acpi_device_id emac_acpi_match[] = { + { + .id = "QCOM8070", + }, + {} +}; +MODULE_DEVICE_TABLE(acpi, emac_acpi_match); +#endif + static int emac_probe(struct platform_device *pdev) { struct net_device *netdev; @@ -734,6 +745,7 @@ static struct platform_driver emac_platform_driver = { .driver = { .name = "qcom-emac", .of_match_table = emac_dt_match, + .acpi_match_table = ACPI_PTR(emac_acpi_match), }, }; From ba6dea4f7cedb4b1c17e36f4087675d817c2e24b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 26 Sep 2016 16:50:55 +0100 Subject: [PATCH 0943/1050] ARM: 8616/1: dt: Respect property size when parsing CPUs Whilst MPIDR values themselves are less than 32 bits, it is still perfectly valid for a DT to have #address-cells > 1 in the CPUs node, resulting in the "reg" property having leading zero cell(s). In that situation, the big-endian nature of the data conspires with the current behaviour of only reading the first cell to cause the kernel to think all CPUs have ID 0, and become resoundingly unhappy as a consequence. Take the full property length into account when parsing CPUs so as to be correct under any circumstances. Cc: Russell King Signed-off-by: Robin Murphy Signed-off-by: Russell King --- arch/arm/kernel/devtree.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 40ecd5f514a2..f676febbb270 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -88,6 +88,8 @@ void __init arm_dt_init_cpu_maps(void) return; for_each_child_of_node(cpus, cpu) { + const __be32 *cell; + int prop_bytes; u32 hwid; if (of_node_cmp(cpu->type, "cpu")) @@ -99,7 +101,8 @@ void __init arm_dt_init_cpu_maps(void) * properties is considered invalid to build the * cpu_logical_map. */ - if (of_property_read_u32(cpu, "reg", &hwid)) { + cell = of_get_property(cpu, "reg", &prop_bytes); + if (!cell || prop_bytes < sizeof(*cell)) { pr_debug(" * %s missing reg property\n", cpu->full_name); of_node_put(cpu); @@ -107,10 +110,15 @@ void __init arm_dt_init_cpu_maps(void) } /* - * 8 MSBs must be set to 0 in the DT since the reg property + * Bits n:24 must be set to 0 in the DT since the reg property * defines the MPIDR[23:0]. */ - if (hwid & ~MPIDR_HWID_BITMASK) { + do { + hwid = be32_to_cpu(*cell++); + prop_bytes -= sizeof(*cell); + } while (!hwid && prop_bytes > 0); + + if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) { of_node_put(cpu); return; } From d248220f0465b818887baa9829e691fe662b2c5e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 29 Sep 2016 08:32:55 +0100 Subject: [PATCH 0944/1050] ARM: 8617/1: dma: fix dma_max_pfn() Since commit 6ce0d2001692 ("ARM: dma: Use dma_pfn_offset for dma address translation"), dma_to_pfn() already returns the PFN with the physical memory start offset so we don't need to add it again. This fixes USB mass storage lock-up problem on systems that can't do DMA over the entire physical memory range (e.g.) Keystone 2 systems with 4GB RAM can only do DMA over the first 2GB. [K2E-EVM]. What happens there is that without this patch SCSI layer sets a wrong bounce buffer limit in scsi_calculate_bounce_limit() for the USB mass storage device. 
dma_max_pfn() evaluates to 0x8fffff and bounce_limit is set to 0x8fffff000 whereas maximum DMA'ble physical memory on Keystone 2 is 0x87fffffff. This results in non DMA'ble pages being given to the USB controller and hence the lock-up. NOTE: in the above case, USB-SCSI-device's dma_pfn_offset was showing as 0. This should have really been 0x780000 as on K2e, LOWMEM_START is 0x80000000 and HIGHMEM_START is 0x800000000. DMA zone is 2GB so dma_max_pfn should be 0x87ffff. The incorrect dma_pfn_offset for the USB storage device is because USB devices are not correctly inheriting the dma_pfn_offset from the USB host controller. This will be fixed by a separate patch. Fixes: 6ce0d2001692 ("ARM: dma: Use dma_pfn_offset for dma address translation") Cc: stable@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Santosh Shilimkar Cc: Arnd Bergmann Cc: Olof Johansson Cc: Catalin Marinas Cc: Linus Walleij Reported-by: Grygorii Strashko Signed-off-by: Roger Quadros Signed-off-by: Russell King --- arch/arm/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index d009f7911ffc..bf02dbd9ccda 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -111,7 +111,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) /* The ARM override for dma_max_pfn() */ static inline unsigned long dma_max_pfn(struct device *dev) { - return PHYS_PFN_OFFSET + dma_to_pfn(dev, *dev->dma_mask); + return dma_to_pfn(dev, *dev->dma_mask); } #define dma_max_pfn(dev) dma_max_pfn(dev) From 6ca8ac773e97e2dfa5734ae435c40e672dd19ac4 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Thu, 22 Sep 2016 11:59:47 +0100 Subject: [PATCH 0945/1050] MIPS: smp-cps: Avoid BUG() when offlining pre-r6 CPUs Commit 0d2808f338c7 ("MIPS: smp-cps: Add support for CPU hotplug of MIPSr6 processors") added a call to mips_cm_lock_other in order to lock the CPC in CPUs containing a version 3 or higher Coherence Manager, which use the general CM core other register, where previous CMs had a dedicated core other register for the CPC. A kernel BUG() is triggered, however, if mips_cm_lock_other is called with a VP other than 0 on a CPU with CM < 3, a condition introduced by 0d2808f338c7. Avoid the BUG() by always locking VP0 when locking the CPC, since the required register, cpc_stat_conf, is shared by all vps in a core. Fixes: 0d2808f338c7 ("MIPS: smp-cps: Add support for CPU hotplug...) Signed-off-by: Matt Redfearn Cc: Qais Yousef Cc: Masahiro Yamada Cc: James Hogan Cc: Paul Burton Cc: Andrew Morton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14297/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-cps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index e9d9fc6c754c..6183ad84cc73 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -513,7 +513,7 @@ static void cps_cpu_die(unsigned int cpu) * in which case the CPC will refuse to power down the core. 
*/ do { - mips_cm_lock_other(core, vpe_id); + mips_cm_lock_other(core, 0); mips_cpc_lock_other(core); stat = read_cpc_co_stat_conf(); stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK; From db06068a4fd44a57b642b369d2a295b8448f6b65 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 22 Sep 2016 15:38:31 +0200 Subject: [PATCH 0946/1050] MIPS: fix uretprobe implementation arch_uretprobe_hijack_return_addr should replace the return address for a call with a trampoline address. Signed-off-by: Marcin Nowakowski Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14298/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 1149b30c9aeb..cd8a68948066 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -257,7 +257,7 @@ unsigned long arch_uretprobe_hijack_return_addr( ra = regs->regs[31]; /* Replace the return address with the trampoline address */ - regs->regs[31] = ra; + regs->regs[31] = trampoline_vaddr; return ra; } From ddabfa5c2e33f1b495f3e0176de7057850915c0b Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 22 Sep 2016 15:38:32 +0200 Subject: [PATCH 0947/1050] MIPS: uprobes: remove incorrect set_orig_insn Generic kernel code implements a weak version of set_orig_insn that moves cached 'insn' from arch_uprobe to the original code location when the trap is removed. MIPS variant used arch_uprobe->orig_inst which was never initialised properly, so this code only inserted a nop instead of the original instruction. With that change orig_inst can also be safely removed. Signed-off-by: Marcin Nowakowski Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14299/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/uprobes.h | 1 - arch/mips/kernel/uprobes.c | 18 ------------------ 2 files changed, 19 deletions(-) diff --git a/arch/mips/include/asm/uprobes.h b/arch/mips/include/asm/uprobes.h index 34c325c674c4..70a4a2f173ff 100644 --- a/arch/mips/include/asm/uprobes.h +++ b/arch/mips/include/asm/uprobes.h @@ -36,7 +36,6 @@ struct arch_uprobe { unsigned long resume_epc; u32 insn[2]; u32 ixol[2]; - union mips_instruction orig_inst[MAX_UINSN_BYTES / 4]; }; struct arch_uprobe_task { diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index cd8a68948066..3b8accb0b4f0 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -280,24 +280,6 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } -/** - * set_orig_insn - Restore the original instruction. - * @mm: the probed process address space. - * @auprobe: arch specific probepoint information. - * @vaddr: the virtual address to insert the opcode. - * - * For mm @mm, restore the original opcode (opcode) at @vaddr. - * Return 0 (success) or a negative errno. - * - * This overrides the weak version in kernel/events/uprobes.c. 
- */ -int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, - unsigned long vaddr) -{ - return uprobe_write_opcode(mm, vaddr, - *(uprobe_opcode_t *)&auprobe->orig_inst[0].word); -} - void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len) { From ca86c9ef2b322ebf24772009fdea037688cbdac1 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 22 Sep 2016 15:38:33 +0200 Subject: [PATCH 0948/1050] MIPS: uprobes: fix use of uninitialised variable arch_uprobe_pre_xol needs to emulate a branch if a branch instruction has been replaced with a breakpoint, but in fact an uninitialised local variable was passed to the emulator routine instead of the original instruction Signed-off-by: Marcin Nowakowski Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14300/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/uprobes.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 3b8accb0b4f0..4c7c1558944a 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -157,7 +157,6 @@ bool is_trap_insn(uprobe_opcode_t *insn) int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - union mips_instruction insn; /* * Now find the EPC where to resume after the breakpoint has been @@ -168,10 +167,10 @@ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) unsigned long epc; epc = regs->cp0_epc; - __compute_return_epc_for_insn(regs, insn); + __compute_return_epc_for_insn(regs, + (union mips_instruction) aup->insn[0]); aup->resume_epc = regs->cp0_epc; } - utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; regs->cp0_epc = current->utask->xol_vaddr; From 0a900553715c39cfb6170ccc9846b194a4c13ceb Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Fri, 26 Aug 2016 14:02:04 -0500 Subject: [PATCH 0949/1050] MIPS: Octeon: mark GPIO controller node not populated after IRQ init. We clear the OF_POPULATED flag for the GPIO controller node on Octeon processors. Otherwise, none of the devices hanging on the GPIO lines are probed. The 'gpio-leds' driver on OCTEON failed to probe in addition to other devices on Cavium 71xx and 78xx development boards. Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Signed-off-by: Steven J. Hill Tested-by: Aaro Koskinen Cc: David Daney Cc: Rob Herring Cc: linux-mips@linux-mips.org Cc: devicetree@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14091/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/octeon-irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 5a9b87b7993e..c1eb1ff7c800 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1619,6 +1619,12 @@ static int __init octeon_irq_init_gpio( return -ENOMEM; } + /* + * Clear the OF_POPULATED flag that was set by of_irq_init() + * so that all GPIO devices will be probed. 
+ */ + of_node_clear_flag(gpio_node, OF_POPULATED); + return 0; } /* From 3021773c7c3e75e20b693931a19362681e744ea9 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 20 Sep 2016 14:33:01 +0200 Subject: [PATCH 0950/1050] MIPS: DEC: Avoid la pseudo-instruction in delay slots When expanding the la or dla pseudo-instruction in a delay slot the GNU assembler will complain should the pseudo-instruction expand to multiple actual instructions, since only the first of them will be in the delay slot leading to the pseudo-instruction being only partially executed if the branch is taken. Use of PTR_LA in the dec int-handler.S leads to such warnings: arch/mips/dec/int-handler.S: Assembler messages: arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot Avoid this by open coding the PTR_LA macros. Signed-off-by: Ralf Baechle --- arch/mips/dec/int-handler.S | 40 +++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index d7b99180c6e1..1910223a9c02 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -146,7 +146,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,cpu_mask_nr_tbl + # open coded PTR_LA t1, cpu_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, cpu_mask_nr_tbl + lui t1, %hi(cpu_mask_nr_tbl) + addiu t1, %lo(cpu_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, cpu_mask_nr_tbl + .set push + .set noat + lui t1, %highest(cpu_mask_nr_tbl) + lui AT, %hi(cpu_mask_nr_tbl) + daddiu t1, t1, %higher(cpu_mask_nr_tbl) + daddiu AT, AT, %lo(cpu_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 1: lw t2,(t1) nop and t2,t0 @@ -195,7 +213,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,asic_mask_nr_tbl + # open coded PTR_LA t1,asic_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, asic_mask_nr_tbl + lui t1, %hi(asic_mask_nr_tbl) + addiu t1, %lo(asic_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, asic_mask_nr_tbl + .set push + .set noat + lui t1, %highest(asic_mask_nr_tbl) + lui AT, %hi(asic_mask_nr_tbl) + daddiu t1, t1, %higher(asic_mask_nr_tbl) + daddiu AT, AT, %lo(asic_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 2: lw t2,(t1) nop and t2,t0 From 72c70f010dfcc9ea6cc13500602a29e33748452f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:18:26 +0100 Subject: [PATCH 0951/1050] MIPS: Stop setting I6400 FTLBP The FTLBP field in Config7 for the I6400 is intended as chicken bits for debugging rather than as a field that software actually makes use of. For best performance, FTLBP should be left at its default value of 0 with all TLB writes hitting the FTLB by default. Additionally, since set_ftlb_enable is called from decode_configs before decode_config4 which determines the size of the TLBs, this was previously always setting FTLBP=3 for a 3:1 FTLB:VTLB write ratio which makes abysmal use of the available FTLB resources. This effectively reverts b0c4e1b79d8a ("MIPS: Set up FTLB probability for I6400"). 
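As an illustration of the ordering problem (a hypothetical sketch only, not the kernel's actual calculate_ftlb_probability(), whose thresholds differ): any write-ratio computation performed before decode_config4() runs sees zeroed TLB sizes and degenerates to the worst-case constant.

        /* Hypothetical sketch: called before decode_config4(), both entry
         * counts are still zero, so every ratio test falls through.
         */
        static unsigned int ftlb_probability_sketch(unsigned int ftlb_entries,
                                                    unsigned int vtlb_entries)
        {
                if (vtlb_entries && ftlb_entries / vtlb_entries < 2)
                        return 1;       /* 1:1 FTLB:VTLB write ratio */
                if (vtlb_entries && ftlb_entries / vtlb_entries < 4)
                        return 2;       /* 2:1 */
                return 3;               /* always reached with zero sizes */
        }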
Signed-off-by: Paul Burton Fixes: b0c4e1b79d8a ("MIPS: Set up FTLB probability for I6400") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14021/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mipsregs.h | 2 -- arch/mips/kernel/cpu-probe.c | 9 ++------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index def9d8d13f6e..7dd2dd47909a 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -660,8 +660,6 @@ #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) -/* FTLB probability bits for R6 */ -#define MIPS_CONF7_FTLBP_SHIFT (18) /* WatchLo* register definitions */ #define MIPS_WATCHLO_IRW (_ULCAST_(0x7) << 0) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index a88d44247cc8..ae290506873b 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -555,13 +555,8 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) write_c0_config6(config & ~MIPS_CONF6_FTLBEN); break; case CPU_I6400: - /* I6400 & related cores use Config7 to configure FTLB */ - config = read_c0_config7(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF7_FTLBP_SHIFT); - write_c0_config7(config | (calculate_ftlb_probability(c) - << MIPS_CONF7_FTLBP_SHIFT)); - break; + /* There's no way to disable the FTLB */ + return !enable; case CPU_LOONGSON3: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | From ebd0e0f503d0774407a63ebb5ec1a90bb54941f5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:18:27 +0100 Subject: [PATCH 0952/1050] MIPS: Configure FTLB after probing TLB sizes from config4 On some cores (proAptiv, P5600) we make use of the sizes of the TLBs to determine the desired FTLB:VTLB write ratio. However set_ftlb_enable & thus calculate_ftlb_probability is called before decode_config4. This results in us calculating a probability based on zero sizes, and we end up setting FTLBP=3 for a 3:1 FTLB:VTLB write ratio in all cases. This will make abysmal use of the available FTLB resources in the affected cores. Fix this by configuring the FTLB probability after having decoded config4. However we do need to have enabled the FTLB before that point such that fields in config4 actually reflect that an FTLB is present. So set_ftlb_enable is now called twice, with flags indicating that it should configure the write probability only the second time. 
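A condensed sketch of the resulting two-phase flow (simplified from the decode_configs() changes below; error handling and unrelated probing omitted):

        static void decode_configs_sketch(struct cpuinfo_mips *c)
        {
                /* Phase 1: enable the FTLB so that config4 reports its
                 * geometry at all.
                 */
                set_ftlb_enable(c, mips_ftlb_disabled ? 0 : FTLB_EN);

                decode_config0(c);
                /* ... decode_config4() fills in c->tlbsizeftlbsets etc ... */

                /* Phase 2: the sizes are now known, so the FTLB:VTLB write
                 * ratio requested via FTLB_SET_PROB is meaningful.
                 */
                set_ftlb_enable(c, (mips_ftlb_disabled ? 0 : FTLB_EN) |
                                   FTLB_SET_PROB);
        }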
Signed-off-by: Paul Burton Fixes: cf0a8aa0226d ("MIPS: cpu-probe: Set the FTLB probability bit on supported cores") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14022/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 43 +++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index ae290506873b..5069b5b1488f 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -352,7 +352,12 @@ __setup("nohtw", htw_disable); static int mips_ftlb_disabled; static int mips_has_ftlb_configured; -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable); +enum ftlb_flags { + FTLB_EN = 1 << 0, + FTLB_SET_PROB = 1 << 1, +}; + +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags); static int __init ftlb_disable(char *s) { @@ -531,7 +536,7 @@ static unsigned int calculate_ftlb_probability(struct cpuinfo_mips *c) return 3; } -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) { unsigned int config; @@ -542,28 +547,32 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) case CPU_P6600: /* proAptiv & related cores use Config6 to enable the FTLB */ config = read_c0_config6(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); - if (enable) - /* Enable FTLB */ - write_c0_config6(config | - (calculate_ftlb_probability(c) - << MIPS_CONF6_FTLBP_SHIFT) - | MIPS_CONF6_FTLBEN); + + if (flags & FTLB_EN) + config |= MIPS_CONF6_FTLBEN; else - /* Disable FTLB */ - write_c0_config6(config & ~MIPS_CONF6_FTLBEN); + config &= ~MIPS_CONF6_FTLBEN; + + if (flags & FTLB_SET_PROB) { + config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); + config |= calculate_ftlb_probability(c) + << MIPS_CONF6_FTLBP_SHIFT; + } + + write_c0_config6(config); break; case CPU_I6400: /* There's no way to disable the FTLB */ - return !enable; + if (!(flags & FTLB_EN)) + return 1; + return 0; case CPU_LOONGSON3: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB); /* Loongson-3 cores use Config6 to enable the FTLB */ config = read_c0_config6(); - if (enable) + if (flags & FTLB_EN) /* Enable FTLB */ write_c0_config6(config & ~MIPS_CONF6_FTLBDIS); else @@ -783,6 +792,7 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) PAGE_SIZE, config4); /* Switch FTLB off */ set_ftlb_enable(c, 0); + mips_ftlb_disabled = 1; break; } c->tlbsizeftlbsets = 1 << @@ -847,7 +857,7 @@ static void decode_configs(struct cpuinfo_mips *c) c->scache.flags = MIPS_CACHE_NOT_PRESENT; /* Enable FTLB if present and not disabled */ - set_ftlb_enable(c, !mips_ftlb_disabled); + set_ftlb_enable(c, mips_ftlb_disabled ? 0 : FTLB_EN); ok = decode_config0(c); /* Read Config registers. */ BUG_ON(!ok); /* Arch spec violation! */ @@ -897,6 +907,9 @@ static void decode_configs(struct cpuinfo_mips *c) } } + /* configure the FTLB write probability */ + set_ftlb_enable(c, (mips_ftlb_disabled ? 
0 : FTLB_EN) | FTLB_SET_PROB); + mips_probe_watch_registers(c); #ifndef CONFIG_MIPS_CPS From 67acd8d5c606cf42e6726767d705851dec9f6a34 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:18:28 +0100 Subject: [PATCH 0953/1050] MIPS: clear execution hazard after changing FTLB enable On current P-series cores from Imagination the FTLB can be enabled or disabled via a bit in the Config6 register, and an execution hazard is created by changing the value of that bit. The ftlb_disable function already cleared that hazard but that does no good for other callers. Clear the hazard in the set_ftlb_enable function that creates it, and only for the cores where it applies. This has the effect of reverting c982c6d6c48b ("MIPS: cpu-probe: Remove cp0 hazard barrier when enabling the FTLB") which was incorrect. Signed-off-by: Paul Burton Fixes: c982c6d6c48b ("MIPS: cpu-probe: Remove cp0 hazard barrier when enabling the FTLB") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14023/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 5069b5b1488f..dd3175442c9e 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -376,8 +376,6 @@ static int __init ftlb_disable(char *s) return 1; } - back_to_back_c0_hazard(); - config4 = read_c0_config4(); /* Check that FTLB has been disabled */ @@ -560,6 +558,7 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) } write_c0_config6(config); + back_to_back_c0_hazard(); break; case CPU_I6400: /* There's no way to disable the FTLB */ From 1eefcbc89cf3a8e252e5aeb25825594699b47360 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:15:40 +0100 Subject: [PATCH 0954/1050] MIPS: Fix BUILD_ROLLBACK_PROLOGUE for microMIPS When the kernel is built for microMIPS, branch targets need to be known to be microMIPS code in order to result in bit 0 of the PC being set. The branch target in the BUILD_ROLLBACK_PROLOGUE macro was simply the end of the macro, which may be pointing at padding rather than at code. This results in recent enough GNU linkers complaining like so: mips-img-linux-gnu-ld: arch/mips/built-in.o: .text+0x3e3c: Unsupported branch between ISA modes. mips-img-linux-gnu-ld: final link failed: Bad value Makefile:936: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Fix this by changing the branch target to be the start of the appropriate handler, skipping over any padding. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14019/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/genex.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 17326a90d53c..dc0b29612891 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -142,9 +142,8 @@ LEAF(__r4k_wait) PTR_LA k1, __r4k_wait ori k0, 0x1f /* 32 byte rollback region */ xori k0, 0x1f - bne k0, k1, 9f + bne k0, k1, \handler MTC0 k0, CP0_EPC -9: .set pop .endm From 305723ab439e14debc1d339aa04e835d488b8253 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 2 Sep 2016 16:07:10 +0100 Subject: [PATCH 0955/1050] MIPS: Malta: Fix IOCU disable switch read for MIPS64 Malta boards used with CPU emulators feature a switch to disable use of an IOCU. Software has to check this switch & ignore any present IOCU if the switch is closed.
The read used to do this was unsafe for 64 bit kernels, as it simply casted the address 0xbf403000 to a pointer & dereferenced it. Whilst in a 32 bit kernel this would access kseg1, in a 64 bit kernel this attempts to access xuseg & results in an address error exception. Fix by accessing a correctly formed ckseg1 address generated using the CKSEG1ADDR macro. Whilst modifying this code, define the name of the register and the bit we care about within it, which indicates whether PCI DMA is routed to the IOCU or straight to DRAM. The code previously checked that bit 0 was also set, but the least significant 7 bits of the CONFIG_GEN0 register contain the value of the MReqInfo signal provided to the IOCU OCP bus, so singling out bit 0 makes little sense & that part of the check is dropped. Signed-off-by: Paul Burton Fixes: b6d92b4a6bdb ("MIPS: Add option to disable software I/O coherency.") Cc: Matt Redfearn Cc: Masahiro Yamada Cc: Kees Cook Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14187/ Signed-off-by: Ralf Baechle --- arch/mips/mti-malta/malta-setup.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index ec5b21678fad..7e7364b0501e 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -39,6 +39,9 @@ #include #endif +#define ROCIT_CONFIG_GEN0 0x1f403000 +#define ROCIT_CONFIG_GEN0_PCI_IOCU BIT(7) + extern void malta_be_init(void); extern int malta_be_handler(struct pt_regs *regs, int is_fixup); @@ -107,6 +110,8 @@ static void __init fd_activate(void) static int __init plat_enable_iocoherency(void) { int supported = 0; + u32 cfg; + if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) { if (BONITO_PCICACHECTRL & BONITO_PCICACHECTRL_CPUCOH_PRES) { BONITO_PCICACHECTRL |= BONITO_PCICACHECTRL_CPUCOH_EN; @@ -129,7 +134,8 @@ static int __init plat_enable_iocoherency(void) } else if (mips_cm_numiocu() != 0) { /* Nothing special needs to be done to enable coherency */ pr_info("CMP IOCU detected\n"); - if ((*(unsigned int *)0xbf403000 & 0x81) != 0x81) { + cfg = __raw_readl((u32 *)CKSEG1ADDR(ROCIT_CONFIG_GEN0)); + if (!(cfg & ROCIT_CONFIG_GEN0_PCI_IOCU)) { pr_crit("IOCU OPERATION DISABLED BY SWITCH - DEFAULTING TO SW IO COHERENCY\n"); return 0; } From 058effe7fdc5776b017356f690976a857eea473f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 2 Sep 2016 15:17:31 +0100 Subject: [PATCH 0956/1050] MIPS: Fix detection of unsupported highmem with cache aliases The paging_init() function contains code which detects that highmem is in use but unsupported due to dcache aliasing. However this code was ineffective because it was being run before the caches are probed, meaning that cpu_has_dc_aliases would always evaluate to false (unless a platform overrides it to a compile-time constant) and the detection of the unsupported case is never triggered. The kernel would then go on to attempt to use highmem & either hit coherency issues or trigger the BUG_ON in flush_kernel_dcache_page(). Fix this by running paging_init() later than cpu_cache_init(), such that the cpu_has_dc_aliases macro will evaluate correctly & the unsupported highmem case will be detected successfully. This then leads to a formerly hidden issue in that mem_init_free_highmem() will attempt to free all highmem pages, even though we're avoiding use of them & don't have valid page structs for them. 
This leads to an invalid pointer dereference & a TLB exception. Avoid this by skipping the loop in mem_init_free_highmem() if cpu_has_dc_aliases evaluates true. Signed-off-by: Paul Burton Cc: Rabin Vincent Cc: Matt Redfearn Cc: Jerome Marchand Cc: Alexander Sverdlin Cc: Aurelien Jarno Cc: Jaedon Shin Cc: Toshi Kani Cc: James Hogan Cc: Sergey Ryazanov Cc: Jonas Gorski Cc: Kirill A. Shutemov Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14184/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 2 +- arch/mips/mm/init.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 3be0e6ba2797..0d57909d9026 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -764,7 +764,6 @@ static void __init arch_mem_init(char **cmdline_p) device_tree_init(); sparse_init(); plat_swiotlb_setup(); - paging_init(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); /* Tell bootmem about cma reserved memblock section */ @@ -877,6 +876,7 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); cpu_cache_init(); + paging_init(); } unsigned long kernelsp[NR_CPUS]; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 2c3749d98f04..72f7478ee068 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -440,6 +440,9 @@ static inline void mem_init_free_highmem(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; + if (cpu_has_dc_aliases) + return; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { struct page *page = pfn_to_page(tmp); From e1bfc11c5a6f40222a698a818dc269113245820e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 28 Sep 2016 12:34:14 -0700 Subject: [PATCH 0957/1050] x86/init: Fix cr4_init_shadow() on CR4-less machines cr4_init_shadow() will panic on 486-like machines without CR4. Fix it using __read_cr4_safe(). Reported-by: david@saggiorato.net Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4") Link: http://lkml.kernel.org/r/43a20f81fb504013bf613913dc25574b45336a61.1475091074.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..dee8a70382ba 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { - this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); + this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe()); } /* Set in this cpu's CR4. */ From 8732db67c6b6dcdb455b73773ea2fc1e1d5024b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 0958/1050] rxrpc: Fix exclusive client connections Exclusive connections are currently reusable (which they shouldn't be) because rxrpc_alloc_client_connection() checks the exclusive flag in the rxrpc_connection struct before it's initialised from the function parameters. This means that the DONT_REUSE flag doesn't get set. Fix this by checking the function parameters for the exclusive flag. 
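Reduced to its essentials (hypothetical types, not the actual rxrpc structures), the bug is a read-before-copy ordering problem:

        #include <stdbool.h>

        struct params { bool exclusive; };
        struct conn { struct params params; bool dont_reuse; };

        static void conn_init_sketch(struct conn *conn, const struct params *cp)
        {
                /* BUG: with a freshly zeroed *conn, conn->params.exclusive is
                 * still false here, so this test can never fire.
                 */
                if (conn->params.exclusive)
                        conn->dont_reuse = true;

                conn->params = *cp;     /* the parameters only arrive here */

                /* Fix: test cp->exclusive, which is valid before the copy. */
        }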
Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index c76a125df891..f5ee8bfa5bef 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -200,7 +200,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) } atomic_set(&conn->usage, 1); - if (conn->params.exclusive) + if (cp->exclusive) __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); conn->params = *cp; From a1767077b0176de17fa40ec743a20cbdac7a0d56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 0959/1050] rxrpc: Make Tx loss-injection go through normal return and adjust tracing In rxrpc_send_data_packet() make the loss-injection path return through the same code as the transmission path so that the RTT determination is initiated and any future timer shuffling will be done, despite the packet having been binned. Whilst we're at it: (1) Add to the tx_data tracepoint an indication of whether or not we're retransmitting a data packet. (2) When we're deciding whether or not to request an ACK, rather than checking if we're in fast-retransmit mode check instead if we're retransmitting. (3) Don't invoke the lose_skb tracepoint when losing a Tx packet as we're not altering the sk_buff refcount nor are we just seeing it after getting it off the Tx list. (4) The rxrpc_skb_tx_lost note is then no longer used so remove it. (5) rxrpc_lose_skb() no longer needs to deal with rxrpc_skb_tx_lost. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 ++++-- net/rxrpc/ar-internal.h | 3 +-- net/rxrpc/call_event.c | 2 +- net/rxrpc/misc.c | 1 - net/rxrpc/output.c | 17 +++++++++-------- net/rxrpc/sendmsg.c | 2 +- net/rxrpc/skbuff.c | 11 +++-------- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ada12d00118c..8ba8d76e856a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -258,15 +258,16 @@ TRACE_EVENT(rxrpc_rx_ack, TRACE_EVENT(rxrpc_tx_data, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, u8 flags, bool lose), + rxrpc_serial_t serial, u8 flags, bool retrans, bool lose), - TP_ARGS(call, seq, serial, flags, lose), + TP_ARGS(call, seq, serial, flags, retrans, lose), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(rxrpc_seq_t, seq ) __field(rxrpc_serial_t, serial ) __field(u8, flags ) + __field(bool, retrans ) __field(bool, lose ) ), @@ -275,6 +276,7 @@ TRACE_EVENT(rxrpc_tx_data, __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; + __entry->retrans = retrans; __entry->lose = lose; ), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ca96e547cb9a..6aadaa7d8b43 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -603,7 +603,6 @@ enum rxrpc_skb_trace { rxrpc_skb_tx_cleaned, rxrpc_skb_tx_freed, rxrpc_skb_tx_got, - rxrpc_skb_tx_lost, rxrpc_skb_tx_new, rxrpc_skb_tx_rotated, rxrpc_skb_tx_seen, @@ -1073,7 +1072,7 @@ extern const s8 rxrpc_ack_priority[]; * output.c */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); -int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 0e8478012212..1f6c7633b964 100644 --- a/net/rxrpc/call_event.c +++ 
b/net/rxrpc/call_event.c @@ -256,7 +256,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); - if (rxrpc_send_data_packet(call, skb) < 0) { + if (rxrpc_send_data_packet(call, skb, true) < 0) { rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index aedb8978226d..47dddacdbb91 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -108,7 +108,6 @@ const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { [rxrpc_skb_tx_cleaned] = "Tx CLN", [rxrpc_skb_tx_freed] = "Tx FRE", [rxrpc_skb_tx_got] = "Tx GOT", - [rxrpc_skb_tx_lost] = "Tx *L*", [rxrpc_skb_tx_new] = "Tx NEW", [rxrpc_skb_tx_rotated] = "Tx ROT", [rxrpc_skb_tx_seen] = "Tx SEE", diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index cf43a715685e..ac9a58b619a6 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -238,7 +238,8 @@ out: /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool retrans) { struct rxrpc_connection *conn = call->conn; struct rxrpc_wire_header whdr; @@ -247,6 +248,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) struct kvec iov[2]; rxrpc_serial_t serial; size_t len; + bool lost = false; int ret, opt; _enter(",{%d}", skb->len); @@ -281,7 +283,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. */ - if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT || + if (retrans || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) @@ -290,11 +292,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, - whdr.flags, true); - rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); - _leave(" = 0 [lose]"); - return 0; + ret = 0; + lost = true; + goto done; } } @@ -319,7 +319,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false); + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, + retrans, lost); if (ret >= 0) { ktime_t now = ktime_get_real(); skb->tstamp = now; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1f8040d82395..d8dfdce874d8 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -144,7 +144,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - ret = rxrpc_send_data_packet(call, skb); + ret = rxrpc_send_data_packet(call, skb, false); if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 5154cbf7e540..67b02c45271b 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -77,14 +77,9 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) if (skb) { int n; CHECK_SLAB_OKAY(&skb->users); - if (op == rxrpc_skb_tx_lost) { - n = atomic_read(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); - } else { - n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, 
atomic_read(&skb->users), n, here); - kfree_skb(skb); - } + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); } } From 2629c7fa7c0adfdf023051b404cd538951bd0354 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 0960/1050] rxrpc: When activating client conn channels, do state check inside lock In rxrpc_activate_channels(), the connection cache state is checked outside of the lock, which means it can change whilst we're waking calls up, thereby changing whether or not we're allowed to wake calls up. Fix this by moving the check inside the locked region. The check to see if all the channels are currently busy can stay outside of the locked region. Whilst we're at it: (1) Split the locked section out into its own function so that we can call it from other places in a later patch. (2) Determine the mask of channels dependent on the state as we're going to add another state in a later patch that will restrict the number of simultaneous calls to 1 on a connection. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f5ee8bfa5bef..60ef9605167e 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -575,29 +575,43 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, wake_up(&call->waitq); } +/* + * Assign channels and callNumbers to waiting calls with channel_lock + * held by caller. + */ +static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) +{ + u8 avail, mask; + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + mask = RXRPC_ACTIVE_CHANS_MASK; + break; + default: + return; + } + + while (!list_empty(&conn->waiting_calls) && + (avail = ~conn->active_chans, + avail &= mask, + avail != 0)) + rxrpc_activate_one_channel(conn, __ffs(avail)); +} + /* * Assign channels and callNumbers to waiting calls. */ static void rxrpc_activate_channels(struct rxrpc_connection *conn) { - unsigned char mask; - _enter("%d", conn->debug_id); trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); - if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || - conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) return; spin_lock(&conn->channel_lock); - - while (!list_empty(&conn->waiting_calls) && - (mask = ~conn->active_chans, - mask &= RXRPC_ACTIVE_CHANS_MASK, - mask != 0)) - rxrpc_activate_one_channel(conn, __ffs(mask)); - + rxrpc_activate_channels_locked(conn); spin_unlock(&conn->channel_lock); _leave(""); } From 1e9e5c9521d3667664a6e3c97075f71afec23720 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:15 +0100 Subject: [PATCH 0961/1050] rxrpc: Reduce the rxrpc_local::services list to a pointer Reduce the rxrpc_local::services list to just a pointer as we don't permit multiple service endpoints to bind to a single transport endpoint (this is excluded by rxrpc_lookup_local()). The reason we don't allow this is that if you send a request to an AFS filesystem service, it will try to talk back to your cache manager on the port you sent from (this is how file change notifications are handled). To prevent someone from stealing your CM callbacks, we don't let AF_RXRPC sockets share a UDP socket if at least one of them has a service bound.
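A condensed sketch of the resulting bind-side check (simplified from the rxrpc_bind() hunk below; the errno returned on the service_in_use path is not visible in this diff, so -EADDRINUSE here is an assumption):

        static int bind_service_sketch(struct rxrpc_local *local,
                                       struct rxrpc_sock *rx)
        {
                write_lock(&local->services_lock);
                if (rcu_access_pointer(local->service)) {
                        /* someone already bound a service to this endpoint */
                        write_unlock(&local->services_lock);
                        return -EADDRINUSE;     /* assumed errno */
                }
                rcu_assign_pointer(local->service, rx);
                write_unlock(&local->services_lock);
                return 0;
        }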
Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 21 ++++++++------------- net/rxrpc/ar-internal.h | 3 +-- net/rxrpc/call_accept.c | 8 ++++---- net/rxrpc/local_object.c | 3 +-- net/rxrpc/security.c | 8 ++++---- 5 files changed, 18 insertions(+), 25 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8dbf7bed2cc4..44c9c2b0b190 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -136,7 +136,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct sock *sk = sock->sk; struct rxrpc_local *local; - struct rxrpc_sock *rx = rxrpc_sk(sk), *prx; + struct rxrpc_sock *rx = rxrpc_sk(sk); + u16 service_id = srx->srx_service; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -160,15 +161,12 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) goto error_unlock; } - if (rx->srx.srx_service) { + if (service_id) { write_lock(&local->services_lock); - hlist_for_each_entry(prx, &local->services, listen_link) { - if (prx->srx.srx_service == rx->srx.srx_service) - goto service_in_use; - } - + if (rcu_access_pointer(local->service)) + goto service_in_use; rx->local = local; - hlist_add_head_rcu(&rx->listen_link, &local->services); + rcu_assign_pointer(local->service, rx); write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -599,7 +597,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->family = protocol; rx->calls = RB_ROOT; - INIT_HLIST_NODE(&rx->listen_link); spin_lock_init(&rx->incoming_lock); INIT_LIST_HEAD(&rx->sock_calls); INIT_LIST_HEAD(&rx->to_be_accepted); @@ -681,11 +678,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); - ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); - - if (!hlist_unhashed(&rx->listen_link)) { + if (rx->local && rx->local->service == rx) { write_lock(&rx->local->services_lock); - hlist_del_rcu(&rx->listen_link); + rx->local->service = NULL; write_unlock(&rx->local->services_lock); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6aadaa7d8b43..539db54697f9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -93,7 +93,6 @@ struct rxrpc_sock { rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ - struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct rxrpc_backlog *backlog; /* Preallocation for services */ spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ struct list_head sock_calls; /* List of calls owned by this socket */ @@ -216,7 +215,7 @@ struct rxrpc_local { struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; - struct hlist_head services; /* services listening on this endpoint */ + struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a8d39d7cf42c..3cac231d8405 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -331,14 +331,14 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct 
rxrpc_local *local, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_sock *rx; struct rxrpc_call *call; + u16 service_id = sp->hdr.serviceId; _enter(""); /* Get the socket providing the service */ - hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) { - if (rx->srx.srx_service == sp->hdr.serviceId) - goto found_service; - } + rx = rcu_dereference(local->service); + if (service_id == rx->srx.srx_service) + goto found_service; trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index e3fad80b0795..ff4864d550b8 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -86,7 +86,6 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) atomic_set(&local->usage, 1); INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); @@ -292,7 +291,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) mutex_unlock(&rxrpc_local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); - ASSERT(hlist_empty(&local->services)); + ASSERT(!local->service); if (socket) { local->socket = NULL; diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 82d8134e9287..7d921e56e715 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -131,10 +131,10 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) /* find the service */ read_lock(&local->services_lock); - hlist_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == conn->params.service_id) - goto found_service; - } + rx = rcu_dereference_protected(local->service, + lockdep_is_held(&local->services_lock)); + if (rx && rx->srx.srx_service == conn->params.service_id) + goto found_service; /* the service appears to have died */ read_unlock(&local->services_lock); From b112a67081e4b06652ecde588bf1d5778fe43d75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:16 +0100 Subject: [PATCH 0962/1050] rxrpc: Request more ACKs in slow-start mode Set the request-ACK on more DATA packets whilst we're in slow start mode so that we get sufficient ACKs back to supply information to configure the window. Signed-off-by: David Howells --- net/rxrpc/output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ac9a58b619a6..0d47db886f6e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -284,6 +284,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * ACKs if a DATA packet appears to have been lost. */ if (retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()) From ed1e8679d8bc6537077d1f24bc83b396f6062f09 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Sep 2016 22:37:16 +0100 Subject: [PATCH 0963/1050] rxrpc: Note serial number being ACK'd in the congestion management trace Note the serial number of the packet being ACK'd in the congestion management trace rather than the serial number of the ACK packet. Whilst the serial number of the ACK packet is useful for matching ACK packets in the output of Wireshark, the serial number that the ACK is in response to is of more use in working out how different trace lines relate.
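A sketch of the resulting data flow (the field carrying the acked serial in the ACK body is an assumption here; only the new function parameter is visible in the diff below):

        /* In rxrpc_input_ack(), sketched: the ACK carries the serial number
         * of the packet that provoked it, and that is what gets handed to
         * the congestion code rather than the ACK's own header serial.
         */
        acked_serial = ntohl(buf.ack.serial);   /* assumed field layout */
        rxrpc_congestion_management(call, skb, &summary, acked_serial);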
Signed-off-by: David Howells --- net/rxrpc/input.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 094720dd1eaf..1461d30583c9 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -41,10 +41,10 @@ static void rxrpc_proto_abort(const char *why, */ static void rxrpc_congestion_management(struct rxrpc_call *call, struct sk_buff *skb, - struct rxrpc_ack_summary *summary) + struct rxrpc_ack_summary *summary, + rxrpc_serial_t acked_serial) { enum rxrpc_congest_change change = rxrpc_cong_no_change; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int cumulative_acks = call->cong_cumul_acks; unsigned int cwnd = call->cong_cwnd; bool resend = false; @@ -172,7 +172,7 @@ out_no_clear_ca: cwnd = RXRPC_RXTX_BUFF_SIZE - 1; call->cong_cwnd = cwnd; call->cong_cumul_acks = cumulative_acks; - trace_rxrpc_congest(call, summary, sp->hdr.serial, change); + trace_rxrpc_congest(call, summary, acked_serial, change); if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) rxrpc_queue_call(call); return; @@ -848,7 +848,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary); + return rxrpc_congestion_management(call, skb, &summary, acked_serial); } /* From a935c0523c852feb619a050597bb545e7c818d81 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:53 -0400 Subject: [PATCH 0964/1050] net: dsa: mv88e6xxx: add global1 helpers The Global (1) internal SMI device is an extended set of registers containing ATU, PPU, VTU, STU, etc. It is present on every switch, usually at SMI address 0x1B. But old models such as 88E6060 access it at address 0xF, thus using REG_GLOBAL is erroneous. Add a global1_addr info member used by mv88e6xxx_g1_{read,write} and mv88e6xxx_g1_wait helpers in a new global1.c file. This patch finally removes _mv88e6xxx_reg_{read,write}, in favor of the appropriate helpers. No functional changes here. Signed-off-by: Vivien Didelot Signed-off-by: David S.
Miller --- drivers/net/dsa/mv88e6xxx/Makefile | 1 + drivers/net/dsa/mv88e6xxx/chip.c | 505 +++++++++++++------------- drivers/net/dsa/mv88e6xxx/global1.c | 34 ++ drivers/net/dsa/mv88e6xxx/global1.h | 23 ++ drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 2 +- 5 files changed, 306 insertions(+), 259 deletions(-) create mode 100644 drivers/net/dsa/mv88e6xxx/global1.c create mode 100644 drivers/net/dsa/mv88e6xxx/global1.h diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 697103934317..10ce820daa48 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o mv88e6xxx-objs := chip.o +mv88e6xxx-objs += global1.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b2c25daef294..98dee2c63163 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -31,6 +31,7 @@ #include #include "mv88e6xxx.h" +#include "global1.h" #include "global2.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) @@ -361,46 +362,27 @@ int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update) return mv88e6xxx_write(chip, addr, reg, val); } -static int _mv88e6xxx_reg_read(struct mv88e6xxx_chip *chip, int addr, int reg) +static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) { u16 val; - int err; + int i, err; - err = mv88e6xxx_read(chip, addr, reg, &val); + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val); if (err) return err; - return val; -} - -static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 val) -{ - return mv88e6xxx_write(chip, addr, reg, val); -} - -static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) -{ - int ret; - int i; - - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, - ret & ~GLOBAL_CONTROL_PPU_ENABLE); - if (ret) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, + val & ~GLOBAL_CONTROL_PPU_ENABLE); + if (err) + return err; for (i = 0; i < 16; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val); + if (err) + return err; usleep_range(1000, 2000); - if ((ret & GLOBAL_STATUS_PPU_MASK) != - GLOBAL_STATUS_PPU_POLLING) + if ((val & GLOBAL_STATUS_PPU_MASK) != GLOBAL_STATUS_PPU_POLLING) return 0; } @@ -409,25 +391,25 @@ static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip) { - int ret, err, i; + u16 val; + int i, err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val); + if (err) + return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, - ret | GLOBAL_CONTROL_PPU_ENABLE); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, + val | GLOBAL_CONTROL_PPU_ENABLE); if (err) return err; for (i = 0; i < 16; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val); + if (err) + return err; usleep_range(1000, 2000); - if ((ret & GLOBAL_STATUS_PPU_MASK) == - GLOBAL_STATUS_PPU_POLLING) + if ((val & GLOBAL_STATUS_PPU_MASK) == GLOBAL_STATUS_PPU_POLLING) return 0; } @@ -663,12 +645,12 @@ out: static int 
_mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) { - int ret; - int i; + u16 val; + int i, err; for (i = 0; i < 10; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_OP); - if ((ret & GLOBAL_STATS_OP_BUSY) == 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_OP, &val); + if ((val & GLOBAL_STATS_OP_BUSY) == 0) return 0; } @@ -677,55 +659,52 @@ static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) static int _mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port) { - int ret; + int err; if (mv88e6xxx_6320_family(chip) || mv88e6xxx_6352_family(chip)) port = (port + 1) << 5; /* Snapshot the hardware statistics counters for this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_CAPTURE_PORT | - GLOBAL_STATS_OP_HIST_RX_TX | port); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_CAPTURE_PORT | + GLOBAL_STATS_OP_HIST_RX_TX | port); + if (err) + return err; /* Wait for the snapshotting to complete. */ - ret = _mv88e6xxx_stats_wait(chip); - if (ret < 0) - return ret; - - return 0; + return _mv88e6xxx_stats_wait(chip); } static void _mv88e6xxx_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val) { - u32 _val; - int ret; + u32 value; + u16 reg; + int err; *val = 0; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_READ_CAPTURED | - GLOBAL_STATS_OP_HIST_RX_TX | stat); - if (ret < 0) + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_READ_CAPTURED | + GLOBAL_STATS_OP_HIST_RX_TX | stat); + if (err) return; - ret = _mv88e6xxx_stats_wait(chip); - if (ret < 0) + err = _mv88e6xxx_stats_wait(chip); + if (err) return; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); - if (ret < 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_32, ®); + if (err) return; - _val = ret << 16; + value = reg << 16; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); - if (ret < 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_01, ®); + if (err) return; - *val = _val | ret; + *val = value | reg; } static struct mv88e6xxx_hw_stat mv88e6xxx_hw_stats[] = { @@ -932,8 +911,7 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP, - GLOBAL_ATU_OP_BUSY); + return mv88e6xxx_g1_wait(chip, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY); } static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, @@ -997,32 +975,31 @@ out: static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd) { - int ret; + u16 val; + int err; if (mv88e6xxx_has_fid_reg(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_FID, - fid); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid); + if (err) + return err; } else if (mv88e6xxx_num_databases(chip) == 256) { /* ATU DBNum[7:4] are located in ATU Control 15:12 */ - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, - (ret & 0xfff) | - ((fid << 8) & 0xf000)); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, + (val & 0xfff) | ((fid << 8) & 0xf000)); + if (err) + return err; /* ATU DBNum[3:0] are located in ATU Operation 3:0 */ cmd |= fid & 0xf; } - ret = _mv88e6xxx_reg_write(chip, 
REG_GLOBAL, GLOBAL_ATU_OP, cmd); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_OP, cmd); + if (err) + return err; return _mv88e6xxx_atu_wait(chip); } @@ -1047,7 +1024,7 @@ static int _mv88e6xxx_atu_data_write(struct mv88e6xxx_chip *chip, data |= (entry->portv_trunkid << shift) & mask; } - return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_DATA, data); + return mv88e6xxx_g1_write(chip, GLOBAL_ATU_DATA, data); } static int _mv88e6xxx_atu_flush_move(struct mv88e6xxx_chip *chip, @@ -1277,17 +1254,16 @@ static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP, - GLOBAL_VTU_OP_BUSY); + return mv88e6xxx_g1_wait(chip, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY); } static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_chip *chip, u16 op) { - int ret; + int err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_OP, op); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_OP, op); + if (err) + return err; return _mv88e6xxx_vtu_wait(chip); } @@ -1308,16 +1284,14 @@ static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, unsigned int nibble_offset) { u16 regs[3]; - int i; - int ret; + int i, err; for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_DATA_0_3 + i); - if (ret < 0) - return ret; + u16 *reg = ®s[i]; - regs[i] = ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_DATA_0_3 + i, reg); + if (err) + return err; } for (i = 0; i < chip->info->num_ports; ++i) { @@ -1347,8 +1321,7 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, unsigned int nibble_offset) { u16 regs[3] = { 0 }; - int i; - int ret; + int i, err; for (i = 0; i < chip->info->num_ports; ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; @@ -1358,10 +1331,11 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, } for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, - GLOBAL_VTU_DATA_0_3 + i, regs[i]); - if (ret < 0) - return ret; + u16 reg = regs[i]; + + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_DATA_0_3 + i, reg); + if (err) + return err; } return 0; @@ -1381,63 +1355,61 @@ static int mv88e6xxx_stu_data_write(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_vtu_vid_write(struct mv88e6xxx_chip *chip, u16 vid) { - return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, - vid & GLOBAL_VTU_VID_MASK); + return mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, + vid & GLOBAL_VTU_VID_MASK); } static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_stu_entry *entry) { struct mv88e6xxx_vtu_stu_entry next = { 0 }; - int ret; + u16 val; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val); + if (err) + return err; - next.vid = ret & GLOBAL_VTU_VID_MASK; - next.valid = !!(ret & GLOBAL_VTU_VID_VALID); + next.vid = val & GLOBAL_VTU_VID_MASK; + next.valid = !!(val & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = mv88e6xxx_vtu_data_read(chip, &next); - if (ret < 0) - return ret; + err = mv88e6xxx_vtu_data_read(chip, &next); + if (err) + return err; 
if (mv88e6xxx_has_fid_reg(chip)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_FID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_FID, &val); + if (err) + return err; - next.fid = ret & GLOBAL_VTU_FID_MASK; + next.fid = val & GLOBAL_VTU_FID_MASK; } else if (mv88e6xxx_num_databases(chip) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 */ - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_OP); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_OP, &val); + if (err) + return err; - next.fid = (ret & 0xf00) >> 4; - next.fid |= ret & 0xf; + next.fid = (val & 0xf00) >> 4; + next.fid |= val & 0xf; } if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_SID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val); + if (err) + return err; - next.sid = ret & GLOBAL_VTU_SID_MASK; + next.sid = val & GLOBAL_VTU_SID_MASK; } } @@ -1505,34 +1477,32 @@ static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, { u16 op = GLOBAL_VTU_OP_VTU_LOAD_PURGE; u16 reg = 0; - int ret; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; if (!entry->valid) goto loadpurge; /* Write port member tags */ - ret = mv88e6xxx_vtu_data_write(chip, entry); - if (ret < 0) - return ret; + err = mv88e6xxx_vtu_data_write(chip, entry); + if (err) + return err; if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) { reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, - reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg); + if (err) + return err; } if (mv88e6xxx_has_fid_reg(chip)) { reg = entry->fid & GLOBAL_VTU_FID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_FID, - reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_FID, reg); + if (err) + return err; } else if (mv88e6xxx_num_databases(chip) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 @@ -1544,9 +1514,9 @@ static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, reg = GLOBAL_VTU_VID_VALID; loadpurge: reg |= entry->vid & GLOBAL_VTU_VID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg); + if (err) + return err; return _mv88e6xxx_vtu_cmd(chip, op); } @@ -1555,37 +1525,38 @@ static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, struct mv88e6xxx_vtu_stu_entry *entry) { struct mv88e6xxx_vtu_stu_entry next = { 0 }; - int ret; + u16 val; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, - sid & GLOBAL_VTU_SID_MASK); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, + sid & GLOBAL_VTU_SID_MASK); + if (err) + return err; - ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_SID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val); + if (err) + return err; - next.sid = ret & 
GLOBAL_VTU_SID_MASK; + next.sid = val & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val); + if (err) + return err; - next.valid = !!(ret & GLOBAL_VTU_VID_VALID); + next.valid = !!(val & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = mv88e6xxx_stu_data_read(chip, &next); - if (ret < 0) - return ret; + err = mv88e6xxx_stu_data_read(chip, &next); + if (err) + return err; } *entry = next; @@ -1596,30 +1567,30 @@ static int _mv88e6xxx_stu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_stu_entry *entry) { u16 reg = 0; - int ret; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; if (!entry->valid) goto loadpurge; /* Write port states */ - ret = mv88e6xxx_stu_data_write(chip, entry); - if (ret < 0) - return ret; + err = mv88e6xxx_stu_data_write(chip, entry); + if (err) + return err; reg = GLOBAL_VTU_VID_VALID; loadpurge: - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg); + if (err) + return err; reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg); + if (err) + return err; return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE); } @@ -2057,14 +2028,13 @@ unlock: static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip, const unsigned char *addr) { - int i, ret; + int i, err; for (i = 0; i < 3; i++) { - ret = _mv88e6xxx_reg_write( - chip, REG_GLOBAL, GLOBAL_ATU_MAC_01 + i, - (addr[i * 2] << 8) | addr[i * 2 + 1]); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_MAC_01 + i, + (addr[i * 2] << 8) | addr[i * 2 + 1]); + if (err) + return err; } return 0; @@ -2073,15 +2043,16 @@ static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_atu_mac_read(struct mv88e6xxx_chip *chip, unsigned char *addr) { - int i, ret; + u16 val; + int i, err; for (i = 0; i < 3; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_ATU_MAC_01 + i); - if (ret < 0) - return ret; - addr[i * 2] = ret >> 8; - addr[i * 2 + 1] = ret & 0xff; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_MAC_01 + i, &val); + if (err) + return err; + + addr[i * 2] = val >> 8; + addr[i * 2 + 1] = val & 0xff; } return 0; @@ -2217,31 +2188,32 @@ static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, struct mv88e6xxx_atu_entry *entry) { struct mv88e6xxx_atu_entry next = { 0 }; - int ret; + u16 val; + int err; next.fid = fid; - ret = _mv88e6xxx_atu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB); + if (err) + return err; - ret = _mv88e6xxx_atu_mac_read(chip, next.mac); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_mac_read(chip, next.mac); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_DATA); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_DATA, &val); + if (err) + return err; - next.state = ret & GLOBAL_ATU_DATA_STATE_MASK; + next.state = val & GLOBAL_ATU_DATA_STATE_MASK; if (next.state != GLOBAL_ATU_DATA_STATE_UNUSED) { unsigned int mask, shift; - 
if (ret & GLOBAL_ATU_DATA_TRUNK) { + if (val & GLOBAL_ATU_DATA_TRUNK) { next.trunk = true; mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK; shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT; @@ -2251,7 +2223,7 @@ static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT; } - next.portv_trunkid = (ret & mask) >> shift; + next.portv_trunkid = (val & mask) >> shift; } *entry = next; @@ -2422,8 +2394,8 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) u16 is_reset = (ppu_active ? 0x8800 : 0xc800); struct gpio_desc *gpiod = chip->reset; unsigned long timeout; - int err, ret; u16 reg; + int err; int i; /* Set all ports to the disabled state. */ @@ -2454,20 +2426,20 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) * through global registers 0x18 and 0x19. */ if (ppu_active) - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); + err = mv88e6xxx_g1_write(chip, 0x04, 0xc000); else - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); + err = mv88e6xxx_g1_write(chip, 0x04, 0xc400); if (err) return err; /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, 0x00); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, 0x00, &reg); + if (err) + return err; - if ((ret & is_reset) == is_reset) + if ((reg & is_reset) == is_reset) break; usleep_range(1000, 2000); } @@ -2749,22 +2721,23 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } -static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) { int err; - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_01, - (addr[0] << 8) | addr[1]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]); if (err) return err; - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_23, - (addr[2] << 8) | addr[3]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); if (err) return err; - return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_45, - (addr[4] << 8) | addr[5]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); + if (err) + return err; + + return 0; } static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, @@ -2783,7 +2756,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, /* Round to nearest multiple of coeff */ age_time = (msecs + coeff / 2) / coeff; - err = mv88e6xxx_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, &val); + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val); if (err) return err; @@ -2791,7 +2764,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, val &= ~0xff0; val |= age_time << 4; - return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, val); + return mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val); } static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds, @@ -2822,7 +2795,7 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE)) reg |= GLOBAL_CONTROL_PPU_ENABLE; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, reg); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, reg); if (err) return err; @@ -2832,15 +2805,14 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | upstream_port <<
GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, - reg); + err = mv88e6xxx_g1_write(chip, GLOBAL_MONITOR_CONTROL, reg); if (err) return err; /* Disable remote management, and set the switch's DSA device number. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL_2, - GLOBAL_CONTROL_2_MULTIPLE_CASCADE | - (ds->index & 0x1f)); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_MULTIPLE_CASCADE | + (ds->index & 0x1f)); if (err) return err; @@ -2853,8 +2825,8 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) * enable address learn messages to be sent to all message * ports. */ - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, - GLOBAL_ATU_CONTROL_LEARN2ALL); + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, + GLOBAL_ATU_CONTROL_LEARN2ALL); if (err) return err; @@ -2868,39 +2840,39 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) return err; /* Configure the IP ToS mapping registers. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_0, 0x0000); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_1, 0x0000); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_2, 0x5555); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_3, 0x5555); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_4, 0xaaaa); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_5, 0xaaaa); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_6, 0xffff); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_7, 0xffff); if (err) return err; /* Configure the IEEE 802.1p priority mapping register. 
*/ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); + err = mv88e6xxx_g1_write(chip, GLOBAL_IEEE_PRI, 0xfa41); if (err) return err; /* Clear the statistics counters for all ports */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_FLUSH_ALL); + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_FLUSH_ALL); if (err) return err; @@ -3265,6 +3237,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 10, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6097, }, @@ -3276,6 +3249,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 11, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6095, }, @@ -3287,6 +3261,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 3, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3298,6 +3273,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 8, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, }, @@ -3309,6 +3285,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3320,6 +3297,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3331,6 +3309,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3342,6 +3321,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3353,6 +3333,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3364,6 +3345,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3375,6 +3357,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 10, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, }, @@ -3386,6 +3369,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3397,6 +3381,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, }, @@ -3408,6 +3393,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 
0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, }, @@ -3419,6 +3405,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3430,6 +3417,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3441,6 +3429,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c new file mode 100644 index 000000000000..d358720b6c2d --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -0,0 +1,34 @@ +/* + * Marvell 88E6xxx Switch Global (1) Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "mv88e6xxx.h" +#include "global1.h" + +int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + int addr = chip->info->global1_addr; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + int addr = chip->info->global1_addr; + + return mv88e6xxx_write(chip, addr, reg, val); +} + +int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask); +} diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h new file mode 100644 index 000000000000..62291e6fe3a3 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -0,0 +1,23 @@ +/* + * Marvell 88E6xxx Switch Global (1) Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef _MV88E6XXX_GLOBAL1_H +#define _MV88E6XXX_GLOBAL1_H + +#include "mv88e6xxx.h" + +int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val); +int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val); +int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask); + +#endif /* _MV88E6XXX_GLOBAL1_H */ diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 827988397fd8..bf78f6dc18cf 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -159,7 +159,6 @@ #define PORT_TAG_REGMAP_0123 0x18 #define PORT_TAG_REGMAP_4567 0x19 -#define REG_GLOBAL 0x1b #define GLOBAL_STATUS 0x00 #define GLOBAL_STATUS_PPU_STATE BIT(15) /* 6351 and 6171 */ /* Two bits for 6165, 6185 etc */ @@ -613,6 +612,7 @@ struct mv88e6xxx_info { unsigned int num_databases; unsigned int num_ports; unsigned int port_base_addr; + unsigned int global1_addr; unsigned int age_time_coeff; unsigned long long flags; }; From 9fe850fb219e3fb729277b11229c2943bc5096a9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:54 -0400 Subject: [PATCH 0965/1050] net: dsa: mv88e6xxx: abstract REG_GLOBAL2 Similarly to the ports, phys, and Global SMI devices, abstract the SMI device address of the Global 2 registers in a few g2 static helpers. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/global2.c | 78 +++++++++++++++++---------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 - 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 99ed028298ac..f31d553c7448 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -14,6 +14,28 @@ #include "mv88e6xxx.h" #include "global2.h" +#define ADDR_GLOBAL2 0x1c + +static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + return mv88e6xxx_read(chip, ADDR_GLOBAL2, reg, val); +} + +static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return mv88e6xxx_write(chip, ADDR_GLOBAL2, reg, val); +} + +static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) +{ + return mv88e6xxx_update(chip, ADDR_GLOBAL2, reg, update); +} + +static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return mv88e6xxx_wait(chip, ADDR_GLOBAL2, reg, mask); +} + /* Offset 0x06: Device Mapping Table register */ static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, @@ -21,7 +43,7 @@ static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, { u16 val = (target << 8) | (port & 0xf); - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_DEVICE_MAPPING, val); } static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) @@ -58,7 +80,7 @@ static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, if (hask) val |= GLOBAL2_TRUNK_MASK_HASK; - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MASK, val); } /* Offset 0x08: Trunk Mapping Table register */ @@ -69,7 +91,7 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, const u16 port_mask = BIT(chip->info->num_ports) - 1; u16 val = (id << 11) | (map & port_mask); - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); + return mv88e6xxx_g2_update(chip, 
GLOBAL2_TRUNK_MAPPING, val); } static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) @@ -105,15 +127,15 @@ static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) /* Init all Ingress Rate Limit resources of all ports */ for (port = 0; port < chip->info->num_ports; ++port) { /* XXX newer chips (like 88E6390) have different 2-bit ops */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_OP_INIT_ALL | - (port << 8)); + err = mv88e6xxx_g2_write(chip, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_OP_INIT_ALL | + (port << 8)); if (err) break; /* Wait for the operation to complete */ - err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_BUSY); + err = mv88e6xxx_g2_wait(chip, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_BUSY); if (err) break; } @@ -128,7 +150,7 @@ static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, { u16 val = (pointer << 8) | data; - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_SWITCH_MAC, val); } int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) @@ -151,7 +173,7 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, { u16 val = (pointer << 8) | (data & 0x7); - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); + return mv88e6xxx_g2_update(chip, GLOBAL2_PRIO_OVERRIDE, val); } static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) @@ -174,16 +196,16 @@ static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, - GLOBAL2_EEPROM_CMD_BUSY | - GLOBAL2_EEPROM_CMD_RUNNING); + return mv88e6xxx_g2_wait(chip, GLOBAL2_EEPROM_CMD, + GLOBAL2_EEPROM_CMD_BUSY | + GLOBAL2_EEPROM_CMD_RUNNING); } static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) { int err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd); + err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_CMD, cmd); if (err) return err; @@ -204,7 +226,7 @@ static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, if (err) return err; - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + return mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_DATA, data); } static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, @@ -217,7 +239,7 @@ static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, if (err) return err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_DATA, data); if (err) return err; @@ -283,7 +305,7 @@ int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, int err; /* Ensure the RO WriteEn bit is set */ - err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val); + err = mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_CMD, &val); if (err) return err; @@ -346,15 +368,15 @@ int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) { - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, - GLOBAL2_SMI_PHY_CMD_BUSY); + return mv88e6xxx_g2_wait(chip, GLOBAL2_SMI_PHY_CMD, + GLOBAL2_SMI_PHY_CMD_BUSY); } static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) { int err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd); + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_CMD, cmd); if (err) return err; @@ -375,7 +397,7 @@ int mv88e6xxx_g2_smi_phy_read(struct 
mv88e6xxx_chip *chip, int addr, int reg, if (err) return err; - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); + return mv88e6xxx_g2_read(chip, GLOBAL2_SMI_PHY_DATA, val); } int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, @@ -388,7 +410,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, if (err) return err; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_DATA, val); if (err) return err; @@ -404,8 +426,7 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) /* Consider the frames with reserved multicast destination * addresses matching 01:80:c2:00:00:2x as MGMT. */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, - 0xffff); + err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_2X, 0xffff); if (err) return err; } @@ -414,8 +435,7 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) /* Consider the frames with reserved multicast destination * addresses matching 01:80:c2:00:00:0x as MGMT. */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, - 0xffff); + err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_0X, 0xffff); if (err) return err; } @@ -429,7 +449,7 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); + err = mv88e6xxx_g2_write(chip, GLOBAL2_SWITCH_MGMT, reg); if (err) return err; @@ -454,8 +474,8 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { /* Initialize Cross-chip Port VLAN Table to reset defaults */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, - GLOBAL2_PVT_ADDR_OP_INIT_ONES); + err = mv88e6xxx_g2_write(chip, GLOBAL2_PVT_ADDR, + GLOBAL2_PVT_ADDR_OP_INIT_ONES); if (err) return err; } diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index bf78f6dc18cf..2f1010818a92 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -276,7 +276,6 @@ #define GLOBAL_STATS_COUNTER_32 0x1e #define GLOBAL_STATS_COUNTER_01 0x1f -#define REG_GLOBAL2 0x1c #define GLOBAL2_INT_SOURCE 0x00 #define GLOBAL2_INT_MASK 0x01 #define GLOBAL2_MGMT_EN_2X 0x02 From 6dc10bbc467d6f76e2665b865d0d8f9e0049b3e6 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:55 -0400 Subject: [PATCH 0966/1050] net: dsa: mv88e6xxx: add flags for FID registers Add flags to describe the presence of Global 1 ATU FID register (0x01) and VTU FID register (0x02), instead of checking families. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 16 +++------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 24 ++++++++++++++++++++---- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 98dee2c63163..b7eecc957bcc 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -565,16 +565,6 @@ static unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) return chip->info->num_databases; } -static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip) -{ - /* Does the device have dedicated FID registers for ATU and VTU ops? 
*/ - if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) || - mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip)) - return true; - - return false; -} - /* We expect the switch to perform auto negotiation if there is a real * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. @@ -978,7 +968,7 @@ static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd) u16 val; int err; - if (mv88e6xxx_has_fid_reg(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_ATU_FID)) { err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid); if (err) return err; @@ -1386,7 +1376,7 @@ static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; - if (mv88e6xxx_has_fid_reg(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) { err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_FID, &val); if (err) return err; @@ -1498,7 +1488,7 @@ static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, return err; } - if (mv88e6xxx_has_fid_reg(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) { reg = entry->fid & GLOBAL_VTU_FID_MASK; err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_FID, reg); if (err) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 2f1010818a92..6c8584f14388 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -170,8 +170,8 @@ #define GLOBAL_MAC_01 0x01 #define GLOBAL_MAC_23 0x02 #define GLOBAL_MAC_45 0x03 -#define GLOBAL_ATU_FID 0x01 /* 6097 6165 6351 6352 */ -#define GLOBAL_VTU_FID 0x02 /* 6097 6165 6351 6352 */ +#define GLOBAL_ATU_FID 0x01 +#define GLOBAL_VTU_FID 0x02 #define GLOBAL_VTU_FID_MASK 0xfff #define GLOBAL_VTU_SID 0x03 /* 6097 6165 6351 6352 */ #define GLOBAL_VTU_SID_MASK 0x3f @@ -408,6 +408,11 @@ enum mv88e6xxx_cap { */ MV88E6XXX_CAP_SERDES, + /* Switch Global (1) Registers. + */ + MV88E6XXX_CAP_G1_ATU_FID, /* (0x01) ATU FID Register */ + MV88E6XXX_CAP_G1_VTU_FID, /* (0x02) VTU FID Register */ + /* Switch Global 2 Registers. * The device contains a second set of global 16-bit registers. 
*/ @@ -460,6 +465,9 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES) +#define MV88E6XXX_FLAG_G1_ATU_FID BIT_ULL(MV88E6XXX_CAP_G1_ATU_FID) +#define MV88E6XXX_FLAG_G1_VTU_FID BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID) + #define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) #define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) #define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) @@ -519,7 +527,9 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_MULTI_CHIP) #define MV88E6XXX_FLAGS_FAMILY_6097 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ @@ -531,7 +541,9 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6165 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_SWITCH_MAC | \ @@ -570,6 +582,8 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAGS_FAMILY_6351 \ (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ @@ -587,6 +601,8 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAGS_FAMILY_6352 \ (MV88E6XXX_FLAG_EDSA | \ MV88E6XXX_FLAG_EEE | \ + MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ From de33376b39b6ea939d53ea44c8c6595b80826501 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:56 -0400 Subject: [PATCH 0967/1050] net: dsa: mv88e6xxx: expose mv88e6xxx_num_databases The mv88e6xxx_num_databases will be used by shared code, so move it inline to the header file. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 5 ----- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b7eecc957bcc..6a55bba943c3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -560,11 +560,6 @@ static bool mv88e6xxx_6352_family(struct mv88e6xxx_chip *chip) return chip->info->family == MV88E6XXX_FAMILY_6352; } -static unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) -{ - return chip->info->num_databases; -} - /* We expect the switch to perform auto negotiation if there is a real * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. 
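The pattern at work in these patches deserves a brief illustration. The following is a minimal, self-contained sketch, not the driver's code: the ex_ names are invented here, and the structures are stripped down from the real mv88e6xxx_info, mv88e6xxx_chip and mv88e6xxx_has definitions. Keeping such accessors as static inlines in the header lets every compilation unit query per-model data and capability flags without reaching into the info structure by hand:

    /* Illustrative sketch only -- simplified from the driver's headers */
    #include <stdbool.h>

    #define EX_BIT_ULL(nr)      (1ULL << (nr))
    #define EX_FLAG_G1_ATU_FID  EX_BIT_ULL(0)  /* chip has an ATU FID register */
    #define EX_FLAG_G1_VTU_FID  EX_BIT_ULL(1)  /* chip has a VTU FID register */

    struct ex_info {
        unsigned int num_databases;
        unsigned long long flags;       /* capability bits for this model */
    };

    struct ex_chip {
        const struct ex_info *info;     /* constant per-model description */
    };

    /* True only if every requested capability bit is set */
    static inline bool ex_has(const struct ex_chip *chip,
                              unsigned long long flags)
    {
        return (chip->info->flags & flags) == flags;
    }

    /* Header-level accessor, usable by chip.c, global1.c and global2.c alike */
    static inline unsigned int ex_num_databases(const struct ex_chip *chip)
    {
        return chip->info->num_databases;
    }

A caller can then write ex_has(chip, EX_FLAG_G1_ATU_FID | EX_FLAG_G1_VTU_FID) rather than comparing family constants, which is exactly the shift the FID-flag patch above makes.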
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 6c8584f14388..20fe6c61d5a7 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -732,6 +732,11 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip, return (chip->info->flags & flags) == flags; } +static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_databases; +} + int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, From 370b4ffbd8dda2b1a61dc63a5ac3088d8e715d53 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:57 -0400 Subject: [PATCH 0968/1050] net: dsa: mv88e6xxx: add mv88e6xxx_num_ports helper Add an mv88e6xxx_num_ports helper instead of digging in the chip info structure. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 30 +++++++++++++-------------- drivers/net/dsa/mv88e6xxx/global2.c | 8 +++---- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 5 +++++ 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 6a55bba943c3..9056d9eb3c13 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -613,7 +613,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= PORT_PCS_CTRL_DUPLEX_FULL; if ((mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip)) && - (port >= chip->info->num_ports - 2)) { + (port >= mv88e6xxx_num_ports(chip) - 2)) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) @@ -1112,7 +1112,7 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) { struct net_device *bridge = chip->ports[port].bridge_dev; - const u16 mask = (1 << chip->info->num_ports) - 1; + const u16 mask = (1 << mv88e6xxx_num_ports(chip)) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; u16 reg; @@ -1123,7 +1123,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { output_ports = mask; } else { - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { /* allow sending frames to every group member */ if (bridge && chip->ports[i].bridge_dev == bridge) output_ports |= BIT(i); @@ -1279,7 +1279,7 @@ static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, return err; } - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u16 reg = regs[i / 4]; @@ -1308,7 +1308,7 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, u16 regs[3] = { 0 }; int i, err; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u8 data = entry->data[i]; @@ -1658,7 +1658,7 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) bitmap_zero(fid_bitmap, MV88E6XXX_N_FID); /* Set every FID bit used by the (un)bridged ports */ - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < 
mv88e6xxx_num_ports(chip); ++i) { err = _mv88e6xxx_port_fid_get(chip, i, fid); if (err) return err; @@ -1708,7 +1708,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, return err; /* exclude all ports except the CPU and DSA ports */ - for (i = 0; i < chip->info->num_ports; ++i) + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) vlan.data[i] = dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i) ? GLOBAL_VTU_DATA_MEMBER_TAG_UNMODIFIED : GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER; @@ -1797,7 +1797,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (vlan.vid > vid_end) break; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) continue; @@ -1959,7 +1959,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, /* keep the VLAN unless all ports are excluded */ vlan.valid = false; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i)) continue; @@ -2340,7 +2340,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, /* Assign the bridge and remap each port's VLANTable */ chip->ports[port].bridge_dev = bridge; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (chip->ports[i].bridge_dev == bridge) { err = _mv88e6xxx_port_based_vlan_map(chip, i); if (err) @@ -2364,7 +2364,7 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) /* Unassign the bridge and remap each port's VLANTable */ chip->ports[port].bridge_dev = NULL; - for (i = 0; i < chip->info->num_ports; ++i) + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) if (i == port || chip->ports[i].bridge_dev == bridge) if (_mv88e6xxx_port_based_vlan_map(chip, i)) netdev_warn(ds->ports[i].netdev, @@ -2384,7 +2384,7 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) int i; /* Set all ports to the disabled state. 
*/ - for (i = 0; i < chip->info->num_ports; i++) { + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, &reg); if (err) return err; @@ -2885,7 +2885,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) goto unlock; /* Setup Switch Port Registers */ - for (i = 0; i < chip->info->num_ports; i++) { + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { err = mv88e6xxx_setup_port(chip, i); if (err) goto unlock; @@ -2933,7 +2933,7 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) u16 val; int err; - if (phy >= chip->info->num_ports) + if (phy >= mv88e6xxx_num_ports(chip)) return 0xffff; mutex_lock(&chip->reg_lock); @@ -2948,7 +2948,7 @@ static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) struct mv88e6xxx_chip *chip = bus->priv; int err; - if (phy >= chip->info->num_ports) + if (phy >= mv88e6xxx_num_ports(chip)) return 0xffff; mutex_lock(&chip->reg_lock); diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index f31d553c7448..cf686e7506a9 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -74,7 +74,7 @@ static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, bool hask, u16 mask) { - const u16 port_mask = BIT(chip->info->num_ports) - 1; + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; u16 val = (num << 12) | (mask & port_mask); if (hask) @@ -88,7 +88,7 @@ static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, u16 map) { - const u16 port_mask = BIT(chip->info->num_ports) - 1; + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; u16 val = (id << 11) | (map & port_mask); return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MAPPING, val); @@ -96,7 +96,7 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) { - const u16 port_mask = BIT(chip->info->num_ports) - 1; + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; int i, err; /* Clear all eight possible Trunk Mask vectors */ @@ -125,7 +125,7 @@ static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) int port, err; /* Init all Ingress Rate Limit resources of all ports */ - for (port = 0; port < chip->info->num_ports; ++port) { + for (port = 0; port < mv88e6xxx_num_ports(chip); ++port) { /* XXX newer chips (like 88E6390) have different 2-bit ops */ err = mv88e6xxx_g2_write(chip, GLOBAL2_IRL_CMD, GLOBAL2_IRL_CMD_OP_INIT_ALL | diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 20fe6c61d5a7..cf639ed91d03 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -737,6 +737,11 @@ static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) return chip->info->num_databases; } +static inline unsigned int mv88e6xxx_num_ports(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_ports; +} + int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, From b4e47c0fb94923781addbb4616fa82fd825ac7ec Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:58 -0400 Subject: [PATCH 0969/1050]
net: dsa: mv88e6xxx: rename mv88e6xxx_vtu_stu_entry The STU (if the switch has one) is abstracted and accessed through the VTU operations and data registers. Thus rename the mv88e6xxx_vtu_stu_entry struct to mv88e6xxx_vtu_entry. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 46 +++++++++++++-------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 5 +-- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9056d9eb3c13..d805661af3cc 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1265,7 +1265,7 @@ static int _mv88e6xxx_vtu_stu_flush(struct mv88e6xxx_chip *chip) } static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry, + struct mv88e6xxx_vtu_entry *entry, unsigned int nibble_offset) { u16 regs[3]; @@ -1290,19 +1290,19 @@ static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, } static int mv88e6xxx_vtu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_read(chip, entry, 0); } static int mv88e6xxx_stu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_read(chip, entry, 2); } static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry, + struct mv88e6xxx_vtu_entry *entry, unsigned int nibble_offset) { u16 regs[3] = { 0 }; @@ -1327,13 +1327,13 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, } static int mv88e6xxx_vtu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_write(chip, entry, 0); } static int mv88e6xxx_stu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_write(chip, entry, 2); } @@ -1345,9 +1345,9 @@ static int _mv88e6xxx_vtu_vid_write(struct mv88e6xxx_chip *chip, u16 vid) } static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { - struct mv88e6xxx_vtu_stu_entry next = { 0 }; + struct mv88e6xxx_vtu_entry next = { 0 }; u16 val; int err; @@ -1407,7 +1407,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, int (*cb)(struct switchdev_obj *obj)) { struct mv88e6xxx_chip *chip = ds->priv; - struct mv88e6xxx_vtu_stu_entry next; + struct mv88e6xxx_vtu_entry next; u16 pvid; int err; @@ -1458,7 +1458,7 @@ unlock: } static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { u16 op = GLOBAL_VTU_OP_VTU_LOAD_PURGE; u16 reg = 0; @@ -1507,9 +1507,9 @@ loadpurge: } static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { - struct mv88e6xxx_vtu_stu_entry next = { 0 }; + struct mv88e6xxx_vtu_entry next = { 0 }; u16 val; int err; @@ -1549,7 +1549,7 @@ static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, } static int _mv88e6xxx_stu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { u16 reg = 0; int err; @@ -1652,7 +1652,7 @@ static 
int _mv88e6xxx_port_fid_set(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) { DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID); - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; bitmap_zero(fid_bitmap, MV88E6XXX_N_FID); @@ -1694,10 +1694,10 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) } static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { struct dsa_switch *ds = chip->ds; - struct mv88e6xxx_vtu_stu_entry vlan = { + struct mv88e6xxx_vtu_entry vlan = { .valid = true, .vid = vid, }; @@ -1715,7 +1715,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip)) { - struct mv88e6xxx_vtu_stu_entry vstp; + struct mv88e6xxx_vtu_entry vstp; /* Adding a VTU entry requires a valid STU entry. As VSTP is not * implemented, only one STU entry is needed to cover all VTU @@ -1742,7 +1742,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, } static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, - struct mv88e6xxx_vtu_stu_entry *entry, bool creat) + struct mv88e6xxx_vtu_entry *entry, bool creat) { int err; @@ -1774,7 +1774,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, u16 vid_begin, u16 vid_end) { struct mv88e6xxx_chip *chip = ds->priv; - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; if (!vid_begin) @@ -1899,7 +1899,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port, u16 vid, bool untagged) { - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int err; err = _mv88e6xxx_vtu_get(chip, vid, &vlan, true); @@ -1944,7 +1944,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, int port, u16 vid) { struct dsa_switch *ds = chip->ds; - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; err = _mv88e6xxx_vtu_get(chip, vid, &vlan, false); @@ -2103,7 +2103,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, const unsigned char *addr, u16 vid, u8 state) { - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; struct mv88e6xxx_atu_entry entry; int err; @@ -2278,7 +2278,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, struct switchdev_obj *obj, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_vtu_stu_entry vlan = { + struct mv88e6xxx_vtu_entry vlan = { .vid = GLOBAL_VTU_VID_MASK, /* all ones */ }; u16 fid; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index cf639ed91d03..ee7873aaf127 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -640,12 +640,9 @@ struct mv88e6xxx_atu_entry { u8 mac[ETH_ALEN]; }; -struct mv88e6xxx_vtu_stu_entry { - /* VTU only */ +struct mv88e6xxx_vtu_entry { u16 vid; u16 fid; - - /* VTU and STU */ u8 sid; bool valid; u8 data[DSA_MAX_PORTS]; From c08026aba70a97925512266d29429dbd62df497d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:21:59 -0400 Subject: [PATCH 0970/1050] net: dsa: mv88e6xxx: rename mv88e6xxx_ops The mv88e6xxx_ops is used to describe how to access the chip registers. 
It can be through SMI (via an MDIO bus), or via another interface such as crafted remote management frames. The correct bus operations structure is chosen at runtime, depending on the chip address and connectivity. We will need the mv88e6xxx_ops name for a future chip-wide operations structure, so rename mv88e6xxx_ops to the more explicit mv88e6xxx_bus_ops. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 10 +++++----- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d805661af3cc..ad31d3ed3aca 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -98,7 +98,7 @@ static int mv88e6xxx_smi_single_chip_write(struct mv88e6xxx_chip *chip, return 0; } -static const struct mv88e6xxx_ops mv88e6xxx_smi_single_chip_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_single_chip_ops = { .read = mv88e6xxx_smi_single_chip_read, .write = mv88e6xxx_smi_single_chip_write, }; @@ -180,7 +180,7 @@ static int mv88e6xxx_smi_multi_chip_write(struct mv88e6xxx_chip *chip, return 0; } -static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_multi_chip_ops = { .read = mv88e6xxx_smi_multi_chip_read, .write = mv88e6xxx_smi_multi_chip_write, }; @@ -515,7 +515,7 @@ static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr, return err; } -static const struct mv88e6xxx_ops mv88e6xxx_phy_ppu_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ppu_ops = { .read = mv88e6xxx_phy_ppu_read, .write = mv88e6xxx_phy_ppu_write, }; @@ -3479,12 +3479,12 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } -static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_g2_smi_phy_ops = { .read = mv88e6xxx_g2_smi_phy_read, .write = mv88e6xxx_g2_smi_phy_write, }; -static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ops = { .read = mv88e6xxx_read, .write = mv88e6xxx_write, }; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index ee7873aaf127..b9ef769311d9 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -648,7 +648,7 @@ struct mv88e6xxx_vtu_entry { u8 data[DSA_MAX_PORTS]; }; -struct mv88e6xxx_ops; +struct mv88e6xxx_bus_ops; struct mv88e6xxx_priv_port { struct net_device *bridge_dev; @@ -669,14 +669,14 @@ struct mv88e6xxx_chip { /* The MII bus and the address on the bus that is used to * communication with the switch */ - const struct mv88e6xxx_ops *smi_ops; + const struct mv88e6xxx_bus_ops *smi_ops; struct mii_bus *bus; int sw_addr; /* Handles automatic disabling and re-enabling of the PHY * polling unit. */ - const struct mv88e6xxx_ops *phy_ops; + const struct mv88e6xxx_bus_ops *phy_ops; struct mutex ppu_mutex; int ppu_disabled; struct work_struct ppu_work; @@ -705,7 +705,7 @@ struct mv88e6xxx_chip { struct mii_bus *mdio_bus; }; -struct mv88e6xxx_ops { +struct mv88e6xxx_bus_ops { int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); };
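The next patch introduces a chip-wide operations table alongside these bus operations. As rough orientation, here is a stripped-down sketch of that dispatch scheme; it is illustrative only, the ex_ names are invented, and the error value is simplified (the driver itself returns -EOPNOTSUPP):

    /* Illustrative sketch only -- simplified from chip.c and mv88e6xxx.h */
    #include <stdint.h>
    #include <stddef.h>

    struct ex_chip;

    /* Chip-wide operations, one constant table per supported model */
    struct ex_ops {
        int (*phy_read)(struct ex_chip *chip, int addr, int reg,
                        uint16_t *val);
        int (*phy_write)(struct ex_chip *chip, int addr, int reg,
                         uint16_t val);
    };

    struct ex_info {
        const struct ex_ops *ops;       /* selected once, at probe time */
    };

    struct ex_chip {
        const struct ex_info *info;
    };

    /* Callers dispatch through the table instead of testing capability
     * flags at every call site; a model that lacks the operation simply
     * leaves the pointer NULL.
     */
    static int ex_phy_read(struct ex_chip *chip, int phy, int reg,
                           uint16_t *val)
    {
        if (!chip->info->ops->phy_read)
            return -1;

        return chip->info->ops->phy_read(chip, phy, reg, val);
    }

Selecting one constant table per model at probe time replaces scattered capability-flag tests with a single indirect call, which is the design the following patches carry through the rest of the driver.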
From b3469dd8adade11e8234854d79b43daf8ce478c9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:22:00 -0400 Subject: [PATCH 0971/1050] net: dsa: mv88e6xxx: add chip-wide ops Introduce an mv88e6xxx_ops structure to describe supported chip-wide functions and assign the correct variant to the chip models. For the moment, add only PHY access routines. This allows us to get rid of the PHY ops structures and the usage of PHY flags. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 136 ++++++++++++++++++++------ drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 28 +++--- 2 files changed, 121 insertions(+), 43 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ad31d3ed3aca..83a37693a8db 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -238,10 +238,10 @@ static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, { int addr = phy; /* PHY devices addresses start at 0x0 */ - if (!chip->phy_ops) + if (!chip->info->ops->phy_read) return -EOPNOTSUPP; - return chip->phy_ops->read(chip, addr, reg, val); + return chip->info->ops->phy_read(chip, addr, reg, val); } static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, @@ -249,10 +249,10 @@ static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, { int addr = phy; /* PHY devices addresses start at 0x0 */ - if (!chip->phy_ops) + if (!chip->info->ops->phy_write) return -EOPNOTSUPP; - return chip->phy_ops->write(chip, addr, reg, val); + return chip->info->ops->phy_write(chip, addr, reg, val); } static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page) @@ -515,11 +515,6 @@ static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr, return err; } -static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ppu_ops = { - .read = mv88e6xxx_phy_ppu_read, - .write = mv88e6xxx_phy_ppu_write, -}; - static bool mv88e6xxx_6065_family(struct mv88e6xxx_chip *chip) { return chip->info->family == MV88E6XXX_FAMILY_6065; @@ -3214,6 +3209,91 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, return err; } +static const struct mv88e6xxx_ops mv88e6085_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6095_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6123_ops = { + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6131_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6161_ops = { + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6165_ops = { + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6171_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops
mv88e6172_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6175_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6176_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6185_ops = { + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6240_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6320_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6321_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6350_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6351_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6352_ops = { + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + static const struct mv88e6xxx_info mv88e6xxx_table[] = { [MV88E6085] = { .prod_num = PORT_SWITCH_ID_PROD_NUM_6085, @@ -3225,6 +3305,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6097, + .ops = &mv88e6085_ops, }, [MV88E6095] = { @@ -3237,6 +3318,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6095, + .ops = &mv88e6095_ops, }, [MV88E6123] = { @@ -3249,6 +3331,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6123_ops, }, [MV88E6131] = { @@ -3261,6 +3344,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, + .ops = &mv88e6131_ops, }, [MV88E6161] = { @@ -3273,6 +3357,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6161_ops, }, [MV88E6165] = { @@ -3285,6 +3370,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6165_ops, }, [MV88E6171] = { @@ -3297,6 +3383,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6171_ops, }, [MV88E6172] = { @@ -3309,6 +3396,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6172_ops, }, [MV88E6175] = { @@ -3321,6 +3409,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6175_ops, }, [MV88E6176] = { @@ -3333,6 +3422,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = 
&mv88e6176_ops, }, [MV88E6185] = { @@ -3345,6 +3435,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, + .ops = &mv88e6185_ops, }, [MV88E6240] = { @@ -3357,6 +3448,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6240_ops, }, [MV88E6320] = { @@ -3369,6 +3461,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, + .ops = &mv88e6320_ops, }, [MV88E6321] = { @@ -3381,6 +3474,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, + .ops = &mv88e6321_ops, }, [MV88E6350] = { @@ -3393,6 +3487,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6350_ops, }, [MV88E6351] = { @@ -3405,6 +3500,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6351_ops, }, [MV88E6352] = { @@ -3417,6 +3513,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6352_ops, }, }; @@ -3479,33 +3576,16 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } -static const struct mv88e6xxx_bus_ops mv88e6xxx_g2_smi_phy_ops = { - .read = mv88e6xxx_g2_smi_phy_read, - .write = mv88e6xxx_g2_smi_phy_write, -}; - -static const struct mv88e6xxx_bus_ops mv88e6xxx_phy_ops = { - .read = mv88e6xxx_read, - .write = mv88e6xxx_write, -}; - static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SMI_PHY)) { - chip->phy_ops = &mv88e6xxx_g2_smi_phy_ops; - } else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) { - chip->phy_ops = &mv88e6xxx_phy_ppu_ops; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) mv88e6xxx_ppu_state_init(chip); - } else { - chip->phy_ops = &mv88e6xxx_phy_ops; - } } static void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) mv88e6xxx_ppu_state_destroy(chip); - } } static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index b9ef769311d9..8e1290278ef6 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -427,8 +427,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ - MV88E6XXX_CAP_G2_SMI_PHY_CMD, /* (0x18) SMI PHY Command */ - MV88E6XXX_CAP_G2_SMI_PHY_DATA, /* (0x19) SMI PHY Data */ /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. 
@@ -479,8 +477,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) #define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD) -#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA) #define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) #define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) @@ -514,11 +510,6 @@ enum mv88e6xxx_cap { (MV88E6XXX_FLAG_PHY_PAGE | \ MV88E6XXX_FLAG_SERDES) -/* Indirect PHY access via Global2 SMI PHY registers */ -#define MV88E6XXX_FLAGS_SMI_PHY \ - (MV88E6XXX_FLAG_G2_SMI_PHY_CMD |\ - MV88E6XXX_FLAG_G2_SMI_PHY_DATA) - #define MV88E6XXX_FLAGS_FAMILY_6095 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ @@ -577,8 +568,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ - MV88E6XXX_FLAGS_PVT | \ - MV88E6XXX_FLAGS_SMI_PHY) + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6351 \ (MV88E6XXX_FLAG_EDSA | \ @@ -595,8 +585,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ - MV88E6XXX_FLAGS_PVT | \ - MV88E6XXX_FLAGS_SMI_PHY) + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6352 \ (MV88E6XXX_FLAG_EDSA | \ @@ -617,8 +606,9 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT | \ - MV88E6XXX_FLAGS_SERDES | \ - MV88E6XXX_FLAGS_SMI_PHY) + MV88E6XXX_FLAGS_SERDES) + +struct mv88e6xxx_ops; struct mv88e6xxx_info { enum mv88e6xxx_family family; @@ -630,6 +620,7 @@ struct mv88e6xxx_info { unsigned int global1_addr; unsigned int age_time_coeff; unsigned long long flags; + const struct mv88e6xxx_ops *ops; }; struct mv88e6xxx_atu_entry { @@ -710,6 +701,13 @@ struct mv88e6xxx_bus_ops { int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); }; +struct mv88e6xxx_ops { + int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val); + int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val); +}; + enum stat_type { BANK0, BANK1, From b073d4e2b16a42f7d5b01814307ff41012c2e1ea Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:22:01 -0400 Subject: [PATCH 0972/1050] net: dsa: mv88e6xxx: add set_switch_mac to ops Add a set_switch_mac chip-wide function to mv88e6xxx_ops and remove MV88E6XXX_FLAG_G2_SWITCH_MAC flags. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 28 ++++++++++++++++++++------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 8 ++------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 83a37693a8db..e40b71ba871c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2909,14 +2909,11 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->set_switch_mac) + return -EOPNOTSUPP; + mutex_lock(&chip->reg_lock); - - /* Has an indirect Switch MAC/WoL/WoF register in Global 2? 
*/ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_SWITCH_MAC)) - err = mv88e6xxx_g2_set_switch_mac(chip, addr); - else - err = mv88e6xxx_g1_set_switch_mac(chip, addr); - + err = chip->info->ops->set_switch_mac(chip, addr); mutex_unlock(&chip->reg_lock); return err; @@ -3210,86 +3207,103 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, } static const struct mv88e6xxx_ops mv88e6085_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6095_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6123_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, }; static const struct mv88e6xxx_ops mv88e6131_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6161_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, }; static const struct mv88e6xxx_ops mv88e6165_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, }; static const struct mv88e6xxx_ops mv88e6171_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6172_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6175_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6176_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6185_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, }; static const struct mv88e6xxx_ops mv88e6240_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6320_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6321_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6350_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6351_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6352_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 8e1290278ef6..d04184c7e068 100644 --- 
a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -423,7 +423,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_IRL_DATA, /* (0x0a) Ingress Rate Data */ MV88E6XXX_CAP_G2_PVT_ADDR, /* (0x0b) Cross Chip Port VLAN Addr */ MV88E6XXX_CAP_G2_PVT_DATA, /* (0x0c) Cross Chip Port VLAN Data */ - MV88E6XXX_CAP_G2_SWITCH_MAC, /* (0x0d) Switch MAC/WoL/WoF */ MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ @@ -473,7 +472,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA) #define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) #define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) -#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC) #define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) #define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) @@ -537,7 +535,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ @@ -559,7 +556,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_TEMP | \ @@ -577,7 +573,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_STU | \ @@ -595,7 +590,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_STU | \ @@ -702,6 +696,8 @@ struct mv88e6xxx_bus_ops { }; struct mv88e6xxx_ops { + int (*set_switch_mac)(struct mv88e6xxx_chip *chip, u8 *addr); + int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg, From ee4dc2e75337e5925e9434f28ec48374a65ffcd9 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 29 Sep 2016 12:22:02 -0400 Subject: [PATCH 0973/1050] net: dsa: mv88e6xxx: add eeprom ops Remove EEPROM flags in favor of new {get,set}_eeprom chip-wide functions in the mv88e6xxx_ops structure. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 34 +++++++++++++++++---------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 16 ++++--------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e40b71ba871c..883fd9809dd2 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3168,13 +3168,11 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->get_eeprom) + return -EOPNOTSUPP; + mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_g2_get_eeprom16(chip, eeprom, data); - else - err = -EOPNOTSUPP; - + err = chip->info->ops->get_eeprom(chip, eeprom, data); mutex_unlock(&chip->reg_lock); if (err) @@ -3191,16 +3189,14 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->set_eeprom) + return -EOPNOTSUPP; + if (eeprom->magic != 0xc3ec4951) return -EINVAL; mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_g2_set_eeprom16(chip, eeprom, data); - else - err = -EOPNOTSUPP; - + err = chip->info->ops->set_eeprom(chip, eeprom, data); mutex_unlock(&chip->reg_lock); return err; @@ -3249,6 +3245,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { }; static const struct mv88e6xxx_ops mv88e6172_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3261,6 +3259,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { }; static const struct mv88e6xxx_ops mv88e6176_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3273,18 +3273,24 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { }; static const struct mv88e6xxx_ops mv88e6240_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6320_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, }; static const struct mv88e6xxx_ops mv88e6321_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3303,6 +3309,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { }; static const struct mv88e6xxx_ops mv88e6352_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, @@ -3825,7 +3833,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if (IS_ERR(chip->reset)) return PTR_ERR(chip->reset); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16) && + if (chip->info->ops->get_eeprom && !of_property_read_u32(np, "eeprom-length", &eeprom_len)) chip->eeprom_len = 
eeprom_len; diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index d04184c7e068..e572121c196e 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -424,8 +424,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_PVT_ADDR, /* (0x0b) Cross Chip Port VLAN Addr */ MV88E6XXX_CAP_G2_PVT_DATA, /* (0x0c) Cross Chip Port VLAN Data */ MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ - MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ - MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. @@ -473,8 +471,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) #define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) #define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) -#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) -#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) #define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) #define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) @@ -483,11 +479,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) #define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) -/* EEPROM Programming via Global2 with 16-bit data */ -#define MV88E6XXX_FLAGS_EEPROM16 \ - (MV88E6XXX_FLAG_G2_EEPROM_CMD | \ - MV88E6XXX_FLAG_G2_EEPROM_DATA) - /* Ingress Rate Limit unit */ #define MV88E6XXX_FLAGS_IRL \ (MV88E6XXX_FLAG_G2_IRL_CMD | \ @@ -561,7 +552,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ - MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) @@ -596,7 +586,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ - MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT | \ @@ -696,6 +685,11 @@ struct mv88e6xxx_bus_ops { }; struct mv88e6xxx_ops { + int (*get_eeprom)(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); + int (*set_eeprom)(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); + int (*set_switch_mac)(struct mv88e6xxx_chip *chip, u8 *addr); int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg, From 1b0ff89852d79354e8a091c81a88df21f5aa9f0a Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 29 Sep 2016 13:24:08 -0300 Subject: [PATCH 0974/1050] tg3: Avoid NULL pointer dereference in tg3_io_error_detected() While the driver is probing the adapter, an error may occur before the netdev structure is allocated and attached to pci_dev. In this case, not only netdev isn't available, but the tg3 private structure is also not available as it is just math from the NULL pointer, so dereferences must be skipped. 
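The "just math" remark is the crux: netdev_priv() derives the private area's address by a fixed offset from the net_device pointer, so when netdev is NULL the computed tp is not NULL but a small bogus address (note the faulting address 0x00001a99 in the trace below) that only explodes on dereference. A minimal user-space sketch of that failure mode, with fake_netdev_priv() as a hypothetical stand-in for the real helper:

#include <stdio.h>

struct net_device { unsigned long flags; };	/* stand-in */
struct tg3 { int pcierr_recovery; };		/* stand-in */

/* netdev_priv()-style math: priv lives just past struct net_device */
static struct tg3 *fake_netdev_priv(struct net_device *dev)
{
	return (struct tg3 *)((char *)dev + sizeof(struct net_device));
}

int main(void)
{
	struct net_device *netdev = NULL;	/* probe failed early */
	struct tg3 *tp = fake_netdev_priv(netdev);

	/* tp is non-NULL garbage (typically 0x8 here), so a NULL check
	 * on tp would pass; the guard must be on netdev itself */
	printf("netdev = %p, tp = %p\n", (void *)netdev, (void *)tp);

	if (!netdev) {
		puts("no netdev yet: skip all tp dereferences");
		return 0;
	}
	tp->pcierr_recovery = 1;	/* safe only past the guard */
	return 0;
}

Hence the fix: guard on netdev (and netif_running) before any tp access, rather than relying on tp itself looking valid.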
The following trace is seen when the error is triggered: [1.402247] Unable to handle kernel paging request for data at address 0x00001a99 [1.402410] Faulting instruction address: 0xc0000000007e33f8 [1.402450] Oops: Kernel access of bad area, sig: 11 [#1] [1.402481] SMP NR_CPUS=2048 NUMA PowerNV [1.402513] Modules linked in: [1.402545] CPU: 0 PID: 651 Comm: eehd Not tainted 4.4.0-36-generic #55-Ubuntu [1.402591] task: c000001fe4e42a20 ti: c000001fe4e88000 task.ti: c000001fe4e88000 [1.402742] NIP: c0000000007e33f8 LR: c0000000007e3164 CTR: c000000000595ea0 [1.402787] REGS: c000001fe4e8b790 TRAP: 0300 Not tainted (4.4.0-36-generic) [1.402832] MSR: 9000000100009033 CR: 28000422 XER: 20000000 [1.403058] CFAR: c000000000008468 DAR: 0000000000001a99 DSISR: 42000000 SOFTE: 1 GPR00: c0000000007e3164 c000001fe4e8ba10 c0000000015c5e00 0000000000000000 GPR04: 0000000000000001 0000000000000000 0000000000000039 0000000000000299 GPR08: 0000000000000000 0000000000000001 c000001fe4e88000 0000000000000006 GPR12: 0000000000000000 c00000000fb40000 c0000000000e6558 c000003ca1bffd00 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000000d52768 GPR24: c000000000d52740 0000000000000100 c000003ca1b52000 0000000000000002 GPR28: 0000000000000900 0000000000000000 c00000000152a0c0 c000003ca1b52000 [1.404226] NIP [c0000000007e33f8] tg3_io_error_detected+0x308/0x340 [1.404265] LR [c0000000007e3164] tg3_io_error_detected+0x74/0x340 This patch avoids the NULL pointer dereference by moving the access after the netdev NULL pointer check on tg3_io_error_detected(). Also, we add a check for netdev being NULL on tg3_io_resume() [suggested by Michael Chan]. Fixes: 0486a063b1ff ("tg3: prevent ifup/ifdown during PCI error recovery") Fixes: dfc8f370316b ("net/tg3: Release IRQs on permanent error") Tested-by: Guilherme G. Piccoli Signed-off-by: Milton Miller Signed-off-by: Guilherme G. Piccoli Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a2551bcd1027..ea967df4b202 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18122,14 +18122,14 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, rtnl_lock(); - /* We needn't recover from permanent error */ - if (state == pci_channel_io_frozen) - tp->pcierr_recovery = true; - /* We probably don't have netdev yet */ if (!netdev || !netif_running(netdev)) goto done; + /* We needn't recover from permanent error */ + if (state == pci_channel_io_frozen) + tp->pcierr_recovery = true; + tg3_phy_stop(tp); tg3_netif_stop(tp); @@ -18226,7 +18226,7 @@ static void tg3_io_resume(struct pci_dev *pdev) rtnl_lock(); - if (!netif_running(netdev)) + if (!netdev || !netif_running(netdev)) goto done; tg3_full_lock(tp, 0); From 6348ef2dbbd96c4488a1ee83cc0f0f3d9a314a2f Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:28:58 +0800 Subject: [PATCH 0975/1050] net:snmp: Introduce generic interfaces for snmp_get_cpu_field{, 64} This is to introduce the generic interfaces for snmp_get_cpu_field{,64}. It exchanges the two for-loops for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He Suggested-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/ip.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92dc933..bc43c0fcae12 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL From f22d5c490990ecb6f4eb70c4ed478fc8cea78fe1 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:28:59 +0800 Subject: [PATCH 0976/1050] proc: Reduce cache miss in snmp_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Then snmp_seq_show is split into 2 parts to avoid build warning "the frame size" larger than 1024. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/ipv4/proc.c | 70 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1ed015e4bc79..f51fc8803154 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include #include +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [mea@utu.fi] */ @@ -379,13 +381,15 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. 
*/ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { - int i; struct net *net = seq->private; + u64 buff64[IPSTATS_MIB_MAX]; + int i; + + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -394,57 +398,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, - snmp4_ipstats_list[i].entry, - offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + int i; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, + net->mib.tcp_statistics); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udplite_statistics); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udplite_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); seq_putc(seq, '\n'); return 0; } +static int snmp_seq_show(struct seq_file *seq, void *v) +{ + snmp_seq_show_ipstats(seq, v); + + icmp_put(seq); /* RFC 2011 compatibility */ + icmpmsg_put(seq); + + snmp_seq_show_tcp_udp(seq, v); + + return 0; +} + static int snmp_seq_open(struct inode *inode, struct file *file) { return single_open_net(inode, file, snmp_seq_show); From 4a4857b1c81ef39a9dc719af6b498cd39d1c1eb0 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:00 +0800 Subject: [PATCH 0977/1050] proc: Reduce cache miss in snmp6_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of 
each cpu sequentially. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/ipv6/proc.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0af84..cc8e3ae9ca73 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include #include +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { + unsigned long buff[SNMP_MIB_MAX]; int i; - unsigned long val; - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { + u64 buff64[SNMP_MIB_MAX]; int i; + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) From 7d64a94be2f9fadb1dd95742650f1fdbac69f25b Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:01 +0800 Subject: [PATCH 0978/1050] proc: Reduce cache miss in sctp_snmp_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/sctp/proc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77a5bea..09e16c2b5c17 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[SCTP_MIB_MAX]; struct net *net = seq->private; int i; + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, sctp_snmp_list, + net->sctp.sctp_statistics); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } From 07613873f1731aa47fdf06a1cbd2e3cd1974c026 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:02 +0800 Subject: [PATCH 0979/1050] proc: Reduce cache miss in xfrm_statistics_seq_show This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. 
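The cache win in this run of conversions comes from a single loop inversion: snmp_fold_field() walked every CPU's per-cpu block once per counter, whereas the batch helpers walk each CPU block once, front to back, accumulating all counters from it. A self-contained sketch of the access-pattern change, with toy sizes standing in for the real MIB dimensions:

#include <stdio.h>

#define NCPUS  4
#define NSTATS 3

/* toy per-CPU MIB blocks; each row models one CPU's counters */
static unsigned long mib[NCPUS][NSTATS] = {
	{ 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 }, { 10, 11, 12 },
};

int main(void)
{
	unsigned long buff[NSTATS] = { 0 };
	int c, i;

	/* batch order: one sequential pass per CPU block, so each
	 * block is pulled into cache once (vs. NSTATS passes with
	 * the per-item snmp_fold_field order) */
	for (c = 0; c < NCPUS; c++)
		for (i = 0; i < NSTATS; i++)
			buff[i] += mib[c][i];

	for (i = 0; i < NSTATS; i++)
		printf("counter %d = %lu\n", i, buff[i]);
	return 0;
}

This outer-CPU, inner-item ordering is precisely what the snmp_get_cpu_field_batch macro introduced earlier in the series expands to.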
Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/xfrm/xfrm_proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8935f4..ba2b539879bc 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[LINUX_MIB_XFRMMAX]; struct net *net = seq->private; int i; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, + net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } From aca05671d58cea06dc60bbe554b8b399af7da409 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:03 +0800 Subject: [PATCH 0980/1050] ipv6: Remove useless parameter in __snmp6_fill_statsdev The parameter items(is always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev Signed-off-by: Jia He Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d439788..35d4baa55c9d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, &stats[0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, &stats[0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read(&mib[i]), &stats[i]); - memset(&stats[items], 0, pad); + memset(&stats[ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } From 6d4a741cbbfa6612a479656654ca5edf7becc72c Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:29:04 +0800 Subject: [PATCH 0981/1050] net: Suppress the "Comparison to NULL could be written" warnings This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He Signed-off-by: David S. 
Miller --- net/ipv4/proc.c | 32 ++++++++++++++++---------------- net/sctp/proc.c | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f51fc8803154..7143ca1a6af9 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -358,22 +358,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -390,7 +390,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -401,7 +401,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); return 0; @@ -416,13 +416,13 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); snmp_get_cpu_field_batch(buff, snmp4_tcp_list, net->mib.tcp_statistics); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -435,10 +435,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); @@ -447,10 +447,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udplite_statistics); - for (i = 0; 
snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); seq_putc(seq, '\n'); @@ -493,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; seq_puts(seq, "TcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); seq_puts(seq, "\nTcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %lu", snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %llu", snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 09e16c2b5c17..206377fe91ec 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -81,7 +81,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, sctp_snmp_list, net->sctp.sctp_statistics); - for (i = 0; sctp_snmp_list[i].name != NULL; i++) + for (i = 0; sctp_snmp_list[i].name; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, buff[i]); From bd11f0741fa5a2c296629898ad07759dd12b35bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 27 Sep 2016 23:57:58 -0700 Subject: [PATCH 0982/1050] ipv6 addrconf: implement RFC7559 router solicitation backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements: https://tools.ietf.org/html/rfc7559 Backoff is performed according to RFC3315 section 14: https://tools.ietf.org/html/rfc3315#section-14 We allow setting /proc/sys/net/ipv6/conf/*/router_solicitations to a negative value meaning an unlimited number of retransmits, and we make this the new default (inline with the RFC). We also add a new setting: /proc/sys/net/ipv6/conf/*/router_solicitation_max_interval defaulting to 1 hour (per RFC recommendation). Signed-off-by: Maciej Żenczykowski Acked-by: Erik Kline Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 + include/net/addrconf.h | 3 ++- include/net/if_inet6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 51 +++++++++++++++++++++++++++++++++------ 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c6dbcd84a2c7..7e9a789be5e0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -18,6 +18,7 @@ struct ipv6_devconf { __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; + __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9826d3a9464c..f2d072787947 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -1,8 +1,9 @@ #ifndef _ADDRCONF_H #define _ADDRCONF_H -#define MAX_RTR_SOLICITATIONS 3 +#define MAX_RTR_SOLICITATIONS -1 /* unlimited */ #define RTR_SOLICITATION_INTERVAL (4*HZ) +#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 1c8b6820b694..515352c6280a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -201,6 +201,7 @@ struct inet6_dev { struct ipv6_devstat stats; struct timer_list rs_timer; + __s32 rs_interval; /* in jiffies */ __u8 rs_probes; __u8 addr_gen_mode; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 395876060f50..8c2772340c3f 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -177,6 +177,7 @@ enum { DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, + DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 35d4baa55c9d..87183983724d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } +static inline s32 rfc3315_s14_backoff_init(s32 irt) +{ + /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ + u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + do_div(tmp, 1000000); + return (s32)tmp; +} + +static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) +{ + /* multiply 'retransmission timeout' by 1.9 .. 2.1 */ + u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + do_div(tmp, 1000000); + if ((s32)tmp > mrt) { + /* multiply 'maximum retransmission time' by 0.9 .. 
1.1 */ + tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + do_div(tmp, 1000000); + } + return (s32)tmp; +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data) goto put; write_lock(&idev->lock); + idev->rs_interval = rfc3315_s14_backoff_update( + idev->rs_interval, idev->cnf.rtr_solicit_max_interval); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); + idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" @@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && + ifp->idev->cnf.rtr_solicits != 0 && (dev->flags&IFF_LOOPBACK) == 0; read_unlock_bh(&ifp->idev->lock); @@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); + ifp->idev->rs_interval = rfc3315_s14_backoff_init( + ifp->idev->cnf.rtr_solicit_interval); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_rs_timer(ifp->idev, - ifp->idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } @@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_max_interval); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; @@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return -EINVAL; if (!ipv6_accept_ra(idev)) return -EINVAL; - if (idev->cnf.rtr_solicits <= 0) + if (idev->cnf.rtr_solicits == 0) return -EINVAL; write_lock_bh(&idev->lock); @@ -5128,8 +5156,10 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_interval = rfc3315_s14_backoff_init( + idev->cnf.rtr_solicit_interval); 
idev->rs_probes = 1; - addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... */ @@ -5777,6 +5807,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "router_solicitation_max_interval", + .data = &ipv6_devconf.rtr_solicit_max_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, From 5038056e6bd45788235e97e3bcfc43f96c52ca84 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 28 Sep 2016 11:00:04 -0700 Subject: [PATCH 0983/1050] mlx4: remove unused fields This also can address following UBSAN warnings: [ 36.640343] ================================================================================ [ 36.648772] UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlx4/fw.c:857:26 [ 36.656853] shift exponent 64 is too large for 32-bit type 'int' [ 36.663348] ================================================================================ [ 36.671783] ================================================================================ [ 36.680213] UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlx4/fw.c:861:27 [ 36.688297] shift exponent 35 is too large for 32-bit type 'int' [ 36.694702] ================================================================================ Tested: reboot with UBSAN, no warning. Signed-off-by: David Decotigny Acked-by: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 4 ---- drivers/net/ethernet/mellanox/mlx4/fw.h | 2 -- 2 files changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 090bf81076e8..f9cbc67f1694 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -853,12 +853,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); dev_cap->reserved_mtts = 1 << (field >> 4); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); - dev_cap->max_mrw_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); dev_cap->reserved_mrws = 1 << (field & 0xf); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); - dev_cap->max_mtt_seg = 1 << (field & 0x3f); MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index f11614f12517..5343a0599253 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -80,9 +80,7 @@ struct mlx4_dev_cap { int max_eqs; int num_sys_eqs; int reserved_mtts; - int max_mrw_sz; int reserved_mrws; - int max_mtt_seg; int max_requester_per_qp; int max_responder_per_qp; int max_rdma_global; From 73dca124cdbad2d67d47d6196c08325f18447d07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:37:26 +0800 Subject: [PATCH 0984/1050] sctp: move sent_count to the memory hole in sctp_chunk Now pahole sctp_chunk, it has 2 memory holes: struct sctp_chunk { struct list_head list; atomic_t refcnt; /* XXX 4 bytes hole, try to pack */ ... 
long unsigned int prsctp_param; int sent_count; /* XXX 4 bytes hole, try to pack */ This patch is to move up sent_count to fill the 1st one and eliminate the 2nd one. It's not just another struct compaction, it also fixes the "netperf- Throughput_Mbps -37.2% regression" issue when overloading the CPU. Fixes: a6c2f792873a ("sctp: implement prsctp TTL policy") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ce93c4b10d26..4f097f538fff 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -554,6 +554,9 @@ struct sctp_chunk { atomic_t refcnt; + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* This is our link to the per-transport transmitted list. */ struct list_head transmitted_list; @@ -610,9 +613,6 @@ struct sctp_chunk { */ unsigned long prsctp_param; - /* How many times this chunk have been sent, for prsctp RTX policy */ - int sent_count; - /* Which association does this belong to? */ struct sctp_association *asoc; From 0605483f6ace1f6b63e397c819a115ddcd13af0d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:37:27 +0800 Subject: [PATCH 0985/1050] sctp: remove prsctp_param from sctp_chunk Now sctp uses chunk->prsctp_param to save the prsctp param for all the prsctp polices, we didn't need to introduce prsctp_param to sctp_chunk. We can just use chunk->sinfo.sinfo_timetolive for RTX and BUF polices, and reuse msg->expires_at for TTL policy, as the prsctp polices and old expires policy are mutual exclusive. This patch is to remove prsctp_param from sctp_chunk, and reuse msg's expires_at for TTL and chunk's sinfo.sinfo_timetolive for RTX and BUF polices. Note that sctp can't use chunk's sinfo.sinfo_timetolive for TTL policy, as it needs a u64 variables to save the expires_at time. This one also fixes the "netperf-Throughput_Mbps -37.2% regression" issue. Fixes: a6c2f792873a ("sctp: implement prsctp TTL policy") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 7 ------- net/sctp/chunk.c | 9 +++++++-- net/sctp/outqueue.c | 4 ++-- net/sctp/sm_make_chunk.c | 15 --------------- 4 files changed, 9 insertions(+), 26 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4f097f538fff..ced0df374e60 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -606,13 +606,6 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; - /* We use this field to record param for prsctp policies, - * for TTL policy, it is the time_to_drop of this chunk, - * for RTX policy, it is the max_sent_count of this chunk, - * for PRIO policy, it is the priority of this chunk. - */ - unsigned long prsctp_param; - /* Which association does this belong to? 
*/ struct sctp_association *asoc; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index a55e54738b81..14990e21cf55 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -179,6 +179,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg, msg->expires_at, jiffies); } + if (asoc->prsctp_enable && + SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) + msg->expires_at = + jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); + /* This is the biggest possible DATA chunk that can fit into * the packet */ @@ -349,14 +354,14 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) } if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && - time_after(jiffies, chunk->prsctp_param)) { + time_after(jiffies, chunk->msg->expires_at)) { if (chunk->sent_count) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; else chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && - chunk->sent_count > chunk->prsctp_param) { + chunk->sent_count > chunk->sinfo.sinfo_timetolive) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 72e54a416af6..e084f35d40d2 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -383,7 +383,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, transmitted_list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->transmitted_list); @@ -418,7 +418,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8c77b87a8565..46ffecc57214 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -706,20 +706,6 @@ nodata: return retval; } -static void sctp_set_prsctp_policy(struct sctp_chunk *chunk, - const struct sctp_sndrcvinfo *sinfo) -{ - if (!chunk->asoc->prsctp_enable) - return; - - if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = - jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) || - SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = sinfo->sinfo_timetolive; -} - /* Make a DATA chunk for the given association from the provided * parameters. However, do not populate the data payload. */ @@ -753,7 +739,6 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); - sctp_set_prsctp_policy(retval, sinfo); nodata: return retval; From be4947bf46cb0e7a7d089e03c61bab35f1e695ce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:37:28 +0800 Subject: [PATCH 0986/1050] sctp: change to check peer prsctp_capable when using prsctp polices Now before using prsctp polices, sctp uses asoc->prsctp_enable to check if prsctp is enabled. However asoc->prsctp_enable is set only means local host support prsctp, sctp should not abandon packet if peer host doesn't enable prsctp. 
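The reason the distinction is not cosmetic: abandoning a chunk under a PR-SCTP policy silently drops data and relies on the peer understanding the FORWARD TSN that skips past it, so the gate must reflect the negotiated state of both ends, not the local sysctl alone. A tiny sketch of the corrected gate, using a hypothetical assoc struct in place of the kernel's sctp_association:

#include <stdio.h>

struct assoc {
	int prsctp_enable;		/* local support only */
	int peer_prsctp_capable;	/* set iff both ends advertised PR-SCTP */
};

/* mirrors the fixed check: abandon chunks only when negotiated */
static int may_abandon(const struct assoc *a)
{
	return a->peer_prsctp_capable;
}

int main(void)
{
	struct assoc local_only = { .prsctp_enable = 1, .peer_prsctp_capable = 0 };
	struct assoc negotiated = { .prsctp_enable = 1, .peer_prsctp_capable = 1 };

	printf("local support only: may abandon? %d\n", may_abandon(&local_only));
	printf("both ends support: may abandon? %d\n", may_abandon(&negotiated));
	return 0;
}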
So this patch is to use asoc->peer.prsctp_capable to check if prsctp is enabled on both side, instead of asoc->prsctp_enable, as asoc's peer.prsctp_capable is set only when local and peer both enable prsctp. Fixes: a6c2f792873a ("sctp: implement prsctp TTL policy") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/chunk.c | 4 ++-- net/sctp/outqueue.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 14990e21cf55..0a3dbec0a8fb 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -179,7 +179,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg, msg->expires_at, jiffies); } - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); @@ -340,7 +340,7 @@ errout: /* Check whether this message has expired. */ int sctp_chunk_abandoned(struct sctp_chunk *chunk) { - if (!chunk->asoc->prsctp_enable || + if (!chunk->asoc->peer.prsctp_capable || !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) { struct sctp_datamsg *msg = chunk->msg; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e084f35d40d2..107233da5cc9 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -326,7 +326,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); - if (chunk->asoc->prsctp_enable && + if (chunk->asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) chunk->asoc->sent_cnt_removable++; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) @@ -442,7 +442,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, { struct sctp_transport *transport; - if (!asoc->prsctp_enable || !asoc->sent_cnt_removable) + if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable) return; msg_len = sctp_prsctp_prune_sent(asoc, sinfo, @@ -1055,7 +1055,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(chunk); @@ -1347,7 +1347,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) tsn = ntohl(tchunk->subh.data_hdr->tsn); if (TSN_lte(tsn, ctsn)) { list_del_init(&tchunk->transmitted_list); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(tchunk); From 1cceda7849809a8857fd9f26efe8846506c710e1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:55:44 +0800 Subject: [PATCH 0987/1050] sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock When sctp dumps all the ep->assocs, it needs to lock_sock first, but now it locks sock in rcu_read_lock, and lock_sock may sleep, which would break rcu_read_lock. This patch is to get and hold one sock when traversing the list. After that and get out of rcu_read_lock, lock and dump it. Then it will traverse the list again to get the next one until all sctp socks are dumped. For sctp_diag_dump_one, it fixes this issue by holding asoc and moving cb() out of rcu_read_lock in sctp_transport_lookup_process. Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/sctp/sctp_diag.c | 58 ++++++++++++++++++++++++++++++-------------- net/sctp/socket.c | 10 +++++--- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f3508aa75815..cef0cee182d4 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -272,28 +272,17 @@ out: return err; } -static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sock *sk, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_comm_param *commp = p; - struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; - struct sctp_association *assoc = - list_entry(ep->asocs.next, struct sctp_association, asocs); + struct sctp_association *assoc; int err = 0; - /* find the ep only once through the transports by this condition */ - if (tsp->asoc != assoc) - goto out; - - if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) - goto out; - lock_sock(sk); - if (sk != assoc->base.sk) - goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -312,7 +301,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh) < 0) { cb->args[3] = 1; - err = 2; + err = 1; goto release; } cb->args[3] = 1; @@ -321,7 +310,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { - err = 2; + err = 1; goto release; } next: @@ -333,10 +322,35 @@ next: cb->args[4] = 0; release: release_sock(sk); + sock_put(sk); return err; +} + +static int sctp_get_sock(struct sctp_transport *tsp, void *p) +{ + struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct sctp_association *assoc = + list_entry(ep->asocs.next, struct sctp_association, asocs); + + /* find the ep only once through the transports by this condition */ + if (tsp->asoc != assoc) + goto out; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) + goto out; + + sock_hold(sk); + cb->args[5] = (long)sk; + + return 1; + out: cb->args[2]++; - return err; + return 0; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) @@ -472,10 +486,18 @@ skip: * 2 : to record the transport pos of this time's traversal * 3 : to mark if we have dumped the ep info of the current asoc * 4 : to work as a temporary variable to traversal list + * 5 : to save the sk we get from travelsing the tsp list. 
*/ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); + +next: + cb->args[5] = 0; + sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp); + + if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp)) + goto next; + done: cb->args[1] = cb->args[4]; cb->args[4] = 0; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fc417a8b476..8ed2d99bde6d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4469,17 +4469,21 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *paddr, void *p) { struct sctp_transport *transport; - int err = 0; + int err = -ENOENT; rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); if (!transport || !sctp_transport_hold(transport)) goto out; - err = cb(transport, p); + + sctp_association_hold(transport->asoc); sctp_transport_put(transport); -out: rcu_read_unlock(); + err = cb(transport, p); + sctp_association_put(transport->asoc); + +out: return err; } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); From 6cd80b55471515e0102760b59034ded000c35a09 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Sep 2016 23:47:37 +0100 Subject: [PATCH 0988/1050] nfp: bpf: zero extend 4 byte context loads Set upper 32 bits of destination register to zeros after load from the context structure. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 3de819acd68c..f8df5300f49c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -1134,6 +1134,8 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) else return -ENOTSUPP; + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + return 0; } From 803783849fed11e38a30f31932c02c815520da70 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Wed, 28 Sep 2016 21:46:39 -0700 Subject: [PATCH 0989/1050] mlx5: Add ndo_poll_controller() implementation This implements ndo_poll_controller in net_device_ops callbacks for mlx5, which is necessary to use netconsole with this driver. Acked-By: Saeed Mahameed Signed-off-by: Calvin Owens Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b58cfe37dead..7eaf38020a8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3188,6 +3188,20 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) } } +#ifdef CONFIG_NET_POLL_CONTROLLER +/* Fake "interrupt" called by netpoll (eg netconsole) to send skbs without + * reenabling interrupts. 
+ */ +static void mlx5e_netpoll(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < priv->params.num_channels; i++) + napi_schedule(&priv->channel[i]->napi); +} +#endif + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -3208,6 +3222,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #endif .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_xdp = mlx5e_xdp, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = mlx5e_netpoll, +#endif }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -3240,6 +3257,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_xdp = mlx5e_xdp, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = mlx5e_netpoll, +#endif }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) From 265a44bbf23e4ed1c76409f07595ea88351ba4b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 08:50:42 +0100 Subject: [PATCH 0990/1050] rxrpc: Actually display the tx_data trace retransmission note Actually display in the tx_data trace the retransmission note added in a previous patch. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 8ba8d76e856a..67f03946ea4a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -280,11 +280,12 @@ TRACE_EVENT(rxrpc_tx_data, __entry->lose = lose; ), - TP_printk("c=%p DATA %08x q=%08x fl=%02x%s", + TP_printk("c=%p DATA %08x q=%08x fl=%02x%s%s", __entry->call, __entry->serial, __entry->seq, __entry->flags, + __entry->retrans ? " *RETRANS*" : "", __entry->lose ? " *LOSE*" : "") ); From 192d1dccbfc5b901b66527df9df80304693cf06e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 29 Sep 2016 12:48:11 -0700 Subject: [PATCH 0991/1050] x86/boot: Fix another __read_cr4() case on 486 The condition for reading CR4 was wrong: there are some CPUs with CPUID but not CR4. Rather than trying to make the condition exact, use __read_cr4_safe(). Fixes: 18bc7bd523e0 ("x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly") Reported-by: david@saggiorato.net Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Brian Gerst Link: http://lkml.kernel.org/r/8c453a61c4f44ab6ff43c29780ba04835234d2e5.1475178369.git.luto@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa60f5f5a16..98c9cd6f3b5d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1137,9 +1137,7 @@ void __init setup_arch(char **cmdline_p) * auditing all the early-boot CR4 manipulation would be needed to * rule it out. */ - if (boot_cpu_data.cpuid_level >= 0) - /* A CPU has %cr4 if and only if it has CPUID. */ - mmu_cr4_features = __read_cr4(); + mmu_cr4_features = __read_cr4_safe(); memblock_set_current_limit(get_max_mapped()); From e4aad64597d7a2455a541f904365b48d607916db Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Thu, 29 Sep 2016 11:51:00 +0000 Subject: [PATCH 0992/1050] x86/vdso: Fix building on big endian host We need to call GET_LE to read hdr->e_type. 
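To illustrate why the helper matters here: vdso2c runs on the build host, whose byte order may differ from the target's, so every multi-byte ELF field must be read through a byte-order-aware accessor rather than compared as raw memory. A minimal userspace sketch of the idea, using a simplified hypothetical get_le16() as a stand-in for the kernel's GET_LE macro (which also handles other field widths):

    #include <stdint.h>
    #include <stdio.h>

    /* Read a 16-bit little-endian field byte by byte; the result is
     * correct on both little- and big-endian hosts. */
    static uint16_t get_le16(const void *p)
    {
            const uint8_t *b = p;
            return (uint16_t)(b[0] | (b[1] << 8));
    }

    int main(void)
    {
            /* ET_DYN (3) as laid out in a little-endian ELF file */
            const uint8_t e_type_raw[2] = { 0x03, 0x00 };

            /* Comparing the raw field would break on a big-endian
             * host; converting first is always safe. */
            if (get_le16(e_type_raw) != 3)
                    printf("input is not a shared object\n");
            else
                    printf("ET_DYN ok\n");
            return 0;
    }
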
Fixes: 57f90c3dfc75 ("x86/vdso: Error out if the vDSO isn't a valid DSO") Reported-by: Paul Gortmaker Signed-off-by: Segher Boessenkool Acked-by: Andy Lutomirski Cc: Stephen Rothwell Cc: linux-next@vger.kernel.org Link: http://lkml.kernel.org/r/20160929193442.GA16617@gate.crashing.org Signed-off-by: Thomas Gleixner --- arch/x86/entry/vdso/vdso2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 4f741192846d..3dab75f2a673 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -22,7 +22,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); - if (hdr->e_type != ET_DYN) + if (GET_LE(&hdr->e_type) != ET_DYN) fail("input is not a shared object\n"); /* Walk the segment table. */ From 05fb3c199bb09f5b85de56cc3ede194ac95c5e1f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 28 Sep 2016 16:06:33 -0700 Subject: [PATCH 0993/1050] x86/boot: Initialize FPU and X86_FEATURE_ALWAYS even if we don't have CPUID Otherwise arch_task_struct_size == 0 and we die. While we're at it, set X86_FEATURE_ALWAYS, too. Reported-by: David Saggiorato Tested-by: David Saggiorato Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: aaeb5c01c5b ("x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86") Link: http://lkml.kernel.org/r/8de723afbf0811071185039f9088733188b606c9.1475103911.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 809eda03c527..bcc9ccc220c9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -804,21 +804,20 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) - return; + if (have_cpuid_p()) { + cpu_detect(c); + get_cpu_vendor(c); + get_cpu_cap(c); - cpu_detect(c); - get_cpu_vendor(c); - get_cpu_cap(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); + c->cpu_index = 0; + filter_cpuid_features(c, false); - c->cpu_index = 0; - filter_cpuid_features(c, false); - - if (this_cpu->c_bsp_init) - this_cpu->c_bsp_init(c); + if (this_cpu->c_bsp_init) + this_cpu->c_bsp_init(c); + } setup_force_cpu_cap(X86_FEATURE_ALWAYS); fpu__init_system(c); From 2fa5f04f85730d0c4f49f984b7efeb4f8d5bd1fc Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 30 Sep 2016 09:01:06 +0800 Subject: [PATCH 0994/1050] x86/entry/64: Fix context tracking state warning when load_gs_index fails This warning: WARNING: CPU: 0 PID: 3331 at arch/x86/entry/common.c:45 enter_from_user_mode+0x32/0x50 CPU: 0 PID: 3331 Comm: ldt_gdt_64 Not tainted 4.8.0-rc7+ #13 Call Trace: dump_stack+0x99/0xd0 __warn+0xd1/0xf0 warn_slowpath_null+0x1d/0x20 enter_from_user_mode+0x32/0x50 error_entry+0x6d/0xc0 ? general_protection+0x12/0x30 ? native_load_gs_index+0xd/0x20 ? do_set_thread_area+0x19c/0x1f0 SyS_set_thread_area+0x24/0x30 do_int80_syscall_32+0x7c/0x220 entry_INT80_compat+0x38/0x50 ... can be reproduced by running the GS testcase of the ldt_gdt test unit in the x86 selftests. 
do_int80_syscall_32() will call enter_from_user_mode() to convert the context tracking state from user state to kernel state. The load_gs_index() call can fail with a user gsbase; if that happens, gsbase is fixed up and execution proceeds. However, enter_from_user_mode() will be called again on the fixed-up path even though the context tracking state is already kernel. This patch fixes it by just fixing up gsbase and telling lockdep that IRQs are off once load_gs_index() has failed with a user gsbase. Signed-off-by: Wanpeng Li Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1475197266-3440-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d172c619c449..02fff3ebfb87 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1002,7 +1002,6 @@ ENTRY(error_entry) testb $3, CS+8(%rsp) jz .Lerror_kernelspace -.Lerror_entry_from_usermode_swapgs: /* * We entered from user mode or we're pretending to have entered * from user mode due to an IRET fault. @@ -1045,7 +1044,8 @@ ENTRY(error_entry) * gsbase and proceed. We'll fix up the exception and land in * .Lgs_change's error handler with kernel gsbase. */ - jmp .Lerror_entry_from_usermode_swapgs + SWAPGS + jmp .Lerror_entry_done .Lbstep_iret: /* Fix truncated RIP */ From 8782def204e57f6a507ff425e4944df4e010751a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 08:50:42 +0100 Subject: [PATCH 0995/1050] rxrpc: Switch to Congestion Avoidance mode at cwnd==ssthresh Switch to Congestion Avoidance mode at cwnd == ssthresh rather than relying on cwnd getting incremented beyond ssthresh and the window size, the mode being shifted and then cwnd being corrected. We need to make sure we switch into CA mode so that we stop marking every packet for ACK.
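The distinction matters because slow start must hand over to congestion avoidance exactly when cwnd reaches ssthresh, not one increment later. A minimal sketch of the corrected transition, using simplified hypothetical types rather than rxrpc's actual call state:

    enum cong_mode { SLOW_START, CONGEST_AVOIDANCE };

    struct cong_state {
            unsigned int cwnd;      /* congestion window, in packets */
            unsigned int ssthresh;  /* slow-start threshold */
            enum cong_mode mode;
    };

    /* Per-ACK growth during slow start: leave slow start as soon as
     * cwnd reaches ssthresh ('>=', not '>'), so the window never runs
     * a round ahead of the mode it is supposed to be in. */
    static void on_ack_slow_start(struct cong_state *c)
    {
            c->cwnd += 1;
            if (c->cwnd >= c->ssthresh)
                    c->mode = CONGEST_AVOIDANCE;
    }

With '>' the state machine could sit at cwnd == ssthresh for a whole round while still behaving as if in slow start - which in rxrpc's case meant every packet kept requesting an ACK.
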
Signed-off-by: David Howells --- net/rxrpc/input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1461d30583c9..21746f0f7ae0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -57,7 +57,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_ssthresh = max_t(unsigned int, summary->flight_size / 2, 2); cwnd = 1; - if (cwnd > call->cong_ssthresh && + if (cwnd >= call->cong_ssthresh && call->cong_mode == RXRPC_CALL_SLOW_START) { call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; @@ -82,7 +82,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto packet_loss_detected; if (summary->cumulative_acks > 0) cwnd += 1; - if (cwnd > call->cong_ssthresh) { + if (cwnd >= call->cong_ssthresh) { call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; } @@ -161,7 +161,7 @@ resume_normality: call->cong_dup_acks = 0; call->cong_extra = 0; call->cong_tstamp = skb->tstamp; - if (cwnd <= call->cong_ssthresh) + if (cwnd < call->cong_ssthresh) call->cong_mode = RXRPC_CALL_SLOW_START; else call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; From 0851115090a3eb9585d6a804a61e47f3d89ac2a8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 09:33:27 +0100 Subject: [PATCH 0996/1050] rxrpc: Reduce ssthresh to peer's receive window When we receive an ACK from the peer that tells us what the peer's receive window (rwind) is, we should reduce ssthresh to rwind if rwind is smaller than ssthresh. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 21746f0f7ae0..7993473e56bb 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -658,6 +658,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) rwind = RXRPC_RXTX_BUFF_SIZE - 1; call->tx_winsize = rwind; + if (call->cong_ssthresh > rwind) + call->cong_ssthresh = rwind; mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); From 775e5b71db6aca47d49d43d08751f2e8ebad7f60 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 13:26:03 +0100 Subject: [PATCH 0997/1050] rxrpc: The offset field in struct rxrpc_skb_priv is unnecessary The offset field in struct rxrpc_skb_priv is unnecessary as the value can always be calculated. 
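The general refactoring at work in this patch: a cached field whose value is always derivable from a constant is pure liability, since any path that forgets to set or update it leaves stale state behind. A sketch of the shape of the change, with hypothetical names:

    #include <stddef.h>

    struct wire_header { unsigned char bytes[28]; }; /* fixed-size header */

    /* Before: each packet carried a cached 'offset' member that every
     * reader trusted to have been filled in at parse time. After:
     * derive the value at each use, so it can never be stale. */
    static inline size_t payload_offset(void)
    {
            return sizeof(struct wire_header);
    }
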
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/conn_event.c | 3 ++- net/rxrpc/input.c | 23 ++++++++++++----------- net/rxrpc/local_event.c | 3 ++- net/rxrpc/recvmsg.c | 6 ++---- net/rxrpc/rxkad.c | 9 ++++++--- 6 files changed, 24 insertions(+), 21 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 539db54697f9..fd64a2bd1072 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -144,7 +144,6 @@ struct rxrpc_skb_priv { u8 nr_jumbo; /* Number of jumbo subpackets */ }; union { - unsigned int offset; /* offset into buffer of next read */ int remain; /* amount of space remaining for next write */ u32 error; /* network error code */ }; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 37609ce89f52..3f9d8d7ec632 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -276,7 +276,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7993473e56bb..5ba35b4a907b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -358,7 +358,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) static bool rxrpc_validate_jumbo(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sp->offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len; int nr_jumbo = 1; u8 flags = sp->hdr.flags; @@ -419,7 +419,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sp->offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int ix; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; rxrpc_seq_t seq = sp->hdr.seq, hard_ack; @@ -746,15 +746,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } buf; rxrpc_serial_t acked_serial; rxrpc_seq_t first_soft_ack, hard_ack; - int nr_acks, offset; + int nr_acks, offset, ioffset; _enter(""); - if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) { + offset = sizeof(struct rxrpc_wire_header); + if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) { _debug("extraction failure"); return rxrpc_proto_abort("XAK", call, 0); } - sp->offset += sizeof(buf.ack); + offset += sizeof(buf.ack); acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); @@ -792,9 +793,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - offset = sp->offset + nr_acks + 3; - if (skb->len >= offset + sizeof(buf.info)) { - if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) + ioffset = offset + nr_acks + 3; + if (skb->len >= ioffset + sizeof(buf.info)) { + if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) return rxrpc_proto_abort("XAI", call, 0); rxrpc_input_ackinfo(call, skb, &buf.info); } @@ -832,7 +833,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_rotate_tx_window(call, hard_ack, &summary); if (nr_acks > 0) { - if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) return 
rxrpc_proto_abort("XSA", call, 0); rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); @@ -880,7 +881,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) _enter(""); if (skb->len >= 4 && - skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0) + skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) >= 0) abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); @@ -996,7 +998,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); - sp->offset = sizeof(whdr); return 0; } diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 190f68bd9e27..540d3955c1bc 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -95,7 +95,8 @@ void rxrpc_process_local_events(struct rxrpc_local *local) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, sp->offset, &v, 1) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 038ae62ddb4d..f05ea0a88076 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -261,15 +261,13 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, u8 *_annotation, unsigned int *_offset, unsigned int *_len) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = *_offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = *_len; int ret; u8 annotation = *_annotation; /* Locate the subpacket */ - offset = sp->offset; - len = skb->len - sp->offset; + len = skb->len - offset; if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * RXRPC_JUMBO_SUBPKTLEN); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 88d080a1a3de..627abed5f999 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -771,7 +771,8 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -1028,7 +1029,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -1057,7 +1059,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, From c31410ea009d10501ea90f64cdda0083c8cf0161 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 13:42:31 +0100 Subject: [PATCH 0998/1050] rxrpc: Remove error from struct rxrpc_skb_priv as it is unused Remove error from struct rxrpc_skb_priv as it is no longer used. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index fd64a2bd1072..141c1458e719 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -145,7 +145,6 @@ struct rxrpc_skb_priv { }; union { int remain; /* amount of space remaining for next write */ - u32 error; /* network error code */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ From df0adc788ae74e35ab1a79f3db878df7fdc7db55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 26 Sep 2016 22:12:49 +0100 Subject: [PATCH 0999/1050] rxrpc: Keep the call timeouts as ktimes rather than jiffies Keep the call timeouts as ktimes rather than jiffies so that they can be expressed as functions of RTT. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 25 ++++++------ net/rxrpc/ar-internal.h | 8 ++-- net/rxrpc/call_event.c | 73 +++++++++++++++++++----------------- net/rxrpc/call_object.c | 16 +++----- net/rxrpc/input.c | 3 +- net/rxrpc/misc.c | 15 +++++--- net/rxrpc/sendmsg.c | 8 ++-- net/rxrpc/sysctl.c | 8 ++-- 8 files changed, 82 insertions(+), 74 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 67f03946ea4a..0383e5e9a0f3 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -453,17 +453,18 @@ TRACE_EVENT(rxrpc_rtt_rx, TRACE_EVENT(rxrpc_timer, TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, - unsigned long now), + ktime_t now, unsigned long now_j), - TP_ARGS(call, why, now), + TP_ARGS(call, why, now, now_j), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(enum rxrpc_timer_trace, why ) - __field(unsigned long, now ) - __field(unsigned long, expire_at ) - __field(unsigned long, ack_at ) - __field(unsigned long, resend_at ) + __field_struct(ktime_t, now ) + __field_struct(ktime_t, expire_at ) + __field_struct(ktime_t, ack_at ) + __field_struct(ktime_t, resend_at ) + __field(unsigned long, now_j ) __field(unsigned long, timer ) ), @@ -474,17 +475,17 @@ TRACE_EVENT(rxrpc_timer, __entry->expire_at = call->expire_at; __entry->ack_at = call->ack_at; __entry->resend_at = call->resend_at; + __entry->now_j = now_j; __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s now=%lx x=%ld a=%ld r=%ld t=%ld", + TP_printk("c=%p %s x=%lld a=%lld r=%lld t=%ld", __entry->call, rxrpc_timer_traces[__entry->why], - __entry->now, - __entry->expire_at - __entry->now, - __entry->ack_at - __entry->now, - __entry->resend_at - __entry->now, - __entry->timer - __entry->now) + ktime_to_ns(ktime_sub(__entry->expire_at, __entry->now)), + ktime_to_ns(ktime_sub(__entry->ack_at, __entry->now)), + ktime_to_ns(ktime_sub(__entry->resend_at, __entry->now)), + __entry->timer - __entry->now_j) ); TRACE_EVENT(rxrpc_rx_lose, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 141c1458e719..d38dffd78085 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -464,9 +464,9 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ - unsigned long ack_at; /* When deferred ACK needs to happen */ - unsigned long resend_at; /* When next resend needs to happen */ - unsigned long expire_at; /* When the call times out */ + ktime_t ack_at; /* When deferred ACK needs to happen */ + ktime_t resend_at; /* When next resend needs to happen */ + ktime_t expire_at; /*
When the call times out */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@ -805,7 +805,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1f6c7633b964..9ff3bb3ffb41 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,28 +24,40 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why) +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) { - unsigned long t, now = jiffies; + unsigned long t_j, now_j = jiffies; + ktime_t t; read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { t = call->expire_at; - if (time_before_eq(t, now)) + if (!ktime_after(t, now)) goto out; - if (time_after(call->resend_at, now) && - time_before(call->resend_at, t)) + if (ktime_after(call->resend_at, now) && + ktime_before(call->resend_at, t)) t = call->resend_at; - if (time_after(call->ack_at, now) && - time_before(call->ack_at, t)) + if (ktime_after(call->ack_at, now) && + ktime_before(call->ack_at, t)) t = call->ack_at; - if (call->timer.expires != t || !timer_pending(&call->timer)) { - mod_timer(&call->timer, t); - trace_rxrpc_timer(call, why, now); + t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); + t_j += jiffies; + + /* We have to make sure that the calculated jiffies value falls + * at or after the nsec value, or we may loop ceaselessly + * because the timer times out, but we haven't reached the nsec + * timeout yet. + */ + t_j++; + + if (call->timer.expires != t_j || !timer_pending(&call->timer)) { + mod_timer(&call->timer, t_j); + trace_rxrpc_timer(call, why, now, now_j); } } @@ -62,7 +74,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; + unsigned int expiry = rxrpc_soft_ack_delay; + ktime_t now, ack_at; s8 prior = rxrpc_ack_priority[ack_reason]; /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial @@ -111,7 +124,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, break; } - now = jiffies; if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { _debug("already scheduled"); } else if (immediate || expiry == 0) { @@ -120,11 +132,11 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - ack_at = now + expiry; - _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); - if (time_before(ack_at, call->ack_at)) { + now = ktime_get_real(); + ack_at = ktime_add_ms(now, expiry); + if (ktime_before(ack_at, call->ack_at)) { call->ack_at = ack_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ack); + rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); } } @@ -157,12 +169,12 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. 
*/ -static void rxrpc_resend(struct rxrpc_call *call) +static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts; + ktime_t max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; @@ -212,14 +224,7 @@ static void rxrpc_resend(struct rxrpc_call *call) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); - call->resend_at = jiffies + - nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) + - 1; /* We have to make sure that the calculated jiffies value - * falls at or after the nsec value, or we shall loop - * ceaselessly because the timer times out, but we haven't - * reached the nsec timeout yet. - */ + call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); if (unacked) rxrpc_congestion_timeout(call); @@ -229,7 +234,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * retransmitting data. */ if (!retrans) { - rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) @@ -301,7 +306,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - unsigned long now; + ktime_t now; rxrpc_see_call(call); @@ -320,15 +325,15 @@ recheck_state: goto out_put; } - now = jiffies; - if (time_after_eq(now, call->expire_at)) { + now = ktime_get_real(); + if (ktime_before(call->expire_at, now)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || - time_after_eq(now, call->ack_at)) { + ktime_before(call->ack_at, now)) { call->ack_at = call->expire_at; if (call->ackr_reason) { rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); @@ -337,12 +342,12 @@ recheck_state: } if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || - time_after_eq(now, call->resend_at)) { - rxrpc_resend(call); + ktime_before(call->resend_at, now)) { + rxrpc_resend(call, now); goto recheck_state; } - rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d4b3293b78fa..456ab752d473 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -19,11 +19,6 @@ #include #include "ar-internal.h" -/* - * Maximum lifetime of a call (in jiffies). 
- */ -unsigned int rxrpc_max_call_lifetime = 60 * HZ; - const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", @@ -77,7 +72,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); if (call->state < RXRPC_CALL_COMPLETE) { - trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); + trace_rxrpc_timer(call, rxrpc_timer_expired, + ktime_get_real(), jiffies); rxrpc_queue_call(call); } } @@ -207,14 +203,14 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, */ static void rxrpc_start_call_timer(struct rxrpc_call *call) { - unsigned long expire_at; + ktime_t now = ktime_get_real(), expire_at; - expire_at = jiffies + rxrpc_max_call_lifetime; + expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); call->expire_at = expire_at; call->ack_at = expire_at; call->resend_at = expire_at; - call->timer.expires = expire_at + 1; - rxrpc_set_timer(call, rxrpc_timer_begin); + call->timer.expires = jiffies + LONG_MAX / 2; + rxrpc_set_timer(call, rxrpc_timer_begin, now); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5ba35b4a907b..3ad9f75031e3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -328,7 +328,8 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) call->resend_at = call->expire_at; call->ack_at = call->expire_at; spin_unlock_bh(&call->lock); - rxrpc_set_timer(call, rxrpc_timer_init_for_reply); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply, + ktime_get_real()); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 47dddacdbb91..9d1c721bc4e8 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -20,29 +20,34 @@ */ unsigned int rxrpc_max_backlog __read_mostly = 10; +/* + * Maximum lifetime of a call (in mx). + */ +unsigned int rxrpc_max_call_lifetime = 60 * 1000; + /* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in jiffies). + * packet with RXRPC_REQUEST_ACK set (in ms). */ unsigned int rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * How long to wait before scheduling an ACK with subtype DELAY (in ms). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * HZ; +unsigned int rxrpc_soft_ack_delay = 1 * 1000; /* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * How long to wait before scheduling an ACK with subtype IDLE (in ms). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; +unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; /* * Receive window size in packets. 
This indicates the maximum number of diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index d8dfdce874d8..3322543d460a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -149,13 +149,13 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - unsigned long resend_at; + ktime_t now = ktime_get_real(), resend_at; - resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout); + resend_at = ktime_add_ms(now, rxrpc_resend_timeout); - if (time_before(resend_at, call->resend_at)) { + if (ktime_before(resend_at, call->resend_at)) { call->resend_at = resend_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_send); + rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); } } diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 13d1df03ebac..34c706d2f79c 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -35,7 +35,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_requested_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&zero, }, { @@ -43,7 +43,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_soft_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -51,7 +51,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_idle_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -85,7 +85,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_max_call_lifetime, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, From 405dea1debeb9956684de342903bba9ddd52f1cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Sep 2016 09:13:50 +0100 Subject: [PATCH 1000/1050] rxrpc: Fix the call timer handling The call timer's concept of an inactive call timeout (of which there are three) is that the timeout has the same expiration time as the call expiration timeout (the expiration timer is never inactive). However, the timeouts are not reset when they expire, leading to repeated processing of expired timeouts when other timeout events occur. Fix this by: (1) Move the timer expiry detection into rxrpc_set_timer() inside the locked section. This means that if a timeout is set that will expire immediately, we deal with it immediately. (2) If a timeout is at or before now then it has expired. When an expiry is detected, an event is raised, the timeout is automatically inactivated and the event processor is queued. (3) If a timeout is at or after the expiry timeout then it is inactive. Inactive timeouts do not contribute to the timer setting. (4) The call timer callback can now just call rxrpc_set_timer() to handle things. (5) The call processor work function now checks the event flags rather than checking the timeouts directly.
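The core of (1)-(3) is a single routine that both detects expiry and re-arms, so a fired timeout can never be processed twice. A compact sketch of that pattern under hypothetical simplified types (a real implementation would hold the call's lock around this, as the patch does):

    #include <stdbool.h>

    struct call_timeouts {
            long long expire_at;   /* hard deadline; never inactive */
            long long resend_at;   /* == expire_at means "inactive" */
            bool resend_event;
            bool need_queue;
    };

    /* Recompute the next timer firing. A timeout at or before 'now'
     * has expired: raise its event, park it on expire_at (deactivate)
     * and request that the event processor be queued. A timeout at or
     * after expire_at is inactive and ignored. */
    static long long set_timer(struct call_timeouts *c, long long now)
    {
            long long t = c->expire_at;

            if (c->resend_at <= now) {
                    c->resend_at = c->expire_at;   /* auto-inactivate */
                    c->resend_event = true;
                    c->need_queue = true;
            } else if (c->resend_at < t) {
                    t = c->resend_at;              /* earliest active */
            }
            return t;  /* program the timer to fire at t */
    }
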
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 26 ++++++++++++++++++-------- net/rxrpc/call_object.c | 7 ++----- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 9ff3bb3ffb41..4f00476630b9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -29,6 +29,7 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, { unsigned long t_j, now_j = jiffies; ktime_t t; + bool queue = false; read_lock_bh(&call->state_lock); @@ -37,13 +38,21 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, if (!ktime_after(t, now)) goto out; - if (ktime_after(call->resend_at, now) && - ktime_before(call->resend_at, t)) + if (!ktime_after(call->resend_at, now)) { + call->resend_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + queue = true; + } else if (ktime_before(call->resend_at, t)) { t = call->resend_at; + } - if (ktime_after(call->ack_at, now) && - ktime_before(call->ack_at, t)) + if (!ktime_after(call->ack_at, now)) { + call->ack_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) + queue = true; + } else if (ktime_before(call->ack_at, t)) { t = call->ack_at; + } t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); t_j += jiffies; @@ -59,6 +68,9 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, mod_timer(&call->timer, t_j); trace_rxrpc_timer(call, why, now, now_j); } + + if (queue) + rxrpc_queue_call(call); } out: @@ -332,8 +344,7 @@ recheck_state: goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || - ktime_before(call->ack_at, now)) { + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { call->ack_at = call->expire_at; if (call->ackr_reason) { rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); @@ -341,8 +352,7 @@ recheck_state: } } - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || - ktime_before(call->resend_at, now)) { + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { rxrpc_resend(call, now); goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 456ab752d473..364b42dc3dce 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -71,11 +71,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) { - trace_rxrpc_timer(call, rxrpc_timer_expired, - ktime_get_real(), jiffies); - rxrpc_queue_call(call); - } + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); } /* From 9a2172a8d52cf14ce44e9cadd8c9df84cf832d75 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Fri, 30 Sep 2016 13:14:28 +0100 Subject: [PATCH 1001/1050] MAINTAINERS: Switch to kernel.org email address for Javi Merino Change my email address to my kernel.org account instead of the ARM one. 
Signed-off-by: Javi Merino Signed-off-by: Linus Torvalds --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index de22daefd9da..1dab0a156489 100644 --- a/.mailmap +++ b/.mailmap @@ -69,6 +69,7 @@ James Bottomley James Bottomley James E Wilson James Ketrenos +Javi Merino Jean Tourrilhes Jeff Garzik diff --git a/MAINTAINERS b/MAINTAINERS index b003d0ca6238..f593300e310b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11626,7 +11626,7 @@ F: Documentation/devicetree/bindings/thermal/ THERMAL/CPU_COOLING M: Amit Daniel Kachhap M: Viresh Kumar -M: Javi Merino +M: Javi Merino L: linux-pm@vger.kernel.org S: Supported F: Documentation/thermal/cpu-cooling-api.txt From 22f2ac51b6d643666f4db093f13144f773ff3f3a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 30 Sep 2016 15:11:29 -0700 Subject: [PATCH 1002/1050] mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page() Antonio reports the following crash when using fuse under memory pressure: kernel BUG at /build/linux-a2WvEb/linux-4.4.0/mm/workingset.c:346! invalid opcode: 0000 [#1] SMP Modules linked in: all of them CPU: 2 PID: 63 Comm: kswapd0 Not tainted 4.4.0-36-generic #55-Ubuntu Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013 task: ffff88040cae6040 ti: ffff880407488000 task.ti: ffff880407488000 RIP: shadow_lru_isolate+0x181/0x190 Call Trace: __list_lru_walk_one.isra.3+0x8f/0x130 list_lru_walk_one+0x23/0x30 scan_shadow_nodes+0x34/0x50 shrink_slab.part.40+0x1ed/0x3d0 shrink_zone+0x2ca/0x2e0 kswapd+0x51e/0x990 kthread+0xd8/0xf0 ret_from_fork+0x3f/0x70 which corresponds to the following sanity check in the shadow node tracking: BUG_ON(node->count & RADIX_TREE_COUNT_MASK); The workingset code tracks radix tree nodes that exclusively contain shadow entries of evicted pages in them, and this (somewhat obscure) line checks whether there are real pages left that would interfere with reclaim of the radix tree node under memory pressure. While discussing how fuse might sneak pages into the radix tree past the workingset code, Miklos pointed to replace_page_cache_page(), and indeed there is a problem there: it properly accounts for the old page being removed - __delete_from_page_cache() does that - but then does a raw radix_tree_insert(), not accounting for the replacement page. Eventually the page count bits in node->count underflow while leaving the node incorrectly linked to the shadow node LRU. To address this, make sure replace_page_cache_page() uses the tracked page insertion code, page_cache_tree_insert(). This fixes the page accounting and makes sure page-containing nodes are properly unlinked from the shadow node LRU again. Also, make the sanity checks a bit less obscure by using the helpers for checking the number of pages and shadows in a radix tree node.
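The failure mode generalizes: when page and shadow tallies share one packed counter word, any insertion path that bypasses the accounting helper lets a later, correctly-accounted removal underflow the low bits into the high bits, and the BUG only fires much later in the shrinker. A toy sketch of the packing discipline, with hypothetical names mirroring the RADIX_TREE_COUNT_SHIFT scheme described above:

    #include <assert.h>

    #define COUNT_SHIFT 10                         /* low bits: pages */
    #define COUNT_MASK  ((1u << COUNT_SHIFT) - 1)  /* high bits: shadows */

    static unsigned int node_count;

    static void node_pages_inc(void) { node_count++; }

    static void node_pages_dec(void)
    {
            /* Catch underflow at the decrement itself, not later. */
            assert(node_count & COUNT_MASK);
            node_count--;
    }

    static void node_shadows_dec(void)
    {
            assert(node_count >> COUNT_SHIFT);
            node_count -= 1u << COUNT_SHIFT;
    }

replace_page_cache_page()'s raw insert skipped the increment for the new page while the eventual delete still performed the decrement - exactly the mismatch the guarded helpers above would trip on immediately.
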
Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check") Link: http://lkml.kernel.org/r/20160919155822.29498-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Antonio SJ Musumeci Debugged-by: Miklos Szeredi Cc: [3.15+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 + mm/filemap.c | 114 +++++++++++++++++++++---------------------- mm/workingset.c | 10 ++-- 3 files changed, 63 insertions(+), 63 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index b17cc4830fa6..4a529c984a3f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -257,6 +257,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node) static inline void workingset_node_pages_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_pages(node)); node->count--; } @@ -272,6 +273,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node) static inline void workingset_node_shadows_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_shadows(node)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; } diff --git a/mm/filemap.c b/mm/filemap.c index 8a287dfc5372..2d0986a64f1f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -110,6 +110,62 @@ * ->tasklist_lock (memory_failure, collect_procs_ao) */ +static int page_cache_tree_insert(struct address_space *mapping, + struct page *page, void **shadowp) +{ + struct radix_tree_node *node; + void **slot; + int error; + + error = __radix_tree_create(&mapping->page_tree, page->index, 0, + &node, &slot); + if (error) + return error; + if (*slot) { + void *p; + + p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + if (!radix_tree_exceptional_entry(p)) + return -EEXIST; + + mapping->nrexceptional--; + if (!dax_mapping(mapping)) { + if (shadowp) + *shadowp = p; + if (node) + workingset_node_shadows_dec(node); + } else { + /* DAX can replace empty locked entry with a hole */ + WARN_ON_ONCE(p != + (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | + RADIX_DAX_ENTRY_LOCK)); + /* DAX accounts exceptional entries as normal pages */ + if (node) + workingset_node_pages_dec(node); + /* Wakeup waiters for exceptional entry lock */ + dax_wake_mapping_entry_waiter(mapping, page->index, + false); + } + } + radix_tree_replace_slot(slot, page); + mapping->nrpages++; + if (node) { + workingset_node_pages_inc(node); + /* + * Don't track node that contains actual pages. + * + * Avoid acquiring the list_lru lock if already + * untracked. The list_empty() test is safe as + * node->private_list is protected by + * mapping->tree_lock. 
+ */ + if (!list_empty(&node->private_list)) + list_lru_del(&workingset_shadow_nodes, + &node->private_list); + } + return 0; +} + static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { @@ -561,7 +617,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(old, NULL); - error = radix_tree_insert(&mapping->page_tree, offset, new); + error = page_cache_tree_insert(mapping, new, NULL); BUG_ON(error); mapping->nrpages++; @@ -584,62 +640,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -static int page_cache_tree_insert(struct address_space *mapping, - struct page *page, void **shadowp) -{ - struct radix_tree_node *node; - void **slot; - int error; - - error = __radix_tree_create(&mapping->page_tree, page->index, 0, - &node, &slot); - if (error) - return error; - if (*slot) { - void *p; - - p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); - if (!radix_tree_exceptional_entry(p)) - return -EEXIST; - - mapping->nrexceptional--; - if (!dax_mapping(mapping)) { - if (shadowp) - *shadowp = p; - if (node) - workingset_node_shadows_dec(node); - } else { - /* DAX can replace empty locked entry with a hole */ - WARN_ON_ONCE(p != - (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | - RADIX_DAX_ENTRY_LOCK)); - /* DAX accounts exceptional entries as normal pages */ - if (node) - workingset_node_pages_dec(node); - /* Wakeup waiters for exceptional entry lock */ - dax_wake_mapping_entry_waiter(mapping, page->index, - false); - } - } - radix_tree_replace_slot(slot, page); - mapping->nrpages++; - if (node) { - workingset_node_pages_inc(node); - /* - * Don't track node that contains actual pages. - * - * Avoid acquiring the list_lru lock if already - * untracked. The list_empty() test is safe as - * node->private_list is protected by - * mapping->tree_lock. - */ - if (!list_empty(&node->private_list)) - list_lru_del(&workingset_shadow_nodes, - &node->private_list); - } - return 0; -} - static int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask, diff --git a/mm/workingset.c b/mm/workingset.c index 69551cfae97b..617475f529f4 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -418,21 +418,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. 
*/ - - BUG_ON(!node->count); - BUG_ON(node->count & RADIX_TREE_COUNT_MASK); + BUG_ON(!workingset_node_shadows(node)); + BUG_ON(workingset_node_pages(node)); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { BUG_ON(!radix_tree_exceptional_entry(node->slots[i])); node->slots[i] = NULL; - BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT)); - node->count -= 1U << RADIX_TREE_COUNT_SHIFT; + workingset_node_shadows_dec(node); BUG_ON(!mapping->nrexceptional); mapping->nrexceptional--; } } - BUG_ON(node->count); + BUG_ON(workingset_node_shadows(node)); inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); if (!__radix_tree_delete_node(&mapping->page_tree, node)) BUG(); From c33f0785bf292cf1d15f4fbe42869c63e205b21c Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Fri, 30 Sep 2016 15:11:32 -0700 Subject: [PATCH 1003/1050] ocfs2: fix deadlock on mmapped page in ocfs2_write_begin_nolock() The testcase "mmaptruncate" of ocfs2-test deadlocks occasionally. In this testcase, we create a 2*CLUSTER_SIZE file and mmap() on it; there are two processes repeatedly performing the following operations respectively: one is doing memset(mmaped_addr + 2*CLUSTER_SIZE - 1, 'a', 1), while the other is calling ftruncate(fd, 2*CLUSTER_SIZE) and then ftruncate(fd, CLUSTER_SIZE) again and again. This is the backtrace when the deadlock happens: __wait_on_bit_lock+0x50/0xa0 __lock_page+0xb7/0xc0 ocfs2_write_begin_nolock+0x163f/0x1790 [ocfs2] ocfs2_page_mkwrite+0x1c7/0x2a0 [ocfs2] do_page_mkwrite+0x66/0xc0 handle_mm_fault+0x685/0x1350 __do_page_fault+0x1d8/0x4d0 trace_do_page_fault+0x37/0xf0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 In ocfs2_write_begin_nolock(), we first grab the pages and then allocate disk space for this write; ocfs2_try_to_free_truncate_log() will be called if -ENOSPC is returned; if we're lucky enough to get enough clusters, which is usually the case, we start over again. But in ocfs2_free_write_ctxt() the target page isn't unlocked, so we will deadlock when trying to grab the target page again. Also, -ENOMEM might be returned in ocfs2_grab_pages_for_write(). Another deadlock will happen in __do_page_mkwrite() if ocfs2_page_mkwrite() returns non-VM_FAULT_LOCKED along with a locked target page. These two errors fail on the same path, so fix them by unlocking the target page manually before ocfs2_free_write_ctxt(). Jan Kara helped clear out the JBD2 part and suggested the hint for the root cause. Changes since v1: 1. Also put ENOMEM error case into consideration. Link: http://lkml.kernel.org/r/1474173902-32075-1-git-send-email-zren@suse.com Signed-off-by: Eric Ren Reviewed-by: He Gang Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 98d36548153d..bbb4b3e5b4ff 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1842,6 +1842,16 @@ out_commit: ocfs2_commit_trans(osb, handle); out: + /* + * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(), + * even in case of error here like ENOSPC and ENOMEM. So, we need + * to unlock the target page manually to prevent deadlocks when + * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED + * to VM code.
+ */ + if (wc->w_target_locked) + unlock_page(mmap_page); + ocfs2_free_write_ctxt(inode, wc); if (data_ac) { From 37aa7271d9742b574763e5ce019bde9c49aa8bfe Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 30 Sep 2016 15:11:35 -0700 Subject: [PATCH 1004/1050] include/linux/property.h: fix typo/compile error This fixes commit d76eebfa175e ("include/linux/property.h: fix build issues with gcc-4.4.4"). With that commit we get the following compile error when using the PROPERTY_ENTRY_INTEGER_ARRAY macro. include/linux/property.h:201:39: error: `u32_data' undeclared (first use in this function) PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u32, _val_) ^ include/linux/property.h:193:17: note: in definition of macro `PROPERTY_ENTRY_INTEGER_ARRAY' { .pointer = { _type_##_data = _val_ } }, \ ^ This needs a '.' to reference the union member. It seems this was just overlooked here since it is done correctly in similar constructs in other parts of the original commit. This fix is in preparation for upcoming commits that will use this macro. Fixes: commit d76eebfa175e ("include/linux/property.h: fix build issues with gcc-4.4.4") Link: http://lkml.kernel.org/r/2de3b929290d88a723ed829a3e3cbd02044714df.1475114627.git.johnyoun@synopsys.com Signed-off-by: John Youn Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/property.h b/include/linux/property.h index 3a2f9ae25c86..856e50b2140c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -190,7 +190,7 @@ struct property_entry { .length = ARRAY_SIZE(_val_) * sizeof(_type_), \ .is_array = true, \ .is_string = false, \ - { .pointer = { _type_##_data = _val_ } }, \ + { .pointer = { ._type_##_data = _val_ } }, \ } #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ From d6169b0206db1c8c8d0e4c6b79fdf4b2fc6455f1 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 30 Sep 2016 15:24:31 -0700 Subject: [PATCH 1005/1050] net: Use ns_capable_noaudit() when determining net sysctl permissions The capability check should not be audited since it is only being used to determine the inode permissions. A failed check does not indicate a violation of security policy but, when an LSM is enabled, a denial audit message was being generated. The denial audit message caused confusion for some application authors because root-running Go applications always triggered the denial. To prevent this confusion, the capability check in net_ctl_permissions() is switched to the noaudit variant. BugLink: https://launchpad.net/bugs/1465724 Signed-off-by: Tyler Hicks Acked-by: Serge E. Hallyn Signed-off-by: James Morris [dtor: reapplied after e79c6a4fc923 ("net: make net namespace sysctls belong to container's owner") accidentally reverted the change.] Signed-off-by: Dmitry Torokhov Signed-off-by: David S. Miller --- net/sysctl_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 5bc1a3d57401..e0c71bd8f7cf 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -44,7 +44,7 @@ static int net_ctl_permissions(struct ctl_table_header *head, struct net *net = container_of(head->set, struct net, sysctls); /* Allow network administrator to have same access as root.
*/ - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } From 6605d156bdfbb2502ba301bc4fbd8db696ae4b6d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Sep 2016 17:25:01 +0100 Subject: [PATCH 1006/1050] MIPS: CM: Fix mips_cm_max_vp_width for non-MT kernels on MT systems When discovering the number of VPEs per core, smp_num_siblings will be incorrect for kernels built without support for the MIPS MultiThreading (MT) ASE running on systems which implement said ASE. This leads to accesses to VPEs in secondary cores being performed incorrectly, since mips_cm_vp_id calculates the wrong ID to write to the local "other" registers. Fix this by examining the number of VPEs in the core as reported by the CM. This patch presumes that the number of VPEs will be the same in each core of the system. As this path only applies to systems with CM version 2.5 or lower, and this property is true of all such known systems, this is likely to be fine but is described in a comment for good measure. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14338/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mips-cm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 58e7874e9347..4fafeefe65c2 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -458,10 +458,21 @@ static inline int mips_cm_revision(void) static inline unsigned int mips_cm_max_vp_width(void) { extern int smp_num_siblings; + uint32_t cfg; if (mips_cm_revision() >= CM_REV_CM3) return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; + if (mips_cm_present()) { + /* + * We presume that all cores in the system will have the same + * number of VP(E)s, and if that ever changes then this will + * need revisiting. + */ + cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE_MSK; + return (cfg >> CM_GCR_Cx_CONFIG_PVPE_SHF) + 1; + } + if (IS_ENABLED(CONFIG_SMP)) return smp_num_siblings; From 117e5e9c4cfcb7628f08de074fbfefec1bb678b7 Mon Sep 17 00:00:00 2001 From: Srinivas Ramana Date: Fri, 30 Sep 2016 15:03:31 +0100 Subject: [PATCH 1007/1050] ARM: 8618/1: decompressor: reset ttbcr fields to use TTBR0 on ARMv7 If the bootloader uses the long descriptor format and jumps to the kernel decompressor code, TTBCR may not be in the right state. Before enabling the MMU, it is required to clear the TTBCR.PD0 field to use TTBR0 for translation table walks. Commit dbece45894d3 ("ARM: 7501/1: decompressor: reset ttbcr for VMSA ARMv7 cores") resets TTBCR.N, but doesn't clear all of the bits that make up the TTBCR.N field. Clear the TTBCR.PD0 field and reset all three bits of TTBCR.N to indicate the use of TTBR0 and the correct base address width.
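For reference, here is a minimal sketch (not part of the original patch text; the macro names are illustrative) of the relevant TTBCR bit fields in the ARMv7 short-descriptor view, which is what the new (7 << 0) | (1 << 4) mask in the hunk below clears:

/* TTBCR, ARMv7 short-descriptor (non-LPAE) view, relevant fields only:
 *   N   - bits [2:0]: controls the TTBR0 base address width and the
 *                     TTBR0/TTBR1 boundary; 0 means TTBR0 covers the
 *                     whole address space
 *   PD0 - bit 4     : when set, translation table walks via TTBR0 are
 *                     disabled and cause a translation fault
 */
#define TTBCR_N_MASK	(7 << 0)
#define TTBCR_PD0	(1 << 4)

Clearing both fields makes the MMU walk from TTBR0 with the full base address width, which is what the decompressor needs.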
Fixes: dbece45894d3 ("ARM: 7501/1: decompressor: reset ttbcr for VMSA ARMv7 cores") Acked-by: Robin Murphy Signed-off-by: Srinivas Ramana Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index af11c2f8f3b7..fc6d541549a2 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -779,7 +779,7 @@ __armv7_mmu_cache_on: orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client bic r6, r6, #1 << 31 @ 32-bit translation system - bic r6, r6, #3 << 0 @ use only ttbr0 + bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer mcrne p15, 0, r1, c3, c0, 0 @ load domain access control mcrne p15, 0, r6, c2, c0, 2 @ load ttb control From c8d2bc9bc39ebea8437fd974fdbc21847bb897a3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Oct 2016 16:24:33 -0700 Subject: [PATCH 1008/1050] Linux 4.8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ce2ddb3fec98..80b8671d5c46 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From d4ef9f72128d414ad83b27b49312faa971d77382 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 28 Sep 2016 15:05:28 -0700 Subject: [PATCH 1009/1050] netfilter: bridge: clarify bridge/netfilter message When using bridge without bridge netfilter enabled, the message displayed is rather confusing and leads one to believe that a deprecated feature is in use. Use IS_MODULE to be explicit that the message only affects users who use bridge netfilter as a module, and reword the message. Signed-off-by: Stefan Agner Acked-by: Florian Westphal Signed-off-by: David S. Miller --- net/bridge/br.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index 3addc05b9a16..889e5640455f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -227,9 +227,11 @@ static int __init br_init(void) br_fdb_test_addr_hook = br_fdb_test_addr; #endif - pr_info("bridge: automatic filtering via arp/ip/ip6tables has been " - "deprecated. Update your scripts to load br_netfilter if you " +#if IS_MODULE(CONFIG_BRIDGE_NETFILTER) + pr_info("bridge: filtering via arp/ip/ip6tables is no longer available " + "by default. Update your scripts to load br_netfilter if you " "need this.\n"); +#endif return 0; From cb9e684e89e69894cb6697a3fa1274a284d1d3bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 29 Sep 2016 00:33:43 -0700 Subject: [PATCH 1010/1050] ipv6 addrconf: remove addrconf_sysctl_hop_limit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an effective no-op in terms of user observable behaviour. By preventing the overwrite of non-null extra1/extra2 fields in addrconf_sysctl() we can enable the use of proc_dointvec_minmax(). This allows us to eliminate the constant min/max (1..255) trampoline function that is addrconf_sysctl_hop_limit(). This is nice because it simplifies the code, and allows future sysctls with constant min/max limits to also not require trampolines. We still can't eliminate the trampoline for mtu because it isn't actually a constant (it depends on other tunables of the device) and thus requires at-write-time logic to enforce the range.
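As an illustration (a hypothetical entry, not part of this patch), a future constant-range tunable could now be declared directly against proc_dointvec_minmax, following the same pattern the hunk below uses for hop_limit:

static const int one = 1;
static const int two_five_five = 255;

{
	.procname	= "some_new_tunable",	/* hypothetical name */
	.data		= &ipv6_devconf.some_new_tunable,
	.maxlen		= sizeof(int),
	.mode		= 0644,
	.proc_handler	= proc_dointvec_minmax,
	.extra1		= (void *)&one,
	.extra2		= (void *)&two_five_five,
},

No per-sysctl trampoline is needed; __addrconf_sysctl_register() leaves the constant extra1/extra2 pointers alone because they are already non-NULL.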
Signed-off-by: Maciej Żenczykowski Acked-by: Erik Kline Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 87183983724d..cbd9343751a2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5496,20 +5496,6 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } -static -int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table lctl; - int min_hl = 1, max_hl = 255; - - lctl = *ctl; - lctl.extra1 = &min_hl; - lctl.extra2 = &max_hl; - - return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); -} - static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -5743,6 +5729,9 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static const int one = 1; +static const int two_five_five = 255; + static const struct ctl_table addrconf_sysctl[] = { { .procname = "forwarding", @@ -5756,7 +5745,9 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = addrconf_sysctl_hop_limit, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&two_five_five, }, { .procname = "mtu", @@ -6081,8 +6072,14 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, for (i = 0; table[i].data; i++) { table[i].data += (char *)p - (char *)&ipv6_devconf; - table[i].extra1 = idev; /* embedded; no ref */ - table[i].extra2 = net; + /* If one of these is already set, then it is not safe to + * overwrite either of them: this makes proc_dointvec_minmax + * usable. + */ + if (!table[i].extra1 && !table[i].extra2) { + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; + } } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); From 8efebd6e5e93283a72d7a014d6dd8130e6601352 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 30 Sep 2016 15:34:25 +0800 Subject: [PATCH 1011/1050] cxgb4: mark cxgb_setup_tc() static We get 1 warning when building the kernel with W=1: drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:2715:5: warning: no previous prototype for 'cxgb_setup_tc' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and doesn't need a declaration, but can be made static. So this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S.
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c8a5c434ad2c..08e69d31dbaa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2712,8 +2712,8 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, - struct tc_to_netdev *tc) +static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); From 3a82e78c131a8199d38cf653b523c8fa2909df65 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 30 Sep 2016 15:48:50 +0800 Subject: [PATCH 1012/1050] net: ethernet: mediatek: mark symbols static where possible We get 2 warnings when building the kernel with W=1: drivers/net/ethernet/mediatek/mtk_eth_soc.c:2041:5: warning: no previous prototype for 'mtk_get_link_ksettings' [-Wmissing-prototypes] drivers/net/ethernet/mediatek/mtk_eth_soc.c:2052:5: warning: no previous prototype for 'mtk_set_link_ksettings' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. So this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ddf20a00654e..ad4ab979507b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2038,8 +2038,8 @@ static int mtk_cleanup(struct mtk_eth *eth) return 0; } -int mtk_get_link_ksettings(struct net_device *ndev, - struct ethtool_link_ksettings *cmd) +static int mtk_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct mtk_mac *mac = netdev_priv(ndev); @@ -2049,8 +2049,8 @@ static int mtk_get_link_ksettings(struct net_device *ndev, return phy_ethtool_ksettings_get(ndev->phydev, cmd); } -int mtk_set_link_ksettings(struct net_device *ndev, - const struct ethtool_link_ksettings *cmd) +static int mtk_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct mtk_mac *mac = netdev_priv(ndev); From b82d44d78480faff7456e9e0999acb9d38666057 Mon Sep 17 00:00:00 2001 From: Gavin Schenk Date: Fri, 30 Sep 2016 11:46:10 +0200 Subject: [PATCH 1013/1050] net: fec: set mac address unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the MAC address origin is not the device tree, you can only safely assign a MAC address after "link up" of the device. If the link is down, the clocks are disabled, and because of issues with writing registers while the clocks are off, the new MAC address cannot be written in .ndo_set_mac_address() on some SoCs. This fix sets the MAC address unconditionally in fec_restart(...) and ensures consistency between the FEC registers and the network layer. Signed-off-by: Gavin Schenk Acked-by: Fugang Duan Acked-by: Uwe Kleine-König Fixes: 9638d19e4816 ("net: fec: add netif status check before set mac address") Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fec_main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1fa2d87c2fc9..48a033e64423 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -913,13 +913,11 @@ fec_restart(struct net_device *ndev) * enet-mac reset will reset mac address registers too, * so need to reconfigure it. */ - if (fep->quirks & FEC_QUIRK_ENET_MAC) { - memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); - writel((__force u32)cpu_to_be32(temp_mac[0]), - fep->hwp + FEC_ADDR_LOW); - writel((__force u32)cpu_to_be32(temp_mac[1]), - fep->hwp + FEC_ADDR_HIGH); - } + memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); + writel((__force u32)cpu_to_be32(temp_mac[0]), + fep->hwp + FEC_ADDR_LOW); + writel((__force u32)cpu_to_be32(temp_mac[1]), + fep->hwp + FEC_ADDR_HIGH); /* Clear any outstanding interrupt. */ writel(0xffffffff, fep->hwp + FEC_IEVENT); From 63d75463c91a5b5be7c0aca11ceb45ea5a0ae81d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 30 Sep 2016 16:56:45 +0200 Subject: [PATCH 1014/1050] net: pktgen: fix pkt_size Commit 879c7220e828 ("net: pktgen: Observe needed_headroom of the device") increased the 'pkt_overhead' field value by LL_RESERVED_SPACE. As a side effect the generated packet size, computed as: /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - pkt_dev->pkt_overhead; is decreased by the same value. The above slightly changed the behavior of existing pktgen users, and made the procfs interface somewhat inconsistent. Fix it by restoring the previous pkt_overhead value and using LL_RESERVED_SPACE as extralen in skb allocation. Also, change pktgen_alloc_skb() to only partially reserve the headroom, to allow the caller to prefetch from the ll header start. v1 -> v2: - fixed some typos in the comments Fixes: 879c7220e828 ("net: pktgen: Observe needed_headroom of the device") Suggested-by: Ben Greear Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/pktgen.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index bbd118b19aef..5219a9e2127a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2286,7 +2286,7 @@ out: static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) { - pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev); + pkt_dev->pkt_overhead = 0; pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); @@ -2777,13 +2777,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, - struct pktgen_dev *pkt_dev, - unsigned int extralen) + struct pktgen_dev *pkt_dev) { + unsigned int extralen = LL_RESERVED_SPACE(dev); struct sk_buff *skb = NULL; - unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + - pkt_dev->pkt_overhead; + unsigned int size; + size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead; if (pkt_dev->flags & F_NODE) { int node = pkt_dev->node >= 0 ?
pkt_dev->node : numa_node_id(); @@ -2796,8 +2796,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } + /* the caller pre-fetches from skb->data and reserves for the mac hdr */ if (likely(skb)) - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, extralen - 16); return skb; } @@ -2830,16 +2831,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - datalen = (odev->hard_header_len + 16) & ~0xf; - - skb = pktgen_alloc_skb(odev, pkt_dev, datalen); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } prefetchw(skb->data); - skb_reserve(skb, datalen); + skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ eth = (__u8 *) skb_push(skb, 14); @@ -2959,7 +2958,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = pktgen_alloc_skb(odev, pkt_dev, 16); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; From fa34cd94fb01fcb8d79d91e009451b37692e94e5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:13:49 +0200 Subject: [PATCH 1015/1050] net: rtnl: avoid uninitialized data in IFLA_VF_VLAN_LIST handling With the newly added support for IFLA_VF_VLAN_LIST netlink messages, we get a warning about potential uninitialized variable use in the parsing of the user input when enabling the -Wmaybe-uninitialized warning: net/core/rtnetlink.c: In function 'do_setvfinfo': net/core/rtnetlink.c:1756:9: error: 'ivvl$' may be used uninitialized in this function [-Werror=maybe-uninitialized] I have not been able to prove whether it is possible to arrive in this code with an empty IFLA_VF_VLAN_LIST block, but if we do, then ndo_set_vf_vlan gets called with uninitialized arguments. This adds an explicit check for an empty list, making it obvious to the reader and the compiler that this cannot happen. Fixes: 79aab093a0b5 ("net: Update API for VF vlan protocol 802.1ad support") Signed-off-by: Arnd Bergmann Reviewed-by: Moshe Shemesh Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3ac8946bf244..b06d2f46b83e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1753,6 +1753,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) len++; } + if (len == 0) + return -EINVAL; + err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) From 7c70c4f8b2bf5ed777120f3d70efe35e64930c10 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:15:33 +0200 Subject: [PATCH 1016/1050] cxgb4: unexport cxgb4_dcb_enabled A recent cleanup marked cxgb4_dcb_enabled as 'static', which is correct, but this ignored how the symbol is also exported. In addition, the export can be compiled out when modules are disabled, causing a harmless compiler warning in configurations for which it is not used at all: drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:282:12: error: 'cxgb4_dcb_enabled' defined but not used [-Werror=unused-function] This removes the export and moves the function into the correct #ifdef so we only build it when there are users. Fixes: 50935857f878 ("cxgb4: mark symbols static where possible") Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 08e69d31dbaa..cf147ca419a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -277,11 +277,9 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) txq->dcb_prio = value; } } -#endif /* CONFIG_CHELSIO_T4_DCB */ static int cxgb4_dcb_enabled(const struct net_device *dev) { -#ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); if (!pi->dcb.enabled) @@ -289,11 +287,8 @@ static int cxgb4_dcb_enabled(const struct net_device *dev) return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) || (pi->dcb.state == CXGB4_DCB_STATE_HOST)); -#else - return 0; -#endif } -EXPORT_SYMBOL(cxgb4_dcb_enabled); +#endif /* CONFIG_CHELSIO_T4_DCB */ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) { From bab02a69296682881cae280587618978c357e26e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 25 Aug 2016 14:06:54 -0700 Subject: [PATCH 1017/1050] fm10k: use generic ethtool_op_get_ts_info callback This generic callback is for drivers which have software Tx timestamp support enabled. Without this, PTP applications requesting software timestamps may complain that the requested mode is not supported. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index adb7cb4311ba..5241e0873397 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -1182,6 +1182,7 @@ static const struct ethtool_ops fm10k_ethtool_ops = { .set_rxfh = fm10k_set_rssh, .get_channels = fm10k_get_channels, .set_channels = fm10k_set_channels, + .get_ts_info = ethtool_op_get_ts_info, }; void fm10k_set_ethtool_ops(struct net_device *dev) From d0debb76df18f05ecc65579d37203703ffdec44d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:17:09 +0200 Subject: [PATCH 1018/1050] net/mlx5e: shut up maybe-uninitialized warning Build-testing this driver with -Wmaybe-uninitialized gives a new false-positive warning that I can't really explain: drivers/net/ethernet/mellanox/mlx5/core/en_tc.c: In function 'mlx5e_configure_flower': drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:509:3: error: 'old_attr' may be used uninitialized in this function [-Werror=maybe-uninitialized] It's obvious from the code that 'old_attr' is initialized whenever 'old' is non-NULL here. The warning appears with all versions I tested from gcc-4.7 through gcc-6.1, and I could not come up with a way to rewrite the function in a more readable way that avoids the warning, so I'm adding another initialization to shut it up. Fixes: 8b32580df1cb ("net/mlx5e: Add TC vlan action for SRIOV offloads") Signed-off-by: Arnd Bergmann Acked-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a350b7171e3d..ce8c54d18906 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -451,7 +451,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; - struct mlx5_esw_flow_attr *old_attr; + struct mlx5_esw_flow_attr *old_attr = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw && esw->mode == SRIOV_OFFLOADS) From ab580705693d5af79663efa504a72248700766fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2016 18:17:10 +0200 Subject: [PATCH 1019/1050] mlxsw: spectrum_router: avoid potential uninitialized data usage If fi->fib_nhs is zero, the router interface pointer is uninitialized, as shown by this warning: drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c: In function 'mlxsw_sp_router_fib_event': drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:1674:21: error: 'r' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:1643:23: note: 'r' was declared here This changes the loop so we handle the case the same way as finding no router interface pointer attached to one of the nexthops, to ensure we always trap here instead of using uninitialized data. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Arnd Bergmann Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 48d50efec5e2..78fc557d6dd7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1640,7 +1640,7 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { struct fib_info *fi = fen_info->fi; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *r = NULL; int nhsel; int err; @@ -1664,11 +1664,15 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, * to us. Set trap and pass the packets for * this prefix to kernel. */ - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; - return 0; + break; } } + if (!r) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + } + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; fib_entry->rif = r->rif; From f7d49bce8e741e1e6aa14ce4db1b6cea7e4be4e8 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:05 +0200 Subject: [PATCH 1020/1050] openvswitch: mpls: set network header correctly on key extract After commit 48d2ab609b6b ("net: mpls: Fixups for GSO"), MPLS handling in openvswitch was changed to have the network header pointing to the start of the MPLS headers and the inner_network_header pointing after the MPLS headers. However, key_extract was missed by the mentioned commit, causing incorrect headers to be set when an MPLS packet just enters the bridge or after it is recirculated. Fixes: 48d2ab609b6b ("net: mpls: Fixups for GSO") Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- net/openvswitch/flow.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 634cc10d6dee..c8c82e109c68 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -633,12 +633,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) } else if (eth_p_mpls(key->eth.type)) { size_t stack_len = MPLS_HLEN; - /* In the presence of an MPLS label stack the end of the L2 - * header and the beginning of the L3 header differ. - * - * Advance network_header to the beginning of the L3 - * header. mac_len corresponds to the end of the L2 header. - */ + skb_set_inner_network_header(skb, skb->mac_len); while (1) { __be32 lse; @@ -646,12 +641,12 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (unlikely(error)) return 0; - memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); if (stack_len == MPLS_HLEN) memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); - skb_set_network_header(skb, skb->mac_len + stack_len); + skb_set_inner_network_header(skb, skb->mac_len + stack_len); if (lse & htonl(MPLS_LS_S_MASK)) break; From 9095e10edd28e1e4a10ba5ca61fb54d9f74f8968 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:06 +0200 Subject: [PATCH 1021/1050] mpls: move mpls_hdr to a common location This will also be used by openvswitch. Signed-off-by: Jiri Benc Acked-by: David Ahern Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/mpls.h | 9 +++++++++ net/mpls/internal.h | 10 +--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/net/mpls.h b/include/net/mpls.h index 5b3b5addfb08..3ebbc0bb57ff 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -19,12 +19,21 @@ #define MPLS_HLEN 4 +struct mpls_shim_hdr { + __be32 label_stack_entry; +}; + static inline bool eth_p_mpls(__be16 eth_type) { return eth_type == htons(ETH_P_MPLS_UC) || eth_type == htons(ETH_P_MPLS_MC); } +static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) +{ + return (struct mpls_shim_hdr *)skb_network_header(skb); +} + /* * For non-MPLS skbs this will correspond to the network header. * For MPLS skbs it will be before the network_header as the MPLS diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 732a5c17e986..bdfef6c3271a 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,9 +1,6 @@ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H - -struct mpls_shim_hdr { - __be32 label_stack_entry; -}; +#include <net/mpls.h> struct mpls_entry_decoded { u32 label; @@ -93,11 +90,6 @@ struct mpls_route { /* next hop label forwarding entry */ #define endfor_nexthops(rt) } -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) -{ - return (struct mpls_shim_hdr *)skb_network_header(skb); -} - static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) { struct mpls_shim_hdr result; From 85de4a2101acb85c3b1dde465e84596ccca99f2c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:07 +0200 Subject: [PATCH 1022/1050] openvswitch: use mpls_hdr skb_mpls_header is equivalent to mpls_hdr now. Use the existing helper instead. Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- include/net/mpls.h | 12 ------------ net/openvswitch/actions.c | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/include/net/mpls.h b/include/net/mpls.h index 3ebbc0bb57ff..1dbc669b770e 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -33,16 +33,4 @@ static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); } - -/* - * For non-MPLS skbs this will correspond to the network header. - * For MPLS skbs it will be before the network_header as the MPLS - * label stack lies between the end of the mac header and the network - * header. That is, for MPLS skbs the end of the mac header - * is the top of the MPLS label stack. - */ -static inline unsigned char *skb_mpls_header(struct sk_buff *skb) -{ - return skb_mac_header(skb) + skb->mac_len; -} #endif diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 863e992dfbc0..4e03f64709bc 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -160,7 +160,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_mpls *mpls) { - __be32 *new_mpls_lse; + struct mpls_shim_hdr *new_mpls_lse; /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) @@ -180,8 +180,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - new_mpls_lse = (__be32 *)skb_mpls_header(skb); - *new_mpls_lse = mpls->mpls_lse; + new_mpls_lse = mpls_hdr(skb); + new_mpls_lse->label_stack_entry = mpls->mpls_lse; skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); @@ -202,7 +202,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, if (unlikely(err)) return err; - skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN); + skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), skb->mac_len); @@ -211,10 +211,10 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - /* skb_mpls_header() is used to locate the ethertype - * field correctly in the presence of VLAN tags. + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. 
*/ - hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); update_ethertype(skb, hdr, ethertype); if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -226,7 +226,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, const __be32 *mpls_lse, const __be32 *mask) { - __be32 *stack; + struct mpls_shim_hdr *stack; __be32 lse; int err; @@ -234,16 +234,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, if (unlikely(err)) return err; - stack = (__be32 *)skb_mpls_header(skb); - lse = OVS_MASKED(*stack, *mpls_lse, *mask); + stack = mpls_hdr(skb); + lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), lse }; + __be32 diff[] = { ~(stack->label_stack_entry), lse }; skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = lse; + stack->label_stack_entry = lse; flow_key->mpls.top_lse = lse; return 0; } From f814bfd765218908b23e21ca7f0b6f403fb88972 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 1 Oct 2016 00:56:37 +0300 Subject: [PATCH 1023/1050] net: mvmdio: do not clk_disable_unprepare() NULL clock There is no need to call clk_disable_unprepare(dev->clk) before the clock has been initialized. Found by the Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvmdio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 8982c882af1b..a0d1b084ecec 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -211,8 +211,7 @@ static int orion_mdio_probe(struct platform_device *pdev) dev->regs = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!dev->regs) { dev_err(&pdev->dev, "Unable to remap SMI register\n"); - ret = -ENODEV; - goto out_mdio; + return -ENODEV; } init_waitqueue_head(&dev->smi_busy_wait); From 5e3d033ec175b3d0a8da2b1fc9aee8bc8acf3cfc Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 25 Aug 2016 14:15:39 -0700 Subject: [PATCH 1024/1050] fm10k: wrap long line for alloc_workqueue Trivial change here to clean up a checkpatch.pl warning that got introduced when changing to alloc_workqueue.
Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 0d39103124bd..5de937852436 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -56,7 +56,8 @@ static int __init fm10k_init_module(void) pr_info("%s\n", fm10k_copyright); /* create driver workqueue */ - fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name); + fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + fm10k_driver_name); fm10k_dbg_init(); From f39acc84aad10710e89835c60d3b6694c43a8dd9 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Thu, 29 Sep 2016 12:10:40 +0300 Subject: [PATCH 1025/1050] net/sched: act_vlan: Push skb->data to mac_header prior to calling skb_vlan_*() functions Generic skb_vlan_push/skb_vlan_pop functions don't properly handle the case where the input skb data pointer does not point at the mac header: - They're doing push/pop, but fail to properly unwind data back to its original location. For example, in the skb_vlan_push case, any subsequent 'skb_push(skb, skb->mac_len)' calls make the skb->data point 4 bytes BEFORE the start of the frame, leading to bogus frames that may be transmitted. - They update rcsum per the added/removed 4-byte tag. Alas, if data is originally after the vlan/eth headers, then these bytes were already pulled out of the csum. OTOH calling skb_vlan_push/skb_vlan_pop with skb->data at mac_header presents no issues. act_vlan is the only caller to skb_vlan_*() that has skb->data pointing at the network header (upon ingress). Other callers (ovs, bpf) already adjust skb->data at mac_header. This patch fixes act_vlan to point to the mac_header prior to calling skb_vlan_*() functions, as other callers do. Signed-off-by: Shmulik Ladkani Cc: Daniel Borkmann Cc: Pravin Shelar Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index a95c00b119da..b57fcbcefea1 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -37,6 +37,12 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, bstats_update(&v->tcf_bstats, skb); action = v->tcf_action; + /* Ensure 'data' points at mac_header prior calling vlan manipulating + * functions. + */ + if (skb_at_tc_ingress(skb)) + skb_push_rcsum(skb, skb->mac_len); + switch (v->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); @@ -83,6 +89,9 @@ drop: action = TC_ACT_SHOT; v->tcf_qstats.drops++; unlock: + if (skb_at_tc_ingress(skb)) + skb_pull_rcsum(skb, skb->mac_len); + spin_unlock(&v->tcf_lock); return action; } From b6a7920848cab619b5e434fdc0338778c63ef3f3 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Thu, 29 Sep 2016 12:10:41 +0300 Subject: [PATCH 1026/1050] net: skbuff: Limit skb_vlan_pop/push() to expect skb->data at mac header skb_vlan_pop/push were too generic, trying to support the cases where skb->data is at the mac header, and cases where skb->data is arbitrarily elsewhere. Supporting an arbitrary skb->data was complex and bogus: - It failed to unwind skb->data to its original location post actual pop/push.
(Also, the semantics of unwinding are not well defined: if data was inside the eth header, the same offset from the start must be used; but if data was at the network header or beyond, the original offset must be adjusted according to the push/pull.) - It mangled the rcsum post actual push/pop, without taking into account that the eth bytes might already have been pulled out of the csum. Most callers (ovs, bpf) already had their skb->data at mac_header upon invoking skb_vlan_pop/push. The last caller that failed to do so (act_vlan) was recently fixed. Therefore, to simplify things, no longer support arbitrary skb->data inputs for skb_vlan_pop/push(). skb->data is expected to be exactly at mac_header; WARN otherwise. Signed-off-by: Shmulik Ladkani Cc: Daniel Borkmann Cc: Pravin Shelar Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/core/skbuff.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d36c7548952f..cbd19d250947 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4528,13 +4528,18 @@ EXPORT_SYMBOL(skb_ensure_writable); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = skb_ensure_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) - goto pull; + return err; skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); @@ -4551,13 +4556,14 @@ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); -pull: - __skb_pull(skb, offset); return err; } EXPORT_SYMBOL(__skb_vlan_pop); +/* Pop a vlan tag either from hwaccel or from payload. + * Expects skb->data at mac header. + */ int skb_vlan_pop(struct sk_buff *skb) { u16 vlan_tci; @@ -4588,29 +4594,30 @@ int skb_vlan_pop(struct sk_buff *skb) } EXPORT_SYMBOL(skb_vlan_pop); +/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present). + * Expects skb->data at mac header. + */ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (skb_vlan_tag_present(skb)) { - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - /* __vlan_insert_tag expect skb->data pointing to mac header. - * So change skb->data before calling it and change back to - * original position later - */ - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) { - __skb_pull(skb, offset); + if (err) return err; - } skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); - __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; From 0fd7d43fbc4aaf358005231a6bed27eb1c2f60c3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 1 Oct 2016 09:12:29 +0000 Subject: [PATCH 1027/1050] net: qcom/emac: fix return value check in emac_sgmii_config() In case of error, the function ioremap() returns a NULL pointer, not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with a NULL test.
Also add a check for the return value of platform_get_resource(). Fixes: 54e19bc74f33 ("net: qcom/emac: do not use devm on internal phy pdev") Signed-off-by: Wei Yongjun Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 3d2c05a40d2c..75c1b530e39e 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -740,9 +740,14 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) /* Base address is the first address */ res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -EINVAL; + goto error_put_device; + } + phy->base = ioremap(res->start, resource_size(res)); - if (IS_ERR(phy->base)) { - ret = PTR_ERR(phy->base); + if (!phy->base) { + ret = -ENOMEM; goto error_put_device; } @@ -750,8 +755,8 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1); if (res) { phy->digital = ioremap(res->start, resource_size(res)); - if (IS_ERR(phy->digital)) { - ret = PTR_ERR(phy->digital); + if (!phy->digital) { + ret = -ENOMEM; goto error_unmap_base; } } From b9118b7221ebb12156d2b08d4d5647bc6076d6bb Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 2 Oct 2016 09:04:16 +0200 Subject: [PATCH 1028/1050] ptp: Fix resource leak in case of error A call to 'ida_simple_remove()' is missing in the error handling path. This has been spotted with the following coccinelle script, which tries to detect missing 'ida_simple_remove()' calls in error handling paths. /////////////// @@ expression x; identifier l; @@ * x = ida_simple_get(...); ... if (...) { ... } ... if (...) { ... goto l; } ... * l: ... when != ida_simple_remove(...); Signed-off-by: Christophe JAILLET Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 2e481b9e8ea5..86280b7e41f3 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -263,6 +263,7 @@ no_sysfs: no_device: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); + ida_simple_remove(&ptp_clocks_map, index); no_slot: kfree(ptp); no_memory: From 9ca57e97a75780a7f9b8e93e83ab1e36dbfd9846 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 19 Sep 2016 13:37:49 +0200 Subject: [PATCH 1029/1050] i40e: check if vectors are already depleted when doing VMDq allocation During MSI-X vector allocation for VMDq, a check for "no vectors left" was missing; add it. This prevents more vectors from being allocated than are available.
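To illustrate the failure mode (a walkthrough of the removed code in the hunk below, not a quote from the original message): with vectors_left == 0, the old path still ran

	vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted);	/* == 0 */
	if (vmdq_vecs < vmdq_vecs_wanted)
		pf->num_vmdq_qps = 1;
	pf->num_vmdq_msix = pf->num_vmdq_qps;	/* == 1, but no vector was budgeted */

so pf->num_vmdq_msix ended up claiming a vector that was never added to v_budget.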
Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 27 ++++++++++++--------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8176596932be..66d2ca0194f6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7674,18 +7674,23 @@ static int i40e_init_msix(struct i40e_pf *pf) int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps; int vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted); - /* if we're short on vectors for what's desired, we limit - * the queues per vmdq. If this is still more than are - * available, the user will need to change the number of - * queues/vectors used by the PF later with the ethtool - * channels command - */ - if (vmdq_vecs < vmdq_vecs_wanted) - pf->num_vmdq_qps = 1; - pf->num_vmdq_msix = pf->num_vmdq_qps; + if (!vectors_left) { + pf->num_vmdq_msix = 0; + pf->num_vmdq_qps = 0; + } else { + /* if we're short on vectors for what's desired, we limit + * the queues per vmdq. If this is still more than are + * available, the user will need to change the number of + * queues/vectors used by the PF later with the ethtool + * channels command + */ + if (vmdq_vecs < vmdq_vecs_wanted) + pf->num_vmdq_qps = 1; + pf->num_vmdq_msix = pf->num_vmdq_qps; - v_budget += vmdq_vecs; - vectors_left -= vmdq_vecs; + v_budget += vmdq_vecs; + vectors_left -= vmdq_vecs; + } } pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), From 4ce20abc645fc1822e86d9845a8562347e877b36 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 19 Sep 2016 13:37:50 +0200 Subject: [PATCH 1030/1050] i40e: fix MSI-X vector redistribution if hw limit is reached The driver allocates 1 vector per CPU thread and the current hardware limit for vectors is 129 per PF. On systems with 128 or more threads this currently means all vectors are used by the PF, leaving no room for additional features like VMDq, iWARP, etc... The code that should redistribute the vectors in this case is broken and never triggers. Fixed the code so that it actually triggers if the hardware limit is reached and adjusts the number of queue pairs accordingly. Also, the number of initially requested iWARP vectors was not properly saved when the vector limit was reached, and was therefore always zero. Comparison with a debug statement. Before: i40e 0000:2d:00.0: VMDq disabled, not enough MSI-X vectors i40e 0000:2d:00.0: IWARP disabled, not enough MSI-X vectors i40e 00.0 MSI-X vector distribution: PF 128, VMDq 0, FDSB 0, iWARP 0 After: i40e 0000:2d:00.0: MSI-X vector limit reached, attempting to redistribute vectors i40e 00.0 MSI-X vector distribution: PF 78, VMDq 8, FDSB 0, iWARP 42 Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 38 ++++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 66d2ca0194f6..ca7dd43abf5a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7661,6 +7661,8 @@ static int i40e_init_msix(struct i40e_pf *pf) #endif /* can we reserve enough for iWARP?
*/ if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + iwarp_requested = pf->num_iwarp_msix; + if (!vectors_left) pf->num_iwarp_msix = 0; else if (vectors_left < pf->num_iwarp_msix) @@ -7702,21 +7704,6 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->msix_entries[i].entry = i; v_actual = i40e_reserve_msix_vectors(pf, v_budget); - if (v_actual != v_budget) { - /* If we have limited resources, we will start with no vectors - * for the special features and then allocate vectors to some - * of these features based on the policy and at the end disable - * the features that did not get any vectors. - */ - iwarp_requested = pf->num_iwarp_msix; - pf->num_iwarp_msix = 0; -#ifdef I40E_FCOE - pf->num_fcoe_qps = 0; - pf->num_fcoe_msix = 0; -#endif - pf->num_vmdq_msix = 0; - } - if (v_actual < I40E_MIN_MSIX) { pf->flags &= ~I40E_FLAG_MSIX_ENABLED; kfree(pf->msix_entries); @@ -7730,9 +7717,16 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_qps = 1; pf->num_lan_msix = 1; - } else if (v_actual != v_budget) { + } else if (!vectors_left) { + /* If we have limited resources, we will start with no vectors + * for the special features and then allocate vectors to some + * of these features based on the policy and at the end disable + * the features that did not get any vectors. + */ int vec; + dev_info(&pf->pdev->dev, + "MSI-X vector limit reached, attempting to redistribute vectors\n"); /* reserve the misc vector */ vec = v_actual - 1; @@ -7740,6 +7734,10 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_msix = 1; /* force VMDqs to only one vector */ pf->num_vmdq_vsis = 1; pf->num_vmdq_qps = 1; +#ifdef I40E_FCOE + pf->num_fcoe_qps = 0; + pf->num_fcoe_msix = 0; +#endif pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; /* partition out the remaining vectors */ @@ -7775,6 +7773,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_msix = min_t(int, (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)), pf->num_lan_msix); + pf->num_lan_qps = pf->num_lan_msix; #ifdef I40E_FCOE /* give one vector to FCoE */ if (pf->flags & I40E_FLAG_FCOE_ENABLED) { @@ -7804,6 +7803,13 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->flags &= ~I40E_FLAG_FCOE_ENABLED; } #endif + i40e_debug(&pf->hw, I40E_DEBUG_INIT, + "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n", + pf->num_lan_msix, + pf->num_vmdq_msix * pf->num_vmdq_vsis, + pf->num_fdsb_msix, + pf->num_iwarp_msix); + return v_actual; } From abd97a94ba913d121a920d5541aba1e568be6972 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 19 Sep 2016 13:37:51 +0200 Subject: [PATCH 1031/1050] i40e: fix sideband flow director vector allocation Currently if the MSI-X vector limit is reached the sideband flow director gets disabled. A bit too early to make that decision, as vectors may get re-distributed. So move the check further back. 
Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ca7dd43abf5a..0044c29ca31e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7641,7 +7641,6 @@ static int i40e_init_msix(struct i40e_pf *pf) vectors_left--; } else { pf->num_fdsb_msix = 0; - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; } } @@ -7738,7 +7737,6 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_fcoe_qps = 0; pf->num_fcoe_msix = 0; #endif - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; /* partition out the remaining vectors */ switch (vec) { @@ -7768,6 +7768,10 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_vsis = min_t(int, (vec / 2), I40E_DEFAULT_NUM_VMDQ_VSI); } + if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + pf->num_fdsb_msix = 1; + vec--; + } pf->num_lan_msix = min_t(int, (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)), pf->num_lan_msix); @@ -7785,6 +7787,11 @@ static int i40e_init_msix(struct i40e_pf *pf) } } + if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + (pf->num_fdsb_msix == 0)) { + dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); + pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + } if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && (pf->num_vmdq_msix == 0)) { dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); From 0a7fb11c23c0fb8f5ad37f285f40348f1ab9ccbd Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sat, 1 Oct 2016 21:59:55 +0300 Subject: [PATCH 1032/1050] qed: Add Light L2 support Other protocols besides the networking driver need the ability to pass some L2 traffic, usually [although not exclusively] for the purpose of management traffic. Signed-off-by: Yuval Mintz Signed-off-by: Ram Amrani Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/Kconfig | 8 + drivers/net/ethernet/qlogic/qed/Makefile | 1 + drivers/net/ethernet/qlogic/qed/qed.h | 9 + drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2 + drivers/net/ethernet/qlogic/qed/qed_dev.c | 120 +- drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 20 + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1699 +++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_ll2.h | 289 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 23 +- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 22 + drivers/net/ethernet/qlogic/qed/qed_sp.h | 4 + include/linux/qed/qed_if.h | 1 + include/linux/qed/qed_ll2_if.h | 139 ++ 13 files changed, 2334 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ll2.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_ll2.h create mode 100644 include/linux/qed/qed_ll2_if.h diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 6ba48406899e..9eb3b1914cf5 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -88,6 +88,14 @@ config QED ---help--- This enables the support for ... +config QED_LL2 + bool "Qlogic QED Light L2 interface" + default n + depends on QED + ---help--- + This enables support for Light L2 interface which is required + by all qed protocol drivers other than qede.
+ config QED_SRIOV bool "QLogic QED 25/40/100Gb SR-IOV support" depends on QED && PCI_IOV diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 86a5b4f5f870..e067098f10a9 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -4,3 +4,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \ qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o +qed-$(CONFIG_QED_LL2) += qed_ll2.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 0929582fc82b..91b571a3670b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -72,6 +72,7 @@ struct qed_sb_info; struct qed_sb_attn_info; struct qed_cxt_mngr; struct qed_sb_sp_info; +struct qed_ll2_info; struct qed_mcp_info; struct qed_rt_data { @@ -152,6 +153,7 @@ enum QED_RESOURCES { QED_MAC, QED_VLAN, QED_ILT, + QED_LL2_QUEUE, QED_MAX_RESC, }; @@ -360,6 +362,8 @@ struct qed_hwfn { struct qed_sb_attn_info *p_sb_attn; /* Protocol related */ + bool using_ll2; + struct qed_ll2_info *p_ll2_info; struct qed_pf_params pf_params; bool b_rdma_enabled_in_prs; @@ -564,6 +568,11 @@ struct qed_dev { struct qed_dbg_params dbg_params; +#ifdef CONFIG_QED_LL2 + struct qed_cb_ll2_info *ll2; + u8 ll2_mac_address[ETH_ALEN]; +#endif + const struct firmware *firmware; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index dd579b2ef224..d9bea2a9c9f7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -1839,6 +1839,8 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn) /* Set the number of required CORE connections */ u32 core_cids = 1; /* SPQ */ + if (p_hwfn->using_ll2) + core_cids += 4; qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0); switch (p_hwfn->hw_info.personality) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 13d8b4075b01..9a8e153df841 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -29,6 +29,7 @@ #include "qed_hw.h" #include "qed_init_ops.h" #include "qed_int.h" +#include "qed_ll2.h" #include "qed_mcp.h" #include "qed_reg_addr.h" #include "qed_sp.h" @@ -147,6 +148,9 @@ void qed_resc_free(struct qed_dev *cdev) qed_eq_free(p_hwfn, p_hwfn->p_eq); qed_consq_free(p_hwfn, p_hwfn->p_consq); qed_int_free(p_hwfn); +#ifdef CONFIG_QED_LL2 + qed_ll2_free(p_hwfn, p_hwfn->p_ll2_info); +#endif qed_iov_free(p_hwfn); qed_dmae_info_free(p_hwfn); qed_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info); @@ -403,6 +407,9 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) int qed_resc_alloc(struct qed_dev *cdev) { +#ifdef CONFIG_QED_LL2 + struct qed_ll2_info *p_ll2_info; +#endif struct qed_consq *p_consq; struct qed_eq *p_eq; int i, rc = 0; @@ -513,6 +520,15 @@ int qed_resc_alloc(struct qed_dev *cdev) goto alloc_no_mem; p_hwfn->p_consq = p_consq; +#ifdef CONFIG_QED_LL2 + if (p_hwfn->using_ll2) { + p_ll2_info = qed_ll2_alloc(p_hwfn); + if (!p_ll2_info) + goto alloc_no_mem; + p_hwfn->p_ll2_info = p_ll2_info; + } +#endif + /* DMA info initialization */ rc = qed_dmae_info_alloc(p_hwfn); if (rc) @@ -561,6 +577,10 @@ void qed_resc_setup(struct qed_dev *cdev) qed_int_setup(p_hwfn, p_hwfn->p_main_ptt); qed_iov_setup(p_hwfn, p_hwfn->p_main_ptt); 
+#ifdef CONFIG_QED_LL2 + if (p_hwfn->using_ll2) + qed_ll2_setup(p_hwfn, p_hwfn->p_ll2_info); +#endif } } @@ -1304,6 +1324,7 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) / num_funcs; resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs; + resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs; for (i = 0; i < QED_MAX_RESC; i++) resc_start[i] = resc_num[i] * enabled_func_idx; @@ -1327,7 +1348,8 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) "RL = %d start = %d\n" "MAC = %d start = %d\n" "VLAN = %d start = %d\n" - "ILT = %d start = %d\n", + "ILT = %d start = %d\n" + "LL2_QUEUE = %d start = %d\n", p_hwfn->hw_info.resc_num[QED_SB], p_hwfn->hw_info.resc_start[QED_SB], p_hwfn->hw_info.resc_num[QED_L2_QUEUE], @@ -1343,7 +1365,9 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) p_hwfn->hw_info.resc_num[QED_VLAN], p_hwfn->hw_info.resc_start[QED_VLAN], p_hwfn->hw_info.resc_num[QED_ILT], - p_hwfn->hw_info.resc_start[QED_ILT]); + p_hwfn->hw_info.resc_start[QED_ILT], + RESC_NUM(p_hwfn, QED_LL2_QUEUE), + RESC_START(p_hwfn, QED_LL2_QUEUE)); return 0; } @@ -2133,6 +2157,98 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) return 0; } +static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low, + u8 *p_filter) +{ + *p_high = p_filter[1] | (p_filter[0] << 8); + *p_low = p_filter[5] | (p_filter[4] << 8) | + (p_filter[3] << 16) | (p_filter[2] << 24); +} + +int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter) +{ + u32 high = 0, low = 0, en; + int i; + + if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn))) + return 0; + + qed_llh_mac_to_filter(&high, &low, p_filter); + + /* Find a free entry and utilize it */ + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + en = qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)); + if (en) + continue; + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + 2 * i * sizeof(u32), low); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32), high); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1); + break; + } + if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { + DP_NOTICE(p_hwfn, + "Failed to find an empty LLH filter to utilize\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "mac: %pM is added at %d\n", + p_filter, i); + + return 0; +} + +void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter) +{ + u32 high = 0, low = 0; + int i; + + if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn))) + return; + + qed_llh_mac_to_filter(&high, &low, p_filter); + + /* Find the entry and clean it */ + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + if (qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + 2 * i * sizeof(u32)) != low) + continue; + if (qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32)) != high) + continue; + + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32), 0); + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "mac: %pM is removed from %d\n", + p_filter, i); + break; + } + if (i >= 
NIG_REG_LLH_FUNC_FILTER_EN_SIZE) + DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n"); +} + static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 hw_addr, void *p_eth_qzone, size_t eth_qzone_size, u8 timeset) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 343bb0344f62..b6711c106597 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -309,6 +309,26 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id); +/** + * @brief qed_llh_add_mac_filter - configures a MAC filter in llh + * + * @param p_hwfn + * @param p_ptt + * @param p_filter - MAC to add + */ +int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter); + +/** + * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh + * + * @param p_hwfn + * @param p_ptt + * @param p_filter - MAC to remove + */ +void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter); + /** * *@brief Cleanup of previous driver remains prior to load * diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c new file mode 100644 index 000000000000..e0ec8ed2f92c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -0,0 +1,1699 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_int.h" +#include "qed_ll2.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +#define QED_LL2_RX_REGISTERED(ll2) ((ll2)->rx_queue.b_cb_registred) +#define QED_LL2_TX_REGISTERED(ll2) ((ll2)->tx_queue.b_cb_registred) + +#define QED_LL2_TX_SIZE (256) +#define QED_LL2_RX_SIZE (4096) + +struct qed_cb_ll2_info { + int rx_cnt; + u32 rx_size; + u8 handle; + bool frags_mapped; + + /* Lock protecting LL2 buffer lists in sleepless context */ + spinlock_t lock; + struct list_head list; + + const struct qed_ll2_cb_ops *cbs; + void *cb_cookie; +}; + +struct qed_ll2_buffer { + struct list_head list; + void *data; + dma_addr_t phys_addr; +}; + +static void qed_ll2b_complete_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) +{ + struct qed_dev *cdev = p_hwfn->cdev; + struct sk_buff *skb = cookie; + + /* All we need to do is release the mapping */ + dma_unmap_single(&p_hwfn->cdev->pdev->dev, first_frag_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + if (cdev->ll2->cbs && cdev->ll2->cbs->tx_cb) + cdev->ll2->cbs->tx_cb(cdev->ll2->cb_cookie, skb, + b_last_fragment); + + if (cdev->ll2->frags_mapped) + /* Case where mapped frags were received, need to + * free skb with nr_frags marked as 0 + */ + skb_shinfo(skb)->nr_frags = 0; + + dev_kfree_skb_any(skb); +} + +static int qed_ll2_alloc_buffer(struct qed_dev *cdev, + u8 **data, dma_addr_t *phys_addr) +{ + *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC); + if (!(*data)) { + DP_INFO(cdev, "Failed to 
allocate LL2 buffer data\n"); + return -ENOMEM; + } + + *phys_addr = dma_map_single(&cdev->pdev->dev, + ((*data) + NET_SKB_PAD), + cdev->ll2->rx_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&cdev->pdev->dev, *phys_addr)) { + DP_INFO(cdev, "Failed to map LL2 buffer data\n"); + kfree((*data)); + return -ENOMEM; + } + + return 0; +} + +static int qed_ll2_dealloc_buffer(struct qed_dev *cdev, + struct qed_ll2_buffer *buffer) +{ + spin_lock_bh(&cdev->ll2->lock); + + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); + kfree(buffer->data); + list_del(&buffer->list); + + cdev->ll2->rx_cnt--; + if (!cdev->ll2->rx_cnt) + DP_INFO(cdev, "All LL2 entries were removed\n"); + + spin_unlock_bh(&cdev->ll2->lock); + + return 0; +} + +static void qed_ll2_kill_buffers(struct qed_dev *cdev) +{ + struct qed_ll2_buffer *buffer, *tmp_buffer; + + list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) + qed_ll2_dealloc_buffer(cdev, buffer); +} + +void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + struct qed_ll2_rx_packet *p_pkt, + struct core_rx_fast_path_cqe *p_cqe, + bool b_last_packet) +{ + u16 packet_length = le16_to_cpu(p_cqe->packet_length); + struct qed_ll2_buffer *buffer = p_pkt->cookie; + struct qed_dev *cdev = p_hwfn->cdev; + u16 vlan = le16_to_cpu(p_cqe->vlan); + u32 opaque_data_0, opaque_data_1; + u8 pad = p_cqe->placement_offset; + dma_addr_t new_phys_addr; + struct sk_buff *skb; + bool reuse = false; + int rc = -EINVAL; + u8 *new_data; + + opaque_data_0 = le32_to_cpu(p_cqe->opaque_data.data[0]); + opaque_data_1 = le32_to_cpu(p_cqe->opaque_data.data[1]); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_RX_STATUS | QED_MSG_STORAGE | NETIF_MSG_PKTDATA), + "Got an LL2 Rx completion: [Buffer at phys 0x%llx, offset 0x%02x] Length 0x%04x Parse_flags 0x%04x vlan 0x%04x Opaque data [0x%08x:0x%08x]\n", + (u64)p_pkt->rx_buf_addr, pad, packet_length, + le16_to_cpu(p_cqe->parse_flags.flags), vlan, + opaque_data_0, opaque_data_1); + + if ((cdev->dp_module & NETIF_MSG_PKTDATA) && buffer->data) { + print_hex_dump(KERN_INFO, "", + DUMP_PREFIX_OFFSET, 16, 1, + buffer->data, packet_length, false); + } + + /* Determine if data is valid */ + if (packet_length < ETH_HLEN) + reuse = true; + + /* Allocate a replacement for buffer; Reuse upon failure */ + if (!reuse) + rc = qed_ll2_alloc_buffer(p_hwfn->cdev, &new_data, + &new_phys_addr); + + /* If need to reuse or there's no replacement buffer, repost this */ + if (rc) + goto out_post; + + skb = build_skb(buffer->data, 0); + if (!skb) { + rc = -ENOMEM; + goto out_post; + } + + pad += NET_SKB_PAD; + skb_reserve(skb, pad); + skb_put(skb, packet_length); + skb_checksum_none_assert(skb); + + /* Get partial ethernet information instead of eth_type_trans(), + * since we don't have an associated net_device. 
+ */ + skb_reset_mac_header(skb); + skb->protocol = eth_hdr(skb)->h_proto; + + /* Pass SKB onward */ + if (cdev->ll2->cbs && cdev->ll2->cbs->rx_cb) { + if (vlan) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); + cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb, + opaque_data_0, opaque_data_1); + } + + /* Update Buffer information and update FW producer */ + buffer->data = new_data; + buffer->phys_addr = new_phys_addr; + +out_post: + rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); + + if (rc) + qed_ll2_dealloc_buffer(cdev, buffer); +} + +static struct qed_ll2_info *__qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn, + u8 connection_handle, + bool b_lock, + bool b_only_active) +{ + struct qed_ll2_info *p_ll2_conn, *p_ret = NULL; + + if (connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) + return NULL; + + if (!p_hwfn->p_ll2_info) + return NULL; + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + if (b_only_active) { + if (b_lock) + mutex_lock(&p_ll2_conn->mutex); + if (p_ll2_conn->b_active) + p_ret = p_ll2_conn; + if (b_lock) + mutex_unlock(&p_ll2_conn->mutex); + } else { + p_ret = p_ll2_conn; + } + + return p_ret; +} + +static struct qed_ll2_info *qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, true); +} + +static struct qed_ll2_info *qed_ll2_handle_sanity_lock(struct qed_hwfn *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, true, true); +} + +static struct qed_ll2_info *qed_ll2_handle_sanity_inactive(struct qed_hwfn + *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, false); +} + +static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + bool b_last_packet = false, b_last_frag = false; + struct qed_ll2_tx_packet *p_pkt = NULL; + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_tx_queue *p_tx; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + p_tx = &p_ll2_conn->tx_queue; + + while (!list_empty(&p_tx->active_descq)) { + p_pkt = list_first_entry(&p_tx->active_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + b_last_packet = list_empty(&p_tx->active_descq); + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + p_tx->cur_completing_packet = *p_pkt; + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + + qed_ll2b_complete_tx_packet(p_hwfn, p_ll2_conn->my_id, + p_pkt->cookie, + p_pkt->bds_set[0].tx_frag, + b_last_frag, b_last_packet); + } +} + +static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) +{ + struct qed_ll2_info *p_ll2_conn = p_cookie; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + u16 new_idx = 0, num_bds = 0, num_bds_in_packet = 0; + struct qed_ll2_tx_packet *p_pkt; + bool b_last_frag = false; + unsigned long flags; + int rc = -EINVAL; + + spin_lock_irqsave(&p_tx->lock, flags); + if (p_tx->b_completing_packet) { + rc = -EBUSY; + goto out; + } + + new_idx = le16_to_cpu(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + while (num_bds) { + if (list_empty(&p_tx->active_descq)) + goto out; + + p_pkt = list_first_entry(&p_tx->active_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + goto out; + + p_tx->b_completing_packet = true; + p_tx->cur_completing_packet = *p_pkt; + 
num_bds_in_packet = p_pkt->bd_used; + list_del(&p_pkt->list_entry); + + if (num_bds < num_bds_in_packet) { + DP_NOTICE(p_hwfn, + "Rest of BDs does not cover whole packet\n"); + goto out; + } + + num_bds -= num_bds_in_packet; + p_tx->bds_idx += num_bds_in_packet; + while (num_bds_in_packet--) + qed_chain_consume(&p_tx->txq_chain); + + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + + spin_unlock_irqrestore(&p_tx->lock, flags); + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + p_pkt->bds_set[0].tx_frag, + b_last_frag, !num_bds); + spin_lock_irqsave(&p_tx->lock, flags); + } + + p_tx->b_completing_packet = false; + rc = 0; +out: + spin_unlock_irqrestore(&p_tx->lock, flags); + return rc; +} + +static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long lock_flags, + bool b_last_cqe) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct qed_ll2_rx_packet *p_pkt = NULL; + + if (!list_empty(&p_rx->active_descq)) + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, + "LL2 Rx completion but active_descq is empty\n"); + return -EIO; + } + list_del(&p_pkt->list_entry); + + if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) + DP_NOTICE(p_hwfn, + "Mismatch between active_descq and the LL2 Rx chain\n"); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + spin_unlock_irqrestore(&p_rx->lock, lock_flags); + qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id, + p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe); + spin_lock_irqsave(&p_rx->lock, lock_flags); + + return 0; +} + +static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) +{ + struct qed_ll2_info *p_ll2_conn = cookie; + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + union core_rx_cqe_union *cqe = NULL; + u16 cq_new_idx = 0, cq_old_idx = 0; + unsigned long flags = 0; + int rc = 0; + + spin_lock_irqsave(&p_rx->lock, flags); + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); + cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); + + while (cq_new_idx != cq_old_idx) { + bool b_last_cqe = (cq_new_idx == cq_old_idx); + + cqe = qed_chain_consume(&p_rx->rcq_chain); + cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); + + DP_VERBOSE(p_hwfn, + QED_MSG_LL2, + "LL2 [sw. cons %04x, fw. 
at %04x] - Got Packet of type %02x\n", + cq_old_idx, cq_new_idx, cqe->rx_cqe_sp.type); + + switch (cqe->rx_cqe_sp.type) { + case CORE_RX_CQE_TYPE_SLOW_PATH: + DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n"); + rc = -EINVAL; + break; + case CORE_RX_CQE_TYPE_REGULAR: + rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, + cqe, flags, b_last_cqe); + break; + default: + rc = -EIO; + } + } + + spin_unlock_irqrestore(&p_rx->lock, flags); + return rc; +} + +void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ll2_rx_packet *p_pkt = NULL; + struct qed_ll2_rx_queue *p_rx; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + p_rx = &p_ll2_conn->rx_queue; + + while (!list_empty(&p_rx->active_descq)) { + dma_addr_t rx_buf_addr; + void *cookie; + bool b_last; + + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + rx_buf_addr = p_pkt->rx_buf_addr; + cookie = p_pkt->cookie; + + b_last = list_empty(&p_rx->active_descq); + } +} + +static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + u8 action_on_error) +{ + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_start_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + u16 cqe_pbl_size; + int rc = 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_start; + + p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_rx->rx_sb_index; + p_ramrod->complete_event_flg = 1; + + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + DMA_REGPAIR_LE(p_ramrod->bd_base, + p_rx->rxq_chain.p_phys_addr); + cqe_pbl_size = (u16)qed_chain_get_page_cnt(&p_rx->rcq_chain); + p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); + DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, + qed_chain_get_pbl_phys(&p_rx->rcq_chain)); + + p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg; + p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en; + p_ramrod->queue_id = p_ll2_conn->queue_id; + p_ramrod->main_func_queue = 1; + + if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) && + p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) { + p_ramrod->mf_si_bcast_accept_all = 1; + p_ramrod->mf_si_mcast_accept_all = 1; + } else { + p_ramrod->mf_si_bcast_accept_all = 0; + p_ramrod->mf_si_mcast_accept_all = 0; + } + + p_ramrod->action_on_error.error_type = action_on_error; + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct core_tx_start_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + union qed_qm_pq_params pq_params; + u16 pq_id = 0, pbl_size; + int rc = -EINVAL; + + if 
(!QED_LL2_TX_REGISTERED(p_ll2_conn)) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_tx_queue_start; + + p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_tx->tx_sb_index; + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + p_ll2_conn->tx_stats_en = 1; + p_ramrod->stats_en = p_ll2_conn->tx_stats_en; + p_ramrod->stats_id = p_ll2_conn->tx_stats_id; + + DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, + qed_chain_get_pbl_phys(&p_tx->txq_chain)); + pbl_size = qed_chain_get_page_cnt(&p_tx->txq_chain); + p_ramrod->pbl_size = cpu_to_le16(pbl_size); + + memset(&pq_params, 0, sizeof(pq_params)); + pq_params.core.tc = p_ll2_conn->tx_tc; + pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params); + p_ramrod->qm_pq_id = cpu_to_le16(pq_id); + + switch (conn_type) { + case QED_LL2_TYPE_ISCSI: + case QED_LL2_TYPE_ISCSI_OOO: + p_ramrod->conn_type = PROTOCOLID_ISCSI; + break; + case QED_LL2_TYPE_ROCE: + p_ramrod->conn_type = PROTOCOLID_ROCE; + break; + default: + p_ramrod->conn_type = PROTOCOLID_ETH; + DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); + } + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_rx_queue_stop(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + struct core_rx_stop_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_stop; + + p_ramrod->complete_event_flg = 1; + p_ramrod->queue_id = p_ll2_conn->queue_id; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_tx_queue_stop(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int +qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, u16 rx_num_desc) +{ + struct qed_ll2_rx_packet *p_descq; + u32 capacity; + int rc = 0; + + if (!rx_num_desc) + goto out; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_NEXT_PTR, + QED_CHAIN_CNT_TYPE_U16, + rx_num_desc, + sizeof(struct core_rx_bd), + &p_ll2_info->rx_queue.rxq_chain); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate ll2 rxq chain\n"); + goto out; + } + + capacity = qed_chain_get_capacity(&p_ll2_info->rx_queue.rxq_chain); + p_descq = kcalloc(capacity, sizeof(struct qed_ll2_rx_packet), + GFP_KERNEL); + if (!p_descq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, "Failed to 
allocate ll2 Rx desc\n"); + goto out; + } + p_ll2_info->rx_queue.descq_array = p_descq; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U16, + rx_num_desc, + sizeof(struct core_rx_fast_path_cqe), + &p_ll2_info->rx_queue.rcq_chain); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate ll2 rcq chain\n"); + goto out; + } + + DP_VERBOSE(p_hwfn, QED_MSG_LL2, + "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->conn_type, rx_num_desc); + +out: + return rc; +} + +static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, + u16 tx_num_desc) +{ + struct qed_ll2_tx_packet *p_descq; + u32 capacity; + int rc = 0; + + if (!tx_num_desc) + goto out; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U16, + tx_num_desc, + sizeof(struct core_tx_bd), + &p_ll2_info->tx_queue.txq_chain); + if (rc) + goto out; + + capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); + p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet), + GFP_KERNEL); + if (!p_descq) { + rc = -ENOMEM; + goto out; + } + p_ll2_info->tx_queue.descq_array = p_descq; + + DP_VERBOSE(p_hwfn, QED_MSG_LL2, + "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->conn_type, tx_num_desc); + +out: + if (rc) + DP_NOTICE(p_hwfn, + "Can't allocate memory for Tx LL2 with 0x%08x buffers\n", + tx_num_desc); + return rc; +} + +int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_params, + u16 rx_num_desc, + u16 tx_num_desc, + u8 *p_connection_handle) +{ + qed_int_comp_cb_t comp_rx_cb, comp_tx_cb; + struct qed_ll2_info *p_ll2_info = NULL; + int rc; + u8 i; + + if (!p_connection_handle || !p_hwfn->p_ll2_info) + return -EINVAL; + + /* Find a free connection to be used */ + for (i = 0; (i < QED_MAX_NUM_OF_LL2_CONNECTIONS); i++) { + mutex_lock(&p_hwfn->p_ll2_info[i].mutex); + if (p_hwfn->p_ll2_info[i].b_active) { + mutex_unlock(&p_hwfn->p_ll2_info[i].mutex); + continue; + } + + p_hwfn->p_ll2_info[i].b_active = true; + p_ll2_info = &p_hwfn->p_ll2_info[i]; + mutex_unlock(&p_hwfn->p_ll2_info[i].mutex); + break; + } + if (!p_ll2_info) + return -EBUSY; + + p_ll2_info->conn_type = p_params->conn_type; + p_ll2_info->mtu = p_params->mtu; + p_ll2_info->rx_drop_ttl0_flg = p_params->rx_drop_ttl0_flg; + p_ll2_info->rx_vlan_removal_en = p_params->rx_vlan_removal_en; + p_ll2_info->tx_tc = p_params->tx_tc; + p_ll2_info->tx_dest = p_params->tx_dest; + p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big; + p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf; + + rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc); + if (rc) + goto q_allocate_fail; + + rc = qed_ll2_acquire_connection_tx(p_hwfn, p_ll2_info, tx_num_desc); + if (rc) + goto q_allocate_fail; + + /* Register callbacks for the Rx/Tx queues */ + comp_rx_cb = qed_ll2_rxq_completion; + comp_tx_cb = qed_ll2_txq_completion; + + if (rx_num_desc) { + qed_int_register_cb(p_hwfn, comp_rx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->rx_queue.rx_sb_index, + &p_ll2_info->rx_queue.p_fw_cons); + p_ll2_info->rx_queue.b_cb_registred = true; + } + + if (tx_num_desc) { + qed_int_register_cb(p_hwfn, + comp_tx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->tx_queue.tx_sb_index, + &p_ll2_info->tx_queue.p_fw_cons); + p_ll2_info->tx_queue.b_cb_registred = true; + } + + *p_connection_handle = i; + return rc; + +q_allocate_fail: + 
qed_ll2_release_connection(p_hwfn, i); + return -ENOMEM; +} + +static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + u8 action_on_error = 0; + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) + return 0; + + DIRECT_REG_WR(p_ll2_conn->rx_queue.set_prod_addr, 0x0); + + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, + p_ll2_conn->ai_err_packet_too_big); + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->ai_err_no_buf); + + return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); +} + +int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_rx_queue *p_rx; + struct qed_ll2_tx_queue *p_tx; + int rc = -EINVAL; + u32 i, capacity; + u8 qid; + + p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_rx = &p_ll2_conn->rx_queue; + p_tx = &p_ll2_conn->tx_queue; + + qed_chain_reset(&p_rx->rxq_chain); + qed_chain_reset(&p_rx->rcq_chain); + INIT_LIST_HEAD(&p_rx->active_descq); + INIT_LIST_HEAD(&p_rx->free_descq); + INIT_LIST_HEAD(&p_rx->posting_descq); + spin_lock_init(&p_rx->lock); + capacity = qed_chain_get_capacity(&p_rx->rxq_chain); + for (i = 0; i < capacity; i++) + list_add_tail(&p_rx->descq_array[i].list_entry, + &p_rx->free_descq); + *p_rx->p_fw_cons = 0; + + qed_chain_reset(&p_tx->txq_chain); + INIT_LIST_HEAD(&p_tx->active_descq); + INIT_LIST_HEAD(&p_tx->free_descq); + INIT_LIST_HEAD(&p_tx->sending_descq); + spin_lock_init(&p_tx->lock); + capacity = qed_chain_get_capacity(&p_tx->txq_chain); + for (i = 0; i < capacity; i++) + list_add_tail(&p_tx->descq_array[i].list_entry, + &p_tx->free_descq); + p_tx->cur_completing_bd_idx = 0; + p_tx->bds_idx = 0; + p_tx->b_completing_packet = false; + p_tx->cur_send_packet = NULL; + p_tx->cur_send_frag_num = 0; + p_tx->cur_completing_frag_num = 0; + *p_tx->p_fw_cons = 0; + + qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_ll2_conn->cid); + + qid = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + connection_handle; + p_ll2_conn->queue_id = qid; + p_ll2_conn->tx_stats_id = qid; + p_rx->set_prod_addr = (u8 __iomem *)p_hwfn->regview + + GTT_BAR0_MAP_REG_TSDM_RAM + + TSTORM_LL2_RX_PRODS_OFFSET(qid); + p_tx->doorbell_addr = (u8 __iomem *)p_hwfn->doorbells + + qed_db_addr(p_ll2_conn->cid, + DQ_DEMS_LEGACY); + + rc = qed_ll2_establish_connection_rx(p_hwfn, p_ll2_conn); + if (rc) + return rc; + + rc = qed_sp_ll2_tx_queue_start(p_hwfn, p_ll2_conn); + if (rc) + return rc; + + if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + qed_wr(p_hwfn, p_hwfn->p_main_ptt, PRS_REG_USE_LIGHT_L2, 1); + + return rc; +} + +static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, + struct qed_ll2_rx_queue *p_rx, + struct qed_ll2_rx_packet *p_curp) +{ + struct qed_ll2_rx_packet *p_posting_packet = NULL; + struct core_ll2_rx_prod rx_prod = { 0, 0, 0 }; + bool b_notify_fw = false; + u16 bd_prod, cq_prod; + + /* This handles the flushing of already posted buffers */ + while (!list_empty(&p_rx->posting_descq)) { + p_posting_packet = list_first_entry(&p_rx->posting_descq, + struct qed_ll2_rx_packet, + list_entry); + list_del(&p_posting_packet->list_entry); + list_add_tail(&p_posting_packet->list_entry, + &p_rx->active_descq); + b_notify_fw = true; + } + + /* This handles the supplied packet [if there is one] */ + if (p_curp) { + list_add_tail(&p_curp->list_entry, &p_rx->active_descq); + b_notify_fw = true; + } + + if (!b_notify_fw) + return; + + 
bd_prod = qed_chain_get_prod_idx(&p_rx->rxq_chain); + cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain); + rx_prod.bd_prod = cpu_to_le16(bd_prod); + rx_prod.cqe_prod = cpu_to_le16(cq_prod); + DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod)); +} + +int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, void *cookie, u8 notify_fw) +{ + struct core_rx_bd_with_buff_len *p_curb = NULL; + struct qed_ll2_rx_packet *p_curp = NULL; + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_rx_queue *p_rx; + unsigned long flags; + void *p_data; + int rc = 0; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_rx = &p_ll2_conn->rx_queue; + + spin_lock_irqsave(&p_rx->lock, flags); + if (!list_empty(&p_rx->free_descq)) + p_curp = list_first_entry(&p_rx->free_descq, + struct qed_ll2_rx_packet, list_entry); + if (p_curp) { + if (qed_chain_get_elem_left(&p_rx->rxq_chain) && + qed_chain_get_elem_left(&p_rx->rcq_chain)) { + p_data = qed_chain_produce(&p_rx->rxq_chain); + p_curb = (struct core_rx_bd_with_buff_len *)p_data; + qed_chain_produce(&p_rx->rcq_chain); + } + } + + /* If we're lacking entries, let's try to flush buffers to FW */ + if (!p_curp || !p_curb) { + rc = -EBUSY; + p_curp = NULL; + goto out_notify; + } + + /* We have an Rx packet we can fill */ + DMA_REGPAIR_LE(p_curb->addr, addr); + p_curb->buff_length = cpu_to_le16(buf_len); + p_curp->rx_buf_addr = addr; + p_curp->cookie = cookie; + p_curp->rxq_bd = p_curb; + p_curp->buf_length = buf_len; + list_del(&p_curp->list_entry); + + /* Check if we only want to enqueue this packet without informing FW */ + if (!notify_fw) { + list_add_tail(&p_curp->list_entry, &p_rx->posting_descq); + goto out; + } + +out_notify: + qed_ll2_post_rx_buffer_notify_fw(p_hwfn, p_rx, p_curp); +out: + spin_unlock_irqrestore(&p_rx->lock, flags); + return rc; +} + +static void qed_ll2_prepare_tx_packet_set(struct qed_hwfn *p_hwfn, + struct qed_ll2_tx_queue *p_tx, + struct qed_ll2_tx_packet *p_curp, + u8 num_of_bds, + dma_addr_t first_frag, + u16 first_frag_len, void *p_cookie, + u8 notify_fw) +{ + list_del(&p_curp->list_entry); + p_curp->cookie = p_cookie; + p_curp->bd_used = num_of_bds; + p_curp->notify_fw = notify_fw; + p_tx->cur_send_packet = p_curp; + p_tx->cur_send_frag_num = 0; + + p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = first_frag; + p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = first_frag_len; + p_tx->cur_send_frag_num++; +} + +static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2, + struct qed_ll2_tx_packet *p_curp, + u8 num_of_bds, + enum core_tx_dest tx_dest, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + dma_addr_t first_frag, + u16 first_frag_len) +{ + struct qed_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain; + u16 prod_idx = qed_chain_get_prod_idx(p_tx_chain); + struct core_tx_bd *start_bd = NULL; + u16 frag_idx; + + start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); + start_bd->nw_vlan_or_lb_echo = cpu_to_le16(vlan); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W, + cpu_to_le16(l4_hdr_offset_w)); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest); + start_bd->bd_flags.as_bitfield = bd_flags; + start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK << + CORE_TX_BD_FLAGS_START_BD_SHIFT; + SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds); + DMA_REGPAIR_LE(start_bd->addr, first_frag); + start_bd->nbytes = cpu_to_le16(first_frag_len); 
+ + DP_VERBOSE(p_hwfn, + (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", + p_ll2->queue_id, + p_ll2->cid, + p_ll2->conn_type, + prod_idx, + first_frag_len, + num_of_bds, + le32_to_cpu(start_bd->addr.hi), + le32_to_cpu(start_bd->addr.lo)); + + if (p_ll2->tx_queue.cur_send_frag_num == num_of_bds) + return; + + /* Need to provide the packet with additional BDs for frags */ + for (frag_idx = p_ll2->tx_queue.cur_send_frag_num; + frag_idx < num_of_bds; frag_idx++) { + struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd; + + *p_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); + (*p_bd)->bd_flags.as_bitfield = 0; + (*p_bd)->bitfield1 = 0; + (*p_bd)->bitfield0 = 0; + p_curp->bds_set[frag_idx].tx_frag = 0; + p_curp->bds_set[frag_idx].frag_len = 0; + } +} + +/* This should be called while the Txq spinlock is being held */ +static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + bool b_notify = p_ll2_conn->tx_queue.cur_send_packet->notify_fw; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct qed_ll2_tx_packet *p_pkt = NULL; + struct core_db_data db_msg = { 0, 0, 0 }; + u16 bd_prod; + + /* If there are missing BDs, don't do anything now */ + if (p_ll2_conn->tx_queue.cur_send_frag_num != + p_ll2_conn->tx_queue.cur_send_packet->bd_used) + return; + + /* Push the current packet to the list and clean after it */ + list_add_tail(&p_ll2_conn->tx_queue.cur_send_packet->list_entry, + &p_ll2_conn->tx_queue.sending_descq); + p_ll2_conn->tx_queue.cur_send_packet = NULL; + p_ll2_conn->tx_queue.cur_send_frag_num = 0; + + /* Notify FW of packet only if requested to */ + if (!b_notify) + return; + + bd_prod = qed_chain_get_prod_idx(&p_ll2_conn->tx_queue.txq_chain); + + while (!list_empty(&p_tx->sending_descq)) { + p_pkt = list_first_entry(&p_tx->sending_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + list_add_tail(&p_pkt->list_entry, &p_tx->active_descq); + } + + SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM); + SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET); + SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_VAL_SEL, + DQ_XCM_CORE_TX_BD_PROD_CMD); + db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; + db_msg.spq_prod = cpu_to_le16(bd_prod); + + /* Make sure the BDs data is updated before ringing the doorbell */ + wmb(); + + DIRECT_REG_WR(p_tx->doorbell_addr, *((u32 *)&db_msg)); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n", + p_ll2_conn->queue_id, + p_ll2_conn->cid, p_ll2_conn->conn_type, db_msg.spq_prod); +} + +int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + u8 num_of_bds, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + dma_addr_t first_frag, + u16 first_frag_len, void *cookie, u8 notify_fw) +{ + struct qed_ll2_tx_packet *p_curp = NULL; + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ll2_tx_queue *p_tx; + struct qed_chain *p_tx_chain; + unsigned long flags; + int rc = 0; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_tx = &p_ll2_conn->tx_queue; + p_tx_chain = &p_tx->txq_chain; + + if (num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET) + return -EIO; + + spin_lock_irqsave(&p_tx->lock, flags); + if (p_tx->cur_send_packet) { + rc = -EEXIST; + goto out; + } + + /* Get entry, but 
only if we have tx elements for it */ + if (!list_empty(&p_tx->free_descq)) + p_curp = list_first_entry(&p_tx->free_descq, + struct qed_ll2_tx_packet, list_entry); + if (p_curp && qed_chain_get_elem_left(p_tx_chain) < num_of_bds) + p_curp = NULL; + + if (!p_curp) { + rc = -EBUSY; + goto out; + } + + /* Prepare packet and BD, and perhaps send a doorbell to FW */ + qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp, + num_of_bds, first_frag, + first_frag_len, cookie, notify_fw); + qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, + num_of_bds, CORE_TX_DEST_NW, + vlan, bd_flags, l4_hdr_offset_w, + first_frag, first_frag_len); + + qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + +out: + spin_unlock_irqrestore(&p_tx->lock, flags); + return rc; +} + +int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, u16 nbytes) +{ + struct qed_ll2_tx_packet *p_cur_send_packet = NULL; + struct qed_ll2_info *p_ll2_conn = NULL; + u16 cur_send_frag_num = 0; + struct core_tx_bd *p_bd; + unsigned long flags; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + + if (!p_ll2_conn->tx_queue.cur_send_packet) + return -EINVAL; + + p_cur_send_packet = p_ll2_conn->tx_queue.cur_send_packet; + cur_send_frag_num = p_ll2_conn->tx_queue.cur_send_frag_num; + + if (cur_send_frag_num >= p_cur_send_packet->bd_used) + return -EINVAL; + + /* Fill the BD information, and possibly notify FW */ + p_bd = p_cur_send_packet->bds_set[cur_send_frag_num].txq_bd; + DMA_REGPAIR_LE(p_bd->addr, addr); + p_bd->nbytes = cpu_to_le16(nbytes); + p_cur_send_packet->bds_set[cur_send_frag_num].tx_frag = addr; + p_cur_send_packet->bds_set[cur_send_frag_num].frag_len = nbytes; + + p_ll2_conn->tx_queue.cur_send_frag_num++; + + spin_lock_irqsave(&p_ll2_conn->tx_queue.lock, flags); + qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + spin_unlock_irqrestore(&p_ll2_conn->tx_queue.lock, flags); + + return 0; +} + +int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + int rc = -EINVAL; + + p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + + /* Stop Tx & Rx of connection, if needed */ + if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { + rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + return rc; + qed_ll2_txq_flush(p_hwfn, connection_handle); + } + + if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { + rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + return rc; + qed_ll2_rxq_flush(p_hwfn, connection_handle); + } + + return rc; +} + +void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->rx_queue.b_cb_registred = false; + qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index); + } + + if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->tx_queue.b_cb_registred = false; + qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); + } + + kfree(p_ll2_conn->tx_queue.descq_array); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain); + + kfree(p_ll2_conn->rx_queue.descq_array); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rxq_chain); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rcq_chain); + + qed_cxt_release_cid(p_hwfn, p_ll2_conn->cid); + + 
mutex_lock(&p_ll2_conn->mutex); + p_ll2_conn->b_active = false; + mutex_unlock(&p_ll2_conn->mutex); +} + +struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_ll2_info *p_ll2_connections; + u8 i; + + /* Allocate LL2's set struct */ + p_ll2_connections = kcalloc(QED_MAX_NUM_OF_LL2_CONNECTIONS, + sizeof(struct qed_ll2_info), GFP_KERNEL); + if (!p_ll2_connections) { + DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_ll2'\n"); + return NULL; + } + + for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++) + p_ll2_connections[i].my_id = i; + + return p_ll2_connections; +} + +void qed_ll2_setup(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections) +{ + int i; + + for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++) + mutex_init(&p_ll2_connections[i].mutex); +} + +void qed_ll2_free(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections) +{ + kfree(p_ll2_connections); +} + +static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_tstorm_per_queue_stat tstats; + u8 qid = p_ll2_conn->queue_id; + u32 tstats_addr; + + memset(&tstats, 0, sizeof(tstats)); + tstats_addr = BAR0_MAP_REG_TSDM_RAM + + CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid); + qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats)); + + p_stats->packet_too_big_discard = + HILO_64_REGPAIR(tstats.packet_too_big_discard); + p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard); +} + +static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_ustorm_per_queue_stat ustats; + u8 qid = p_ll2_conn->queue_id; + u32 ustats_addr; + + memset(&ustats, 0, sizeof(ustats)); + ustats_addr = BAR0_MAP_REG_USDM_RAM + + CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid); + qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats)); + + p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts); +} + +static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_pstorm_per_queue_stat pstats; + u8 stats_id = p_ll2_conn->tx_stats_id; + u32 pstats_addr; + + memset(&pstats, 0, sizeof(pstats)); + pstats_addr = BAR0_MAP_REG_PSDM_RAM + + CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id); + qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats)); + + p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes); + p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes); + p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes); + p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts); + p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts); + p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts); +} + +int qed_ll2_get_stats(struct qed_hwfn *p_hwfn, + u8 connection_handle, struct qed_ll2_stats *p_stats) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ptt *p_ptt; + + memset(p_stats, 0, sizeof(*p_stats)); + + if ((connection_handle >= 
QED_MAX_NUM_OF_LL2_CONNECTIONS) || + !p_hwfn->p_ll2_info) + return -EINVAL; + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_ERR(p_hwfn, "Failed to acquire ptt\n"); + return -EINVAL; + } + + _qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + _qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + if (p_ll2_conn->tx_stats_en) + _qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + qed_ptt_release(p_hwfn, p_ptt); + return 0; +} + +static void qed_ll2_register_cb_ops(struct qed_dev *cdev, + const struct qed_ll2_cb_ops *ops, + void *cookie) +{ + cdev->ll2->cbs = ops; + cdev->ll2->cb_cookie = cookie; +} + +static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) +{ + struct qed_ll2_info ll2_info; + struct qed_ll2_buffer *buffer; + enum qed_ll2_conn_type conn_type; + struct qed_ptt *p_ptt; + int rc, i; + + /* Initialize LL2 locks & lists */ + INIT_LIST_HEAD(&cdev->ll2->list); + spin_lock_init(&cdev->ll2->lock); + cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + L1_CACHE_BYTES + params->mtu; + cdev->ll2->frags_mapped = params->frags_mapped; + + /*Allocate memory for LL2 */ + DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n", + cdev->ll2->rx_size); + for (i = 0; i < QED_LL2_RX_SIZE; i++) { + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + DP_INFO(cdev, "Failed to allocate LL2 buffers\n"); + goto fail; + } + + rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data, + &buffer->phys_addr); + if (rc) { + kfree(buffer); + goto fail; + } + + list_add_tail(&buffer->list, &cdev->ll2->list); + } + + switch (QED_LEADING_HWFN(cdev)->hw_info.personality) { + case QED_PCI_ISCSI: + conn_type = QED_LL2_TYPE_ISCSI; + break; + case QED_PCI_ETH_ROCE: + conn_type = QED_LL2_TYPE_ROCE; + break; + default: + conn_type = QED_LL2_TYPE_TEST; + } + + /* Prepare the temporary ll2 information */ + memset(&ll2_info, 0, sizeof(ll2_info)); + ll2_info.conn_type = conn_type; + ll2_info.mtu = params->mtu; + ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets; + ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping; + ll2_info.tx_tc = 0; + ll2_info.tx_dest = CORE_TX_DEST_NW; + + rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info, + QED_LL2_RX_SIZE, QED_LL2_TX_SIZE, + &cdev->ll2->handle); + if (rc) { + DP_INFO(cdev, "Failed to acquire LL2 connection\n"); + goto fail; + } + + rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), + cdev->ll2->handle); + if (rc) { + DP_INFO(cdev, "Failed to establish LL2 connection\n"); + goto release_fail; + } + + /* Post all Rx buffers to FW */ + spin_lock_bh(&cdev->ll2->lock); + list_for_each_entry(buffer, &cdev->ll2->list, list) { + rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); + if (rc) { + DP_INFO(cdev, + "Failed to post an Rx buffer; Deleting it\n"); + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); + kfree(buffer->data); + list_del(&buffer->list); + kfree(buffer); + } else { + cdev->ll2->rx_cnt++; + } + } + spin_unlock_bh(&cdev->ll2->lock); + + if (!cdev->ll2->rx_cnt) { + DP_INFO(cdev, "Failed passing even a single Rx buffer\n"); + goto release_terminate; + } + + if (!is_valid_ether_addr(params->ll2_mac_address)) { + DP_INFO(cdev, "Invalid Ethernet address\n"); + goto release_terminate; + } + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_INFO(cdev, "Failed to acquire PTT\n"); + goto release_terminate; + } + + rc 
= qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + params->ll2_mac_address); + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + if (rc) { + DP_ERR(cdev, "Failed to allocate LLH filter\n"); + goto release_terminate_all; + } + + ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); + + return 0; + +release_terminate_all: + +release_terminate: + qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); +release_fail: + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); +fail: + qed_ll2_kill_buffers(cdev); + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + return -EINVAL; +} + +static int qed_ll2_stop(struct qed_dev *cdev) +{ + struct qed_ptt *p_ptt; + int rc; + + if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE) + return 0; + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_INFO(cdev, "Failed to acquire PTT\n"); + goto fail; + } + + qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + cdev->ll2_mac_address); + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + eth_zero_addr(cdev->ll2_mac_address); + + rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), + cdev->ll2->handle); + if (rc) + DP_INFO(cdev, "Failed to terminate LL2 connection\n"); + + qed_ll2_kill_buffers(cdev); + + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + + return rc; +fail: + return -EINVAL; +} + +static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) +{ + const skb_frag_t *frag; + int rc = -EINVAL, i; + dma_addr_t mapping; + u16 vlan = 0; + u8 flags = 0; + + if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { + DP_INFO(cdev, "Cannot transmit a checksummed packet\n"); + return -EINVAL; + } + + if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", + 1 + skb_shinfo(skb)->nr_frags); + return -EINVAL; + } + + mapping = dma_map_single(&cdev->pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&cdev->pdev->dev, mapping))) { + DP_NOTICE(cdev, "SKB mapping failed\n"); + return -EINVAL; + } + + /* Request HW to calculate IP csum */ + if (!((vlan_get_protocol(skb) == htons(ETH_P_IPV6)) && + ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6)) + flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT); + + if (skb_vlan_tag_present(skb)) { + vlan = skb_vlan_tag_get(skb); + flags |= BIT(CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT); + } + + rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + 1 + skb_shinfo(skb)->nr_frags, + vlan, flags, 0, mapping, + skb->len, skb, 1); + if (rc) + goto err; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + if (!cdev->ll2->frags_mapped) { + mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0, + skb_frag_size(frag), + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(&cdev->pdev->dev, + mapping))) { + DP_NOTICE(cdev, + "Unable to map frag - dropping packet\n"); + goto err; + } + } else { + mapping = page_to_phys(skb_frag_page(frag)) | + frag->page_offset; + } + + rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + mapping, + skb_frag_size(frag)); + + /* if failed not much to do here, partial packet has been posted + * we can't free memory, will need to wait for completion. 
+ */ + if (rc) + goto err2; + } + + return 0; + +err: + dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE); + +err2: + return rc; +} + +static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) +{ + if (!cdev->ll2) + return -EINVAL; + + return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, stats); +} + +const struct qed_ll2_ops qed_ll2_ops_pass = { + .start = &qed_ll2_start, + .stop = &qed_ll2_stop, + .start_xmit = &qed_ll2_start_xmit, + .register_cb_ops = &qed_ll2_register_cb_ops, + .get_stats = &qed_ll2_stats, +}; + +int qed_ll2_alloc_if(struct qed_dev *cdev) +{ + cdev->ll2 = kzalloc(sizeof(*cdev->ll2), GFP_KERNEL); + return cdev->ll2 ? 0 : -ENOMEM; +} + +void qed_ll2_dealloc_if(struct qed_dev *cdev) +{ + kfree(cdev->ll2); + cdev->ll2 = NULL; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h new file mode 100644 index 000000000000..a037c4845928 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -0,0 +1,289 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_LL2_H +#define _QED_LL2_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_sp.h" + +#define QED_MAX_NUM_OF_LL2_CONNECTIONS (4) + +enum qed_ll2_conn_type { + QED_LL2_TYPE_RESERVED, + QED_LL2_TYPE_ISCSI, + QED_LL2_TYPE_TEST, + QED_LL2_TYPE_ISCSI_OOO, + QED_LL2_TYPE_RESERVED2, + QED_LL2_TYPE_ROCE, + QED_LL2_TYPE_RESERVED3, + MAX_QED_LL2_RX_CONN_TYPE +}; + +struct qed_ll2_rx_packet { + struct list_head list_entry; + struct core_rx_bd_with_buff_len *rxq_bd; + dma_addr_t rx_buf_addr; + u16 buf_length; + void *cookie; + u8 placement_offset; + u16 parse_flags; + u16 packet_length; + u16 vlan; + u32 opaque_data[2]; +}; + +struct qed_ll2_tx_packet { + struct list_head list_entry; + u16 bd_used; + u16 vlan; + u16 l4_hdr_offset_w; + u8 bd_flags; + bool notify_fw; + void *cookie; + + struct { + struct core_tx_bd *txq_bd; + dma_addr_t tx_frag; + u16 frag_len; + } bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET]; +}; + +struct qed_ll2_rx_queue { + /* Lock protecting the Rx queue manipulation */ + spinlock_t lock; + struct qed_chain rxq_chain; + struct qed_chain rcq_chain; + u8 rx_sb_index; + bool b_cb_registred; + __le16 *p_fw_cons; + struct list_head active_descq; + struct list_head free_descq; + struct list_head posting_descq; + struct qed_ll2_rx_packet *descq_array; + void __iomem *set_prod_addr; +}; + +struct qed_ll2_tx_queue { + /* Lock protecting the Tx queue manipulation */ + spinlock_t lock; + struct qed_chain txq_chain; + u8 tx_sb_index; + bool b_cb_registred; + __le16 *p_fw_cons; + struct list_head active_descq; + struct list_head free_descq; + struct list_head sending_descq; + struct qed_ll2_tx_packet *descq_array; + struct qed_ll2_tx_packet *cur_send_packet; + struct qed_ll2_tx_packet cur_completing_packet; + u16 cur_completing_bd_idx; + void __iomem *doorbell_addr; + u16 bds_idx; + u16 cur_send_frag_num; + u16 cur_completing_frag_num; + bool b_completing_packet; +}; + +struct qed_ll2_info { + /* Lock protecting the state of LL2 */ + struct mutex mutex; + enum qed_ll2_conn_type conn_type; + u32 cid; + u8 my_id; + u8 queue_id; + u8 tx_stats_id; + bool b_active; + u16 mtu; + u8 rx_drop_ttl0_flg; + u8 
rx_vlan_removal_en; + u8 tx_tc; + enum core_tx_dest tx_dest; + enum core_error_handle ai_err_packet_too_big; + enum core_error_handle ai_err_no_buf; + u8 tx_stats_en; + struct qed_ll2_rx_queue rx_queue; + struct qed_ll2_tx_queue tx_queue; +}; + +/** + * @brief qed_ll2_acquire_connection - allocate resources, + * starts rx & tx (if relevant) queues pair. Provides + * connection handler as output parameter. + * + * @param p_hwfn + * @param p_params Contains various configuration properties + * @param rx_num_desc + * @param tx_num_desc + * + * @param p_connection_handle Output container for LL2 connection's handle + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_params, + u16 rx_num_desc, + u16 tx_num_desc, + u8 *p_connection_handle); + +/** + * @brief qed_ll2_establish_connection - start previously + * allocated LL2 queues pair + * + * @param p_hwfn + * @param p_ptt + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_acquire_connection + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_post_rx_buffer - submit buffers to LL2 Rx queue. + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_acquire_connection + * @param addr rx (physical address) buffers to submit + * @param cookie + * @param notify_fw produce corresponding Rx BD immediately + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, void *cookie, u8 notify_fw); + +/** + * @brief qed_ll2_prepare_tx_packet - request for start Tx BD + * to prepare Tx packet submission to FW. + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_acquire_connection + * @param num_of_bds a number of requested BD equals a number of + * fragments in Tx packet + * @param vlan VLAN to insert to packet (if insertion set) + * @param bd_flags + * @param l4_hdr_offset_w L4 Header Offset from start of packet + * (in words). 
This is needed if both l4_csum + * and ipv6_ext are set + * @param first_frag + * @param first_frag_len + * @param cookie + * + * @param notify_fw + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + u8 num_of_bds, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + dma_addr_t first_frag, + u16 first_frag_len, void *cookie, u8 notify_fw); + +/** + * @brief qed_ll2_release_connection - releases resources + * allocated for an LL2 connection + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_acquire_connection + */ +void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_set_fragment_of_tx_packet - provides a fragment to fill + * a Tx BD of the BDs requested by + * qed_ll2_prepare_tx_packet + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle + * obtained from + * qed_ll2_acquire_connection + * @param addr + * @param nbytes + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, u16 nbytes); + +/** + * @brief qed_ll2_terminate_connection - stops the Tx/Rx queues + * + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle + * obtained from + * qed_ll2_acquire_connection + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_get_stats - get an LL2 queue's statistics + * + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_acquire_connection + * @param p_stats + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_get_stats(struct qed_hwfn *p_hwfn, + u8 connection_handle, struct qed_ll2_stats *p_stats); + +/** + * @brief qed_ll2_alloc - Allocates the LL2 connections set + * + * @param p_hwfn + * + * @return pointer to the allocated qed_ll2_info or NULL + */ +struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ll2_setup - Inits the LL2 connections set + * + * @param p_hwfn + * @param p_ll2_connections + * + */ +void qed_ll2_setup(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections); + +/** + * @brief qed_ll2_free - Releases the LL2 connections set + * + * @param p_hwfn + * @param p_ll2_connections + * + */ +void qed_ll2_free(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index b730a632c383..48cdf62c025b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -22,11 +22,13 @@ #include #include #include +#include #include "qed.h" #include "qed_sriov.h" #include "qed_sp.h" #include "qed_dev_api.h" +#include "qed_ll2.h" #include "qed_mcp.h" #include "qed_hw.h" #include "qed_selftest.h" @@ -608,7 +610,16 @@ static int qed_nic_reset(struct qed_dev *cdev) static int qed_nic_setup(struct qed_dev *cdev) { - int rc; + int rc, i; + + /* Determine if interface is going to require LL2 */ + if (QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH) { + for (i = 0; i < cdev->num_hwfns; i++) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + p_hwfn->using_ll2 = true; + } + } rc = qed_resc_alloc(cdev); if (rc) @@ -873,6 +884,12 @@ static int qed_slowpath_start(struct qed_dev *cdev, DP_INFO(cdev, "HW initialization and function start
completed successfully\n"); + /* Allocate LL2 interface if needed */ + if (QED_LEADING_HWFN(cdev)->using_ll2) { + rc = qed_ll2_alloc_if(cdev); + if (rc) + goto err3; + } if (IS_PF(cdev)) { hwfn = QED_LEADING_HWFN(cdev); drv_version.version = (params->drv_major << 24) | @@ -893,6 +910,8 @@ static int qed_slowpath_start(struct qed_dev *cdev, return 0; +err3: + qed_hw_stop(cdev); err2: qed_hw_timers_stop_all(cdev); if (IS_PF(cdev)) @@ -915,6 +934,8 @@ static int qed_slowpath_stop(struct qed_dev *cdev) if (!cdev) return -ENODEV; + qed_ll2_dealloc_if(cdev); + if (IS_PF(cdev)) { qed_free_stream_mem(cdev); if (IS_QED_ETH_IF(cdev)) diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 759cb04e02b0..e75738d21783 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -208,6 +208,26 @@ 0x50196cUL #define NIG_REG_LLH_CLS_TYPE_DUALMODE \ 0x501964UL +#define NIG_REG_LLH_FUNC_FILTER_VALUE \ + 0x501a00UL +#define NIG_REG_LLH_FUNC_FILTER_VALUE_SIZE \ + 32 +#define NIG_REG_LLH_FUNC_FILTER_EN \ + 0x501a80UL +#define NIG_REG_LLH_FUNC_FILTER_EN_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_MODE \ + 0x501ac0UL +#define NIG_REG_LLH_FUNC_FILTER_MODE_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE \ + 0x501b00UL +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL \ + 0x501b40UL +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_SIZE \ + 16 #define NCSI_REG_CONFIG \ 0x040200UL #define PBF_REG_INIT \ @@ -264,6 +284,8 @@ 0x1f0a1cUL #define PRS_REG_ROCE_DEST_QP_MAX_PF \ 0x1f0430UL +#define PRS_REG_USE_LIGHT_L2 \ + 0x1f096cUL #define PSDM_REG_ENABLE_IN1 \ 0xfa0004UL #define PSEM_REG_ENABLE_IN \ diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index a548504c3420..a3c539f1c2ac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -61,6 +61,10 @@ union ramrod_data { struct vport_start_ramrod_data vport_start; struct vport_stop_ramrod_data vport_stop; struct vport_update_ramrod_data vport_update; + struct core_rx_start_ramrod_data core_rx_queue_start; + struct core_rx_stop_ramrod_data core_rx_queue_stop; + struct core_tx_start_ramrod_data core_tx_queue_start; + struct core_tx_stop_ramrod_data core_tx_queue_stop; struct vport_filter_update_ramrod_data vport_filter_update; struct rdma_init_func_ramrod_data rdma_init_func; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e4546abcea08..c2d74e8785cf 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -627,6 +627,7 @@ enum DP_MODULE { QED_MSG_SP = 0x100000, QED_MSG_STORAGE = 0x200000, QED_MSG_CXT = 0x800000, + QED_MSG_LL2 = 0x1000000, QED_MSG_ILT = 0x2000000, QED_MSG_ROCE = 0x4000000, QED_MSG_DEBUG = 0x8000000, diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h new file mode 100644 index 000000000000..fd75c265dba3 --- /dev/null +++ b/include/linux/qed/qed_ll2_if.h @@ -0,0 +1,139 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef _QED_LL2_IF_H +#define _QED_LL2_IF_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct qed_ll2_stats { + u64 gsi_invalid_hdr; + u64 gsi_invalid_pkt_length; + u64 gsi_unsupported_pkt_typ; + u64 gsi_crcchksm_error; + + u64 packet_too_big_discard; + u64 no_buff_discard; + + u64 rcv_ucast_bytes; + u64 rcv_mcast_bytes; + u64 rcv_bcast_bytes; + u64 rcv_ucast_pkts; + u64 rcv_mcast_pkts; + u64 rcv_bcast_pkts; + + u64 sent_ucast_bytes; + u64 sent_mcast_bytes; + u64 sent_bcast_bytes; + u64 sent_ucast_pkts; + u64 sent_mcast_pkts; + u64 sent_bcast_pkts; +}; + +#define QED_LL2_UNUSED_HANDLE (0xff) + +struct qed_ll2_cb_ops { + int (*rx_cb)(void *, struct sk_buff *, u32, u32); + int (*tx_cb)(void *, struct sk_buff *, bool); +}; + +struct qed_ll2_params { + u16 mtu; + bool drop_ttl0_packets; + bool rx_vlan_stripping; + u8 tx_tc; + bool frags_mapped; + u8 ll2_mac_address[ETH_ALEN]; +}; + +struct qed_ll2_ops { +/** + * @brief start - initializes ll2 + * + * @param cdev + * @param params - protocol driver configuration for the ll2. + * + * @return 0 on success, otherwise error value. + */ + int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params); + +/** + * @brief stop - stops the ll2 + * + * @param cdev + * + * @return 0 on success, otherwise error value. + */ + int (*stop)(struct qed_dev *cdev); + +/** + * @brief start_xmit - transmits an skb over the ll2 interface + * + * @param cdev + * @param skb + * + * @return 0 on success, otherwise error value. + */ + int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb); + +/** + * @brief register_cb_ops - the protocol driver registers its callbacks + * for Rx/Tx packets. Should be called before `start'. + * + * @param cdev + * @param cookie - to be passed to the callback functions. + * @param ops - the callback functions to register for Rx / Tx. + */ + void (*register_cb_ops)(struct qed_dev *cdev, + const struct qed_ll2_cb_ops *ops, + void *cookie); + +/** + * @brief get LL2 related statistics + * + * @param cdev + * @param stats - pointer to the struct that will be filled with stats + * + * @return 0 on success, error otherwise. + */ + int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats); +}; + +#ifdef CONFIG_QED_LL2 +int qed_ll2_alloc_if(struct qed_dev *); +void qed_ll2_dealloc_if(struct qed_dev *); +#else +static const struct qed_ll2_ops qed_ll2_ops_pass = { + .start = NULL, + .stop = NULL, + .start_xmit = NULL, + .register_cb_ops = NULL, + .get_stats = NULL, +}; + +static inline int qed_ll2_alloc_if(struct qed_dev *cdev) +{ + return 0; +} + +static inline void qed_ll2_dealloc_if(struct qed_dev *cdev) +{ +} +#endif +#endif From cee9fbd8e2e9e713cd8bf227c6492fd8854de74b Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:56 +0300 Subject: [PATCH 1033/1050] qede: Add qedr framework Adds a skeletal implementation of the qede RoCE driver - the qedr driver has some dependencies on the state of the underlying base interface. This adds some logic required for the mutual registration and the ability to pass updates on 'interesting' events. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- drivers/net/ethernet/qlogic/Kconfig | 18 +- drivers/net/ethernet/qlogic/qede/Makefile | 1 + drivers/net/ethernet/qlogic/qede/qede.h | 9 + drivers/net/ethernet/qlogic/qede/qede_main.c | 35 ++- drivers/net/ethernet/qlogic/qede/qede_roce.c | 314 +++++++++++++++++++ include/linux/qed/qed_if.h | 3 +- include/linux/qed/qede_roce.h | 88 ++++++ 7 files changed, 451 insertions(+), 17 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qede/qede_roce.c create mode 100644 include/linux/qed/qede_roce.h diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 9eb3b1914cf5..0df1391f9663 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -89,12 +89,7 @@ config QED This enables the support for ... config QED_LL2 - bool "Qlogic QED Light L2 interface" - default n - depends on QED - ---help--- - This enables support for Light L2 interface which is required - by all qed protocol drivers other than qede. + bool config QED_SRIOV bool "QLogic QED 25/40/100Gb SR-IOV support" @@ -112,4 +107,15 @@ config QEDE ---help--- This enables the support for ... +config INFINIBAND_QEDR + tristate "QLogic qede RoCE sources [debug]" + depends on QEDE && 64BIT + select QED_LL2 + default n + ---help--- + This provides a temporary node that allows the compilation + and logical testing of the InfiniBand over Ethernet support + for QLogic QED. This would be replaced by the 'real' option + once the QEDR driver is added [+relocated]. + endif # NET_VENDOR_QLOGIC diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile index 74a49850d74d..28dc58919c85 100644 --- a/drivers/net/ethernet/qlogic/qede/Makefile +++ b/drivers/net/ethernet/qlogic/qede/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o qede-y := qede_main.o qede_ethtool.o qede-$(CONFIG_DCB) += qede_dcbnl.o +qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index e01adce4a966..28c0e9f42c9e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -106,6 +106,13 @@ struct qede_vlan { bool configured; }; +struct qede_rdma_dev { + struct qedr_dev *qedr_dev; + struct list_head entry; + struct list_head roce_event_list; + struct workqueue_struct *roce_wq; +}; + struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; @@ -185,6 +192,8 @@ struct qede_dev { unsigned long sp_flags; u16 vxlan_dst_port; u16 geneve_dst_port; + + struct qede_rdma_dev rdma_info; }; enum QEDE_STATE { diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 0e198fe89d1a..343038ca047d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -36,7 +36,7 @@ #include #include #include - +#include #include "qede.h" static char version[] = @@ -193,8 +193,7 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event, struct ethtool_drvinfo drvinfo; struct qede_dev *edev; - /* Currently only support name change */ - if (event != NETDEV_CHANGENAME) + if (event != NETDEV_CHANGENAME && event != NETDEV_CHANGEADDR) goto done; /* Check whether this is a qede device */ @@ -207,11 +206,18 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event, goto done; edev = netdev_priv(ndev); - /* Notify qed of the name change */ - if (!edev->ops || !edev->ops->common) - goto done; - 
edev->ops->common->set_id(edev->cdev, edev->ndev->name, - "qede"); + switch (event) { + case NETDEV_CHANGENAME: + /* Notify qed of the name change */ + if (!edev->ops || !edev->ops->common) + goto done; + edev->ops->common->set_id(edev->cdev, edev->ndev->name, "qede"); + break; + case NETDEV_CHANGEADDR: + edev = netdev_priv(ndev); + qede_roce_event_changeaddr(edev); + break; + } done: return NOTIFY_DONE; @@ -2545,10 +2551,14 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, qede_init_ndev(edev); + rc = qede_roce_dev_add(edev); + if (rc) + goto err3; + rc = register_netdev(edev->ndev); if (rc) { DP_NOTICE(edev, "Cannot register net-device\n"); - goto err3; + goto err4; } edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION); @@ -2568,6 +2578,8 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, return 0; +err4: + qede_roce_dev_remove(edev); err3: free_netdev(edev->ndev); err2: @@ -2614,8 +2626,11 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) DP_INFO(edev, "Starting qede_remove\n"); cancel_delayed_work_sync(&edev->sp_task); + unregister_netdev(ndev); + qede_roce_dev_remove(edev); + edev->ops->common->set_power_state(cdev, PCI_D0); pci_set_drvdata(pdev, NULL); @@ -3512,6 +3527,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode) DP_INFO(edev, "Starting qede unload\n"); + qede_roce_dev_event_close(edev); mutex_lock(&edev->qede_lock); edev->state = QEDE_STATE_CLOSED; @@ -3612,6 +3628,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) /* Query whether link is already-up */ memset(&link_output, 0, sizeof(link_output)); edev->ops->common->get_link(edev->cdev, &link_output); + qede_roce_dev_event_open(edev); qede_link_update(edev, &link_output); DP_INFO(edev, "Ending successfully qede load\n"); diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c new file mode 100644 index 000000000000..9867f960b063 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c @@ -0,0 +1,314 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include "qede.h" + +static struct qedr_driver *qedr_drv; +static LIST_HEAD(qedr_dev_list); +static DEFINE_MUTEX(qedr_dev_list_lock); + +bool qede_roce_supported(struct qede_dev *dev) +{ + return dev->dev_info.common.rdma_supported; +} + +static void _qede_roce_dev_add(struct qede_dev *edev) +{ + if (!qedr_drv) + return; + + edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev, + edev->ndev); +} + +static int qede_roce_create_wq(struct qede_dev *edev) +{ + INIT_LIST_HEAD(&edev->rdma_info.roce_event_list); + edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq"); + if (!edev->rdma_info.roce_wq) { + DP_NOTICE(edev, "qedr: Could not create workqueue\n"); + return -ENOMEM; + } + + return 0; +} + +static void qede_roce_cleanup_event(struct qede_dev *edev) +{ + struct list_head *head = &edev->rdma_info.roce_event_list; + struct qede_roce_event_work *event_node; + + flush_workqueue(edev->rdma_info.roce_wq); + while (!list_empty(head)) { + event_node = list_entry(head->next, struct qede_roce_event_work, + list); + cancel_work_sync(&event_node->work); + list_del(&event_node->list); + kfree(event_node); + } +} + +static void qede_roce_destroy_wq(struct qede_dev *edev) +{ + qede_roce_cleanup_event(edev); + destroy_workqueue(edev->rdma_info.roce_wq); +} + +int qede_roce_dev_add(struct qede_dev *edev) +{ + int rc = 0; + + if (qede_roce_supported(edev)) { + rc = qede_roce_create_wq(edev); + if (rc) + return rc; + + INIT_LIST_HEAD(&edev->rdma_info.entry); + mutex_lock(&qedr_dev_list_lock); + list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); + _qede_roce_dev_add(edev); + mutex_unlock(&qedr_dev_list_lock); + } + + return rc; +} + +static void _qede_roce_dev_remove(struct qede_dev *edev) +{ + if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev) + qedr_drv->remove(edev->rdma_info.qedr_dev); + edev->rdma_info.qedr_dev = NULL; +} + +void qede_roce_dev_remove(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + qede_roce_destroy_wq(edev); + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_remove(edev); + list_del(&edev->rdma_info.entry); + mutex_unlock(&qedr_dev_list_lock); +} + +static void _qede_roce_dev_open(struct qede_dev *edev) +{ + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP); +} + +static void qede_roce_dev_open(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_open(edev); + mutex_unlock(&qedr_dev_list_lock); +} + +static void _qede_roce_dev_close(struct qede_dev *edev) +{ + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN); +} + +static void qede_roce_dev_close(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_close(edev); + mutex_unlock(&qedr_dev_list_lock); +} + +static void qede_roce_dev_shutdown(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CLOSE); + mutex_unlock(&qedr_dev_list_lock); +} + +int qede_roce_register_driver(struct qedr_driver *drv) +{ + struct qede_dev *edev; + u8 qedr_counter = 0; + + mutex_lock(&qedr_dev_list_lock); + if (qedr_drv) { + mutex_unlock(&qedr_dev_list_lock); + return -EINVAL; + } + qedr_drv = drv; + + 
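/* Catch up on qede devices that were probed before the qedr module + * was loaded: instantiate their qedr counterparts now and, if the + * netdev is already running, replay a QEDE_UP notification so the new + * RoCE device starts out with the correct link state. + */ +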
list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { + struct net_device *ndev; + + qedr_counter++; + _qede_roce_dev_add(edev); + ndev = edev->ndev; + if (netif_running(ndev) && netif_oper_up(ndev)) + _qede_roce_dev_open(edev); + } + mutex_unlock(&qedr_dev_list_lock); + + DP_INFO(edev, "qedr: discovered and registered %d RoCE funcs\n", + qedr_counter); + + return 0; +} +EXPORT_SYMBOL(qede_roce_register_driver); + +void qede_roce_unregister_driver(struct qedr_driver *drv) +{ + struct qede_dev *edev; + + mutex_lock(&qedr_dev_list_lock); + list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { + if (edev->rdma_info.qedr_dev) + _qede_roce_dev_remove(edev); + } + qedr_drv = NULL; + mutex_unlock(&qedr_dev_list_lock); +} +EXPORT_SYMBOL(qede_roce_unregister_driver); + +static void qede_roce_changeaddr(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR); +} + +struct qede_roce_event_work *qede_roce_get_free_event_node(struct qede_dev + *edev) +{ + struct qede_roce_event_work *event_node = NULL; + struct list_head *list_node = NULL; + bool found = false; + + list_for_each(list_node, &edev->rdma_info.roce_event_list) { + event_node = list_entry(list_node, struct qede_roce_event_work, + list); + if (!work_pending(&event_node->work)) { + found = true; + break; + } + } + + if (!found) { + event_node = kzalloc(sizeof(*event_node), GFP_KERNEL); + if (!event_node) { + DP_NOTICE(edev, + "qedr: Could not allocate memory for roce work\n"); + return NULL; + } + list_add_tail(&event_node->list, + &edev->rdma_info.roce_event_list); + } + + return event_node; +} + +static void qede_roce_handle_event(struct work_struct *work) +{ + struct qede_roce_event_work *event_node; + enum qede_roce_event event; + struct qede_dev *edev; + + event_node = container_of(work, struct qede_roce_event_work, work); + event = event_node->event; + edev = event_node->ptr; + + switch (event) { + case QEDE_UP: + qede_roce_dev_open(edev); + break; + case QEDE_DOWN: + qede_roce_dev_close(edev); + break; + case QEDE_CLOSE: + qede_roce_dev_shutdown(edev); + break; + case QEDE_CHANGE_ADDR: + qede_roce_changeaddr(edev); + break; + default: + DP_NOTICE(edev, "Invalid roce event %d", event); + } +} + +static void qede_roce_add_event(struct qede_dev *edev, + enum qede_roce_event event) +{ + struct qede_roce_event_work *event_node; + + if (!edev->rdma_info.qedr_dev) + return; + + event_node = qede_roce_get_free_event_node(edev); + if (!event_node) + return; + + event_node->event = event; + event_node->ptr = edev; + + INIT_WORK(&event_node->work, qede_roce_handle_event); + queue_work(edev->rdma_info.roce_wq, &event_node->work); +} + +void qede_roce_dev_event_open(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_UP); +} + +void qede_roce_dev_event_close(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_DOWN); +} + +void qede_roce_event_changeaddr(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_CHANGE_ADDR); +} diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index c2d74e8785cf..e313742b571d 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -260,11 +260,10 @@ struct qed_dev_info { /* MFW version */ u32 mfw_rev; - bool rdma_supported; - u32 flash_size; u8 mf_mode; bool tx_switching; + bool rdma_supported; }; enum qed_sb_type { diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h new file mode 
100644 index 000000000000..99fbe6d55acb --- /dev/null +++ b/include/linux/qed/qede_roce.h @@ -0,0 +1,88 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef QEDE_ROCE_H +#define QEDE_ROCE_H + +struct qedr_dev; +struct qed_dev; +struct qede_dev; + +enum qede_roce_event { + QEDE_UP, + QEDE_DOWN, + QEDE_CHANGE_ADDR, + QEDE_CLOSE +}; + +struct qede_roce_event_work { + struct list_head list; + struct work_struct work; + void *ptr; + enum qede_roce_event event; +}; + +struct qedr_driver { + unsigned char name[32]; + + struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *, + struct net_device *); + + void (*remove)(struct qedr_dev *); + void (*notify)(struct qedr_dev *, enum qede_roce_event); +}; + +/* APIs for RoCE driver to register callback handlers, + * which will be invoked when device is added, removed, ifup, ifdown + */ +int qede_roce_register_driver(struct qedr_driver *drv); +void qede_roce_unregister_driver(struct qedr_driver *drv); + +bool qede_roce_supported(struct qede_dev *dev); + +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +int qede_roce_dev_add(struct qede_dev *dev); +void qede_roce_dev_event_open(struct qede_dev *dev); +void qede_roce_dev_event_close(struct qede_dev *dev); +void qede_roce_dev_remove(struct qede_dev *dev); +void qede_roce_event_changeaddr(struct qede_dev *qedr); +#else +static inline int qede_roce_dev_add(struct qede_dev *dev) +{ + return 0; +} + +static inline void qede_roce_dev_event_open(struct qede_dev *dev) {} +static inline void qede_roce_dev_event_close(struct qede_dev *dev) {} +static inline void qede_roce_dev_remove(struct qede_dev *dev) {} +static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {} +#endif +#endif From 51ff17251c9c2c2e71974149d22bc73ea09c27cc Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:57 +0300 Subject: [PATCH 1034/1050] qed: Add support for RoCE hw init This adds the backbone required for the various HW initializations which are necessary for the qedr driver - FW notification, resource initializations, etc. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- drivers/net/ethernet/qlogic/qed/Makefile | 1 + drivers/net/ethernet/qlogic/qed/qed.h | 33 +- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 6 + drivers/net/ethernet/qlogic/qed/qed_cxt.h | 6 + drivers/net/ethernet/qlogic/qed/qed_dev.c | 154 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 44 +- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 8 +- drivers/net/ethernet/qlogic/qed/qed_roce.c | 886 ++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 123 +++ drivers/net/ethernet/qlogic/qed/qed_sp.h | 1 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 8 + drivers/net/ethernet/qlogic/qed/qed_sriov.c | 4 +- drivers/net/ethernet/qlogic/qed/qed_vf.c | 2 +- include/linux/qed/common_hsi.h | 1 + include/linux/qed/qed_if.h | 5 +- include/linux/qed/qed_roce_if.h | 345 +++++++ include/linux/qed/rdma_common.h | 1 + 17 files changed, 1620 insertions(+), 8 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qed/qed_roce.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_roce.h create mode 100644 include/linux/qed/qed_roce_if.h diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index e067098f10a9..cda0af7fbc20 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -5,3 +5,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o +qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 91b571a3670b..c5a098a2ca2c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -35,6 +35,9 @@ extern const struct qed_common_ops qed_common_ops_pass; #define QED_WFQ_UNIT 100 +#define QED_WID_SIZE (1024) +#define QED_PF_DEMS_SIZE (4) + /* cau states */ enum qed_coalescing_mode { QED_COAL_MODE_DISABLE, @@ -48,6 +51,14 @@ enum qed_mcp_protocol_type; /* helpers */ static inline u32 qed_db_addr(u32 cid, u32 DEMS) +{ + u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) | + (cid * QED_PF_DEMS_SIZE); + + return db_addr; +} + +static inline u32 qed_db_addr_vf(u32 cid, u32 DEMS) { u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) | FIELD_VALUE(DB_LEGACY_ADDR_ICID, cid); @@ -152,14 +163,17 @@ enum QED_RESOURCES { QED_RL, QED_MAC, QED_VLAN, + QED_RDMA_CNQ_RAM, QED_ILT, QED_LL2_QUEUE, + QED_RDMA_STATS_QUEUE, QED_MAX_RESC, }; enum QED_FEATURE { QED_PF_L2_QUE, QED_VF, + QED_RDMA_CNQ, QED_MAX_FEATURES, }; @@ -364,6 +378,7 @@ struct qed_hwfn { /* Protocol related */ bool using_ll2; struct qed_ll2_info *p_ll2_info; + struct qed_rdma_info *p_rdma_info; struct qed_pf_params pf_params; bool b_rdma_enabled_in_prs; @@ -402,6 +417,17 @@ struct qed_hwfn { struct dbg_tools_data dbg_info; + /* PWM region specific data */ + u32 dpi_size; + u32 dpi_count; + + /* This is used to calculate the doorbell address */ + u32 dpi_start_offset; + + /* If one of the following is set then EDPM shouldn't be used */ + u8 dcbx_no_edpm; + u8 db_bar_no_edpm; + struct qed_simd_fp_handler simd_proto_handler[64]; #ifdef CONFIG_QED_SRIOV @@ -435,6 +461,8 @@ struct qed_int_params { bool fp_initialized; u8 fp_msix_base; u8 fp_msix_cnt; + u8 rdma_msix_base; + u8 rdma_msix_cnt; }; struct qed_dbg_feature { @@ -541,7 +569,6 @@ struct qed_dev { bool b_is_vf; u32 drv_type; - struct qed_eth_stats *reset_stats; struct qed_fw_data *fw_data; @@ -574,6 +601,10 @@ struct qed_dev { #endif const 
struct firmware *firmware; + + u32 rdma_max_sge; + u32 rdma_max_inline; + u32 rdma_max_srq_sge; }; #define NUM_OF_VFS(dev) MAX_NUM_VFS_BB diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index d9bea2a9c9f7..82370a1a59ad 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -48,7 +48,13 @@ #define TM_ELEM_SIZE 4 /* ILT constants */ +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +/* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */ +#define ILT_DEFAULT_HW_P_SIZE 4 +#else #define ILT_DEFAULT_HW_P_SIZE 3 +#endif + #define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12)) #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index c6f6f2e8192d..d00ad055802b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -170,6 +170,12 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); */ void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid); +int qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, + enum qed_cxt_elem_type elem_type, u32 iid); +u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, + enum protocol_type type); +u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn, + enum protocol_type type); #define QED_CTX_WORKING_MEM 0 #define QED_CTX_FL_MEM 1 diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 9a8e153df841..754f6a908858 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -35,9 +35,13 @@ #include "qed_sp.h" #include "qed_sriov.h" #include "qed_vf.h" +#include "qed_roce.h" static DEFINE_SPINLOCK(qm_lock); +#define QED_MIN_DPIS (4) +#define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS) + /* API common to all protocols */ enum BAR_ID { BAR_ID_0, /* used for GRC */ @@ -787,6 +791,136 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, return rc; } +static int +qed_hw_init_dpi_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 pwm_region_size, u32 n_cpus) +{ + u32 dpi_page_size_1, dpi_page_size_2, dpi_page_size; + u32 dpi_bit_shift, dpi_count; + u32 min_dpis; + + /* Calculate DPI size */ + dpi_page_size_1 = QED_WID_SIZE * n_cpus; + dpi_page_size_2 = max_t(u32, QED_WID_SIZE, PAGE_SIZE); + dpi_page_size = max_t(u32, dpi_page_size_1, dpi_page_size_2); + dpi_page_size = roundup_pow_of_two(dpi_page_size); + dpi_bit_shift = ilog2(dpi_page_size / 4096); + + dpi_count = pwm_region_size / dpi_page_size; + + min_dpis = p_hwfn->pf_params.rdma_pf_params.min_dpis; + min_dpis = max_t(u32, QED_MIN_DPIS, min_dpis); + + p_hwfn->dpi_size = dpi_page_size; + p_hwfn->dpi_count = dpi_count; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPI_BIT_SHIFT, dpi_bit_shift); + + if (dpi_count < min_dpis) + return -EINVAL; + + return 0; +} + +enum QED_ROCE_EDPM_MODE { + QED_ROCE_EDPM_MODE_ENABLE = 0, + QED_ROCE_EDPM_MODE_FORCE_ON = 1, + QED_ROCE_EDPM_MODE_DISABLE = 2, +}; + +static int +qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 pwm_regsize, norm_regsize; + u32 non_pwm_conn, min_addr_reg1; + u32 db_bar_size, n_cpus; + u32 roce_edpm_mode; + u32 pf_dems_shift; + int rc = 0; + u8 cond; + + db_bar_size = qed_hw_bar_size(p_hwfn, BAR_ID_1); + if (p_hwfn->cdev->num_hwfns > 1) + db_bar_size /= 2; + + /* Calculate doorbell regions */ + non_pwm_conn = 
qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_CORE) + + qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_CORE, + NULL) + + qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH, + NULL); + norm_regsize = roundup(QED_PF_DEMS_SIZE * non_pwm_conn, 4096); + min_addr_reg1 = norm_regsize / 4096; + pwm_regsize = db_bar_size - norm_regsize; + + /* Check that the normal and PWM sizes are valid */ + if (db_bar_size < norm_regsize) { + DP_ERR(p_hwfn->cdev, + "Doorbell BAR size 0x%x is too small (normal region is 0x%0x )\n", + db_bar_size, norm_regsize); + return -EINVAL; + } + + if (pwm_regsize < QED_MIN_PWM_REGION) { + DP_ERR(p_hwfn->cdev, + "PWM region size 0x%0x is too small. Should be at least 0x%0x (Doorbell BAR size is 0x%x and normal region size is 0x%0x)\n", + pwm_regsize, + QED_MIN_PWM_REGION, db_bar_size, norm_regsize); + return -EINVAL; + } + + /* Calculate number of DPIs */ + roce_edpm_mode = p_hwfn->pf_params.rdma_pf_params.roce_edpm_mode; + if ((roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE) || + ((roce_edpm_mode == QED_ROCE_EDPM_MODE_FORCE_ON))) { + /* Either EDPM is mandatory, or we are attempting to allocate a + * WID per CPU. + */ + n_cpus = num_active_cpus(); + rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus); + } + + cond = (rc && (roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE)) || + (roce_edpm_mode == QED_ROCE_EDPM_MODE_DISABLE); + if (cond || p_hwfn->dcbx_no_edpm) { + /* Either EDPM is disabled from user configuration, or it is + * disabled via DCBx, or it is not mandatory and we failed to + * allocate a WID per CPU. + */ + n_cpus = 1; + rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus); + + if (cond) + qed_rdma_dpm_bar(p_hwfn, p_ptt); + } + + DP_INFO(p_hwfn, + "doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n", + norm_regsize, + pwm_regsize, + p_hwfn->dpi_size, + p_hwfn->dpi_count, + ((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ? + "disabled" : "enabled"); + + if (rc) { + DP_ERR(p_hwfn, + "Failed to allocate enough DPIs. Allocated %d but the current minimum is %d.\n", + p_hwfn->dpi_count, + p_hwfn->pf_params.rdma_pf_params.min_dpis); + return -EINVAL; + } + + p_hwfn->dpi_start_offset = norm_regsize; + + /* DEMS size is configured log2 of DWORDs, hence the division by 4 */ + pf_dems_shift = ilog2(QED_PF_DEMS_SIZE / 4); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_ICID_BIT_SHIFT_NORM, pf_dems_shift); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_MIN_ADDR_REG1, min_addr_reg1); + + return 0; +} + static int qed_hw_init_port(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, int hw_mode) { @@ -860,6 +994,10 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, /* Pure runtime initializations - directly to the HW */ qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true); + rc = qed_hw_init_pf_doorbell_bar(p_hwfn, p_ptt); + if (rc) + return rc; + if (b_hw_start) { /* enable interrupts */ qed_int_igu_enable(p_hwfn, p_ptt, int_mode); @@ -1284,6 +1422,19 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) u32 *feat_num = p_hwfn->hw_info.feat_num; int num_features = 1; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + /* Each RoCE CNQ requires: 1 status block + 1 CNQ.
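(A CNQ is a RoCE completion-notification queue; each one is fed by its own status block.)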
We divide the + * status blocks equally between L2 / RoCE but with consideration as + * to how many l2 queues / cnqs we have + */ + if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + num_features++; + + feat_num[QED_RDMA_CNQ] = + min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, + RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM)); + } +#endif feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_L2_QUEUE)); @@ -1325,6 +1476,9 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) num_funcs; resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs; resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs; + resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs; + resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB / + num_funcs; for (i = 0; i < QED_MAX_RESC; i++) resc_start[i] = resc_num[i] * enabled_func_idx; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 48cdf62c025b..4ee3151e80c2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -33,6 +33,11 @@ #include "qed_hw.h" #include "qed_selftest.h" +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#define QED_ROCE_QPS (8192) +#define QED_ROCE_DPIS (8) +#endif + static char version[] = "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; @@ -206,8 +211,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->pci_mem_start = cdev->pci_params.mem_start; dev_info->pci_mem_end = cdev->pci_params.mem_end; dev_info->pci_irq = cdev->pci_params.irq; - dev_info->rdma_supported = - (cdev->hwfns[0].hw_info.personality == QED_PCI_ETH_ROCE); + dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality == + QED_PCI_ETH_ROCE); dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]); ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr); @@ -677,6 +682,9 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, enum qed_int_mode int_mode) { struct qed_sb_cnt_info sb_cnt_info; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + int num_l2_queues; +#endif int rc; int i; @@ -707,6 +715,31 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors - cdev->num_hwfns; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + num_l2_queues = 0; + for_each_hwfn(cdev, i) + num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE); + + DP_VERBOSE(cdev, QED_MSG_RDMA, + "cdev->int_params.fp_msix_cnt=%d num_l2_queues=%d\n", + cdev->int_params.fp_msix_cnt, num_l2_queues); + + if (cdev->int_params.fp_msix_cnt > num_l2_queues) { + cdev->int_params.rdma_msix_cnt = + (cdev->int_params.fp_msix_cnt - num_l2_queues) + / cdev->num_hwfns; + cdev->int_params.rdma_msix_base = + cdev->int_params.fp_msix_base + num_l2_queues; + cdev->int_params.fp_msix_cnt = num_l2_queues; + } else { + cdev->int_params.rdma_msix_cnt = 0; + } + + DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n", + cdev->int_params.rdma_msix_cnt, + cdev->int_params.rdma_msix_base); +#endif + return 0; } @@ -810,6 +843,13 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; +#endif for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = 
&cdev->hwfns[i]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index e75738d21783..b414a0542177 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -1448,5 +1448,11 @@ 0x620000UL #define PHY_PCIE_REG_PHY1 \ 0x624000UL - +#define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL +#define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL +#define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL +#define DORQ_REG_PF_DPM_ENABLE 0x100510UL +#define DORQ_REG_PF_ICID_BIT_SHIFT_NORM 0x100448UL +#define DORQ_REG_PF_MIN_ADDR_REG1 0x100400UL +#define DORQ_REG_PF_DPI_BIT_SHIFT 0x100450UL #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c new file mode 100644 index 000000000000..4c53b857cc1c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -0,0 +1,886 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_int.h" +#include "qed_ll2.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" +#include "qed_roce.h" + +void qed_async_roce_event(struct qed_hwfn *p_hwfn, + struct event_ring_entry *p_eqe) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + + p_rdma_info->events.affiliated_event(p_rdma_info->events.context, + p_eqe->opcode, &p_eqe->data); +} + +static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 max_count) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "max_count = %08x\n", max_count); + + bmap->max_count = max_count; + + bmap->bitmap = kzalloc(BITS_TO_LONGS(max_count) * sizeof(long), + GFP_KERNEL); + if (!bmap->bitmap) { + DP_NOTICE(p_hwfn, + "qed bmap alloc failed: cannot allocate memory (bitmap)\n"); + return -ENOMEM; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocated bitmap %p\n", + bmap->bitmap); + return 0; +} + +static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 *id_num) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "bmap = %p\n", bmap); + + *id_num = find_first_zero_bit(bmap->bitmap, bmap->max_count); + + if (*id_num >= bmap->max_count) { + DP_NOTICE(p_hwfn, "no id available max_count=%d\n", + bmap->max_count); + return -EINVAL; + } + + __set_bit(*id_num, bmap->bitmap); + + return 0; +} + +static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 id_num) +{ + bool b_acquired; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "id_num = %08x", id_num); + if (id_num >= bmap->max_count) + return; + + b_acquired = test_and_clear_bit(id_num, bmap->bitmap); + if (!b_acquired) { + DP_NOTICE(p_hwfn, "ID %d already released\n", id_num); + return; + } +} + +u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) +{ + /* First sb id for RoCE is after all the l2 sb */ + return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id; +} + +u32 qed_rdma_query_cau_timer_res(void *rdma_cxt) +{ + return QED_CAU_DEF_RX_TIMER_RES; +} + +static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_info *p_rdma_info; + u32 num_cons, num_tasks; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n"); + + /* Allocate a struct with current pf rdma info */ + p_rdma_info = kzalloc(sizeof(*p_rdma_info), GFP_KERNEL); + if (!p_rdma_info) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info). rc = %d\n", + rc); + return rc; + } + + p_hwfn->p_rdma_info = p_rdma_info; + p_rdma_info->proto = PROTOCOLID_ROCE; + + num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0); + + p_rdma_info->num_qps = num_cons / 2; + + num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE); + + /* Each MR uses a single task */ + p_rdma_info->num_mrs = num_tasks; + + /* Queue zone lines are shared between RoCE and L2 in such a way that + * they can be used by each without obstructing the other. 
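+ * Concretely, L2 owns the first FEAT_NUM(p_hwfn, QED_L2_QUEUE) queue + * zones and the RoCE CNQs sit right after them, so CNQ i ends up in + * zone queue_zone_base + i (see qed_rdma_cnq_prod_update() further down).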
+ */ + p_rdma_info->queue_zone_base = (u16)FEAT_NUM(p_hwfn, QED_L2_QUEUE); + + /* Allocate a struct with device params and fill it */ + p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL); + if (!p_rdma_info->dev) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info dev). rc = %d\n", + rc); + goto free_rdma_info; + } + + /* Allocate a struct with port params and fill it */ + p_rdma_info->port = kzalloc(sizeof(*p_rdma_info->port), GFP_KERNEL); + if (!p_rdma_info->port) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info port). rc = %d\n", + rc); + goto free_rdma_dev; + } + + /* Allocate bit map for pd's */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate pd_map, rc = %d\n", + rc); + goto free_rdma_port; + } + + /* Allocate DPI bitmap */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map, + p_hwfn->dpi_count); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate DPI bitmap, rc = %d\n", rc); + goto free_pd_map; + } + + /* Allocate bitmap for cq's. The maximum number of CQs is bounded to + * twice the number of QPs. + */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, + p_rdma_info->num_qps * 2); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate cq bitmap, rc = %d\n", rc); + goto free_dpi_map; + } + + /* Allocate bitmap for toggle bit for cq icids + * We toggle the bit every time we create or resize cq for a given icid. + * The maximum number of CQs is bounded to twice the number of QPs. + */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, + p_rdma_info->num_qps * 2); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate toggle bits, rc = %d\n", rc); + goto free_cq_map; + } + + /* Allocate bitmap for itids */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map, + p_rdma_info->num_mrs); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate itids bitmap, rc = %d\n", rc); + goto free_toggle_map; + } + + /* Allocate bitmap for cids used for qps.
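Each QP consumes a pair of cids, which is why num_qps above is num_cons / 2 and, presumably, why qed_rdma_init_hw() insists that the first RoCE cid is even.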
*/ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate cid bitmap, rc = %d\n", rc); + goto free_tid_map; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n"); + return 0; + +free_tid_map: + kfree(p_rdma_info->tid_map.bitmap); +free_toggle_map: + kfree(p_rdma_info->toggle_bits.bitmap); +free_cq_map: + kfree(p_rdma_info->cq_map.bitmap); +free_dpi_map: + kfree(p_rdma_info->dpi_map.bitmap); +free_pd_map: + kfree(p_rdma_info->pd_map.bitmap); +free_rdma_port: + kfree(p_rdma_info->port); +free_rdma_dev: + kfree(p_rdma_info->dev); +free_rdma_info: + kfree(p_rdma_info); + + return rc; +} + +void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + + kfree(p_rdma_info->cid_map.bitmap); + kfree(p_rdma_info->tid_map.bitmap); + kfree(p_rdma_info->toggle_bits.bitmap); + kfree(p_rdma_info->cq_map.bitmap); + kfree(p_rdma_info->dpi_map.bitmap); + kfree(p_rdma_info->pd_map.bitmap); + + kfree(p_rdma_info->port); + kfree(p_rdma_info->dev); + + kfree(p_rdma_info); +} + +static void qed_rdma_free(struct qed_hwfn *p_hwfn) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); + + qed_rdma_resc_free(p_hwfn); +} + +static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) +{ + guid[0] = p_hwfn->hw_info.hw_mac_addr[0] ^ 2; + guid[1] = p_hwfn->hw_info.hw_mac_addr[1]; + guid[2] = p_hwfn->hw_info.hw_mac_addr[2]; + guid[3] = 0xff; + guid[4] = 0xfe; + guid[5] = p_hwfn->hw_info.hw_mac_addr[3]; + guid[6] = p_hwfn->hw_info.hw_mac_addr[4]; + guid[7] = p_hwfn->hw_info.hw_mac_addr[5]; +} + +static void qed_rdma_init_events(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_events *events; + + events = &p_hwfn->p_rdma_info->events; + + events->unaffiliated_event = params->events->unaffiliated_event; + events->affiliated_event = params->events->affiliated_event; + events->context = params->events->context; +} + +static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + struct qed_dev *cdev = p_hwfn->cdev; + u32 pci_status_control; + u32 num_qps; + + /* Vendor specific information */ + dev->vendor_id = cdev->vendor_id; + dev->vendor_part_id = cdev->device_id; + dev->hw_ver = 0; + dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | + (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION); + + qed_rdma_get_guid(p_hwfn, (u8 *)&dev->sys_image_guid); + dev->node_guid = dev->sys_image_guid; + + dev->max_sge = min_t(u32, RDMA_MAX_SGE_PER_SQ_WQE, + RDMA_MAX_SGE_PER_RQ_WQE); + + if (cdev->rdma_max_sge) + dev->max_sge = min_t(u32, cdev->rdma_max_sge, dev->max_sge); + + dev->max_inline = ROCE_REQ_MAX_INLINE_DATA_SIZE; + + dev->max_inline = (cdev->rdma_max_inline) ? + min_t(u32, cdev->rdma_max_inline, dev->max_inline) : + dev->max_inline; + + dev->max_wqe = QED_RDMA_MAX_WQE; + dev->max_cnq = (u8)FEAT_NUM(p_hwfn, QED_RDMA_CNQ); + + /* The number of QPs may be higher than QED_ROCE_MAX_QPS, because + * it is up-aligned to 16 and then to ILT page size within qed cxt. + * This is OK in terms of ILT but we don't want to configure the FW + * above its abilities + */ + num_qps = ROCE_MAX_QPS; + num_qps = min_t(u64, num_qps, p_hwfn->p_rdma_info->num_qps); + dev->max_qp = num_qps; + + /* CQs use the same icids that QPs use hence they are limited by the + * number of icids. There are two icids per QP.
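+ * With num_qps QPs that leaves 2 * num_qps usable icids, which is + * exactly the max_cq value assigned below.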
+ */ + dev->max_cq = num_qps * 2; + + /* The number of mrs is smaller by 1 since the first is reserved */ + dev->max_mr = p_hwfn->p_rdma_info->num_mrs - 1; + dev->max_mr_size = QED_RDMA_MAX_MR_SIZE; + + /* The maximum CQE capacity per CQ supported. + * The max number of CQEs sits in a two-layer PBL: + * 8 is the pointer size in bytes, + * 32 is the size of a CQ element in bytes + */ + if (params->cq_mode == QED_RDMA_CQ_MODE_32_BITS) + dev->max_cqe = QED_RDMA_MAX_CQE_32_BIT; + else + dev->max_cqe = QED_RDMA_MAX_CQE_16_BIT; + + dev->max_mw = 0; + dev->max_fmr = QED_RDMA_MAX_FMR; + dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8); + dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE; + dev->max_pkey = QED_RDMA_MAX_P_KEY; + + dev->max_qp_resp_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + (RDMA_RESP_RD_ATOMIC_ELM_SIZE * 2); + dev->max_qp_req_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + RDMA_REQ_RD_ATOMIC_ELM_SIZE; + dev->max_dev_resp_rd_atomic_resc = dev->max_qp_resp_rd_atomic_resc * + p_hwfn->p_rdma_info->num_qps; + dev->page_size_caps = QED_RDMA_PAGE_SIZE_CAPS; + dev->dev_ack_delay = QED_RDMA_ACK_DELAY; + dev->max_pd = RDMA_MAX_PDS; + dev->max_ah = p_hwfn->p_rdma_info->num_qps; + dev->max_stats_queues = (u8)RESC_NUM(p_hwfn, QED_RDMA_STATS_QUEUE); + + /* Set capabilities */ + dev->dev_caps = 0; + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RNR_NAK, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RESIZE_CQ, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_MEMORY_EXT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_QUEUE_EXT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ZBVA, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1); + + /* Check atomic operations support in PCI configuration space. */ + pci_read_config_dword(cdev->pdev, + cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2, + &pci_status_control); + + if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN) + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1); +} + +static void qed_rdma_init_port(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_port *port = p_hwfn->p_rdma_info->port; + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + port->port_state = p_hwfn->mcp_info->link_output.link_up ? + QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + + port->max_msg_size = min_t(u64, + (dev->max_mr_mw_fmr_size * + p_hwfn->cdev->rdma_max_sge), + BIT(31)); + + port->pkey_bad_counter = 0; +} + +static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 ll2_ethertype_en; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n"); + p_hwfn->b_rdma_enabled_in_prs = false; + + qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); + + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE; + + /* We delay writing to this reg until first cid is allocated.
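(The register in question is PRS_REG_SEARCH_ROCE, cached in rdma_prs_search_reg just above.)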
See + * qed_cxt_dynamic_ilt_alloc function for more details + */ + ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); + qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en | 0x01)); + + if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) { + DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n"); + return 0; +} + +static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params, + struct qed_ptt *p_ptt) +{ + struct rdma_init_func_ramrod_data *p_ramrod; + struct qed_rdma_cnq_params *p_cnq_pbl_list; + struct rdma_init_func_hdr *p_params_header; + struct rdma_cnq_params *p_cnq_params; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + u32 cnq_id, sb_id; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Starting FW\n"); + + /* Save the number of cnqs for the function close ramrod */ + p_hwfn->p_rdma_info->num_cnqs = params->desired_cnq; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_INIT, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + + p_params_header = &p_ramrod->params_header; + p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, + QED_RDMA_CNQ_RAM); + p_params_header->num_cnqs = params->desired_cnq; + + if (params->cq_mode == QED_RDMA_CQ_MODE_16_BITS) + p_params_header->cq_ring_mode = 1; + else + p_params_header->cq_ring_mode = 0; + + for (cnq_id = 0; cnq_id < params->desired_cnq; cnq_id++) { + sb_id = qed_rdma_get_sb_id(p_hwfn, cnq_id); + p_cnq_params = &p_ramrod->cnq_params[cnq_id]; + p_cnq_pbl_list = ¶ms->cnq_pbl_list[cnq_id]; + p_cnq_params->sb_num = + cpu_to_le16(p_hwfn->sbs_info[sb_id]->igu_sb_id); + + p_cnq_params->sb_index = p_hwfn->pf_params.rdma_pf_params.gl_pi; + p_cnq_params->num_pbl_pages = p_cnq_pbl_list->num_pbl_pages; + + DMA_REGPAIR_LE(p_cnq_params->pbl_base_addr, + p_cnq_pbl_list->pbl_ptr); + + /* we assume here that cnq_id and qz_offset are the same */ + p_cnq_params->queue_zone_num = + cpu_to_le16(p_hwfn->p_rdma_info->queue_zone_base + + cnq_id); + } + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + /* The first DPI is reserved for the Kernel */ + __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap); + + /* Tid 0 will be used as the key for "reserved MR". + * The driver should allocate memory for it so it can be loaded but no + * ramrod should be passed on it. 
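+ * Since the tid bitmap is freshly allocated at this point, the + * qed_rdma_alloc_tid() call below should hand back id 0, i.e. + * RDMA_RESERVED_LKEY; anything else is treated as a setup error.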
+ */ + qed_rdma_alloc_tid(p_hwfn, &dev->reserved_lkey); + if (dev->reserved_lkey != RDMA_RESERVED_LKEY) { + DP_NOTICE(p_hwfn, + "Reserved lkey should be equal to RDMA_RESERVED_LKEY\n"); + return -EINVAL; + } + + return 0; +} + +static int qed_rdma_setup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params) +{ + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA setup\n"); + + spin_lock_init(&p_hwfn->p_rdma_info->lock); + + qed_rdma_init_devinfo(p_hwfn, params); + qed_rdma_init_port(p_hwfn); + qed_rdma_init_events(p_hwfn, params); + + rc = qed_rdma_reserve_lkey(p_hwfn); + if (rc) + return rc; + + rc = qed_rdma_init_hw(p_hwfn, p_ptt); + if (rc) + return rc; + + return qed_rdma_start_fw(p_hwfn, params, p_ptt); +} + +int qed_rdma_stop(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_close_func_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + struct qed_ptt *p_ptt; + u32 ll2_ethertype_en; + int rc = -EBUSY; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop\n"); + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to acquire PTT\n"); + return rc; + } + + /* Disable RoCE search */ + qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0); + p_hwfn->b_rdma_enabled_in_prs = false; + + qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); + + ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); + + qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en & 0xFFFE)); + + qed_ptt_release(p_hwfn, p_ptt); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Stop RoCE */ + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_CLOSE, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto out; + + p_ramrod = &p_ent->ramrod.rdma_close_func; + + p_ramrod->num_cnqs = p_hwfn->p_rdma_info->num_cnqs; + p_ramrod->cnq_start_offset = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + +out: + qed_rdma_free(p_hwfn); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop done, rc = %d\n", rc); + return rc; +} + +int qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + u32 dpi_start_offset; + u32 returned_id = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding User\n"); + + /* Allocate DPI */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, + &returned_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + out_params->dpi = (u16)returned_id; + + /* Calculate the corresponding DPI address */ + dpi_start_offset = p_hwfn->dpi_start_offset; + + out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size)); + + out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size); + + out_params->dpi_size = p_hwfn->dpi_size; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding user - done, rc = %d\n", rc); + return rc; +} + +struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query device\n"); + + /* Return struct with device parameters */ + return p_hwfn->p_rdma_info->dev; +} + +int 
qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n"); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + if (rc) + goto out; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid); +out: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); + return rc; +} + +void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) +{ + struct qed_hwfn *p_hwfn; + u16 qz_num; + u32 addr; + + p_hwfn = (struct qed_hwfn *)rdma_cxt; + qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset; + addr = GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num); + + REG_WR16(p_hwfn, addr, prod); + + /* keep prod updates ordered */ + wmb(); +} + +static int qed_fill_rdma_dev_info(struct qed_dev *cdev, + struct qed_dev_rdma_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->rdma_type = QED_RDMA_TYPE_ROCE; + + qed_fill_dev_info(cdev, &info->common); + + return 0; +} + +static int qed_rdma_get_sb_start(struct qed_dev *cdev) +{ + int feat_num; + + if (cdev->num_hwfns > 1) + feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE); + else + feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) * + cdev->num_hwfns; + + return feat_num; +} + +static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev) +{ + int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ); + int n_msix = cdev->int_params.rdma_msix_cnt; + + return min_t(int, n_cnq, n_msix); +} + +static int qed_rdma_set_int(struct qed_dev *cdev, u16 cnt) +{ + int limit = 0; + + /* Mark the fastpath as free/used */ + cdev->int_params.fp_initialized = cnt ? true : false; + + if (cdev->int_params.out.int_mode != QED_INT_MODE_MSIX) { + DP_ERR(cdev, + "qed roce supports only MSI-X interrupts (detected %d).\n", + cdev->int_params.out.int_mode); + return -EINVAL; + } else if (cdev->int_params.fp_msix_cnt) { + limit = cdev->int_params.rdma_msix_cnt; + } + + if (!limit) + return -ENOMEM; + + return min_t(int, cnt, limit); +} + +static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info) +{ + memset(info, 0, sizeof(*info)); + + if (!cdev->int_params.fp_initialized) { + DP_INFO(cdev, + "Protocol driver requested interrupt information, but its support is not yet configured\n"); + return -EINVAL; + } + + if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { + int msix_base = cdev->int_params.rdma_msix_base; + + info->msix_cnt = cdev->int_params.rdma_msix_cnt; + info->msix = &cdev->int_params.msix_table[msix_base]; + + DP_VERBOSE(cdev, QED_MSG_RDMA, "msix_cnt = %d msix_base=%d\n", + info->msix_cnt, msix_base); + } + + return 0; +} + +static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) +{ + return QED_LEADING_HWFN(cdev); +} + +static void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 val; + + val = (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm) ? 
0 : 1; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPM_ENABLE, val); + DP_VERBOSE(p_hwfn, (QED_MSG_DCB | QED_MSG_RDMA), + "Changing DPM_EN state to %d (DCBX=%d, DB_BAR=%d)\n", + val, p_hwfn->dcbx_no_edpm, p_hwfn->db_bar_no_edpm); +} + +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + p_hwfn->db_bar_no_edpm = true; + + qed_rdma_dpm_conf(p_hwfn, p_ptt); +} + +int qed_rdma_start(void *rdma_cxt, struct qed_rdma_start_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_ptt *p_ptt; + int rc = -EBUSY; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "desired_cnq = %08x\n", params->desired_cnq); + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + goto err; + + rc = qed_rdma_alloc(p_hwfn, p_ptt, params); + if (rc) + goto err1; + + rc = qed_rdma_setup(p_hwfn, p_ptt, params); + if (rc) + goto err2; + + qed_ptt_release(p_hwfn, p_ptt); + + return rc; + +err2: + qed_rdma_free(p_hwfn); +err1: + qed_ptt_release(p_hwfn, p_ptt); +err: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA start - error, rc = %d\n", rc); + return rc; +} + +static int qed_rdma_init(struct qed_dev *cdev, + struct qed_rdma_start_in_params *params) +{ + return qed_rdma_start(QED_LEADING_HWFN(cdev), params); +} + +void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "dpi = %08x\n", dpi); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, dpi); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static const struct qed_rdma_ops qed_rdma_ops_pass = { + .common = &qed_common_ops_pass, + .fill_dev_info = &qed_fill_rdma_dev_info, + .rdma_get_rdma_ctx = &qed_rdma_get_rdma_ctx, + .rdma_init = &qed_rdma_init, + .rdma_add_user = &qed_rdma_add_user, + .rdma_remove_user = &qed_rdma_remove_user, + .rdma_stop = &qed_rdma_stop, + .rdma_query_device = &qed_rdma_query_device, + .rdma_get_start_sb = &qed_rdma_get_sb_start, + .rdma_get_rdma_int = &qed_rdma_get_int, + .rdma_set_rdma_int = &qed_rdma_set_int, + .rdma_get_min_cnq_msix = &qed_rdma_get_min_cnq_msix, + .rdma_cnq_prod_update = &qed_rdma_cnq_prod_update, +}; + +const struct qed_rdma_ops *qed_get_rdma_ops() +{ + return &qed_rdma_ops_pass; +} +EXPORT_SYMBOL(qed_get_rdma_ops); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h new file mode 100644 index 000000000000..e55048106a83 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -0,0 +1,123 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_ROCE_H +#define _QED_ROCE_H +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" + +#define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) +#define QED_RDMA_MAX_P_KEY (1) +#define QED_RDMA_MAX_WQE (0x7FFF) +#define QED_RDMA_MAX_SRQ_WQE_ELEM (0x7FFF) +#define QED_RDMA_PAGE_SIZE_CAPS (0xFFFFF000) +#define QED_RDMA_ACK_DELAY (15) +#define QED_RDMA_MAX_MR_SIZE (0x10000000000ULL) +#define QED_RDMA_MAX_CQS (RDMA_MAX_CQS) +#define QED_RDMA_MAX_MRS (RDMA_MAX_TIDS) +/* Add 1 for header element */ +#define QED_RDMA_MAX_SRQ_ELEM_PER_WQE (RDMA_MAX_SGE_PER_RQ_WQE + 1) +#define QED_RDMA_MAX_SGE_PER_SRQ_WQE (RDMA_MAX_SGE_PER_RQ_WQE) +#define QED_RDMA_SRQ_WQE_ELEM_SIZE (16) +#define QED_RDMA_MAX_SRQS (32 * 1024) + +#define QED_RDMA_MAX_CQE_32_BIT (0x7FFFFFFF - 1) +#define QED_RDMA_MAX_CQE_16_BIT (0x7FFF - 1) + +enum qed_rdma_toggle_bit { + QED_RDMA_TOGGLE_BIT_CLEAR = 0, + QED_RDMA_TOGGLE_BIT_SET = 1 +}; + +struct qed_bmap { + unsigned long *bitmap; + u32 max_count; +}; + +struct qed_rdma_info { + /* spin lock to protect bitmaps */ + spinlock_t lock; + + struct qed_bmap cq_map; + struct qed_bmap pd_map; + struct qed_bmap tid_map; + struct qed_bmap qp_map; + struct qed_bmap srq_map; + struct qed_bmap cid_map; + struct qed_bmap dpi_map; + struct qed_bmap toggle_bits; + struct qed_rdma_events events; + struct qed_rdma_device *dev; + struct qed_rdma_port *port; + u32 last_tid; + u8 num_cnqs; + u32 num_qps; + u32 num_mrs; + u16 queue_zone_base; + enum protocol_type proto; +}; + +int +qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params); +int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd); +int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid); +int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid); +void qed_rdma_free_tid(void *rdma_cxt, u32 tid); +struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt); +int +qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params); +void qed_rdma_remove_user(void *rdma_cxt, u16 dpi); +int qed_rdma_start(void *p_hwfn, struct qed_rdma_start_in_params *params); +int qed_rdma_stop(void *rdma_cxt); +u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id); +u32 qed_rdma_query_cau_timer_res(void *p_hwfn); +void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod); +void qed_rdma_resc_free(struct qed_hwfn *p_hwfn); +void qed_async_roce_event(struct qed_hwfn *p_hwfn, + struct event_ring_entry *p_eqe); + +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +#else +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +#endif +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index a3c539f1c2ac..652c90819758 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -85,6 +85,7 @@ union ramrod_data { struct rdma_srq_create_ramrod_data rdma_create_srq; struct 
rdma_srq_destroy_ramrod_data rdma_destroy_srq; struct rdma_srq_modify_ramrod_data rdma_modify_srq; + struct roce_init_func_ramrod_data roce_init_func; struct iscsi_slow_path_hdr iscsi_empty; struct iscsi_init_ramrod_params iscsi_init; diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 349af182d085..caff41544898 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -28,6 +28,9 @@ #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_sriov.h" +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#include "qed_roce.h" +#endif /*************************************************************************** * Structures & Definitions @@ -237,6 +240,11 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) { switch (p_eqe->protocol_id) { +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + case PROTOCOLID_ROCE: + qed_async_roce_event(p_hwfn, p_eqe); + return 0; +#endif case PROTOCOLID_COMMON: return qed_sriov_eqe_event(p_hwfn, p_eqe->opcode, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index a4a3cead15bb..d2d6621fe0e5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1851,8 +1851,8 @@ static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn, if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) { u16 qid = mbx->req_virt->start_txq.tx_qid; - p_tlv->offset = qed_db_addr(p_vf->vf_queues[qid].fw_cid, - DQ_DEMS_LEGACY); + p_tlv->offset = qed_db_addr_vf(p_vf->vf_queues[qid].fw_cid, + DQ_DEMS_LEGACY); } qed_iov_send_response(p_hwfn, p_ptt, p_vf, length, status); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 85334ceaf69c..abf5bf11f865 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -544,7 +544,7 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn, u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id]; u32 db_addr; - db_addr = qed_db_addr(cid, DQ_DEMS_LEGACY); + db_addr = qed_db_addr_vf(cid, DQ_DEMS_LEGACY); *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + db_addr; } diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 19027635df0d..734deb094618 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -674,6 +674,7 @@ union event_ring_data { struct iscsi_eqe_data iscsi_info; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; + struct regpair roce_handle; }; /* Event Ring Entry */ diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e313742b571d..f9ae903bbb84 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -34,6 +34,8 @@ enum dcbx_protocol_type { DCBX_MAX_PROTOCOL_TYPE }; +#define QED_ROCE_PROTOCOL_INDEX (3) + #ifdef CONFIG_DCB #define QED_LLDP_CHASSIS_ID_STAT_LEN 4 #define QED_LLDP_PORT_ID_STAT_LEN 4 @@ -268,6 +270,7 @@ struct qed_dev_info { enum qed_sb_type { QED_SB_TYPE_L2_QUEUE, + QED_SB_TYPE_CNQ, }; enum qed_protocol { @@ -628,7 +631,7 @@ enum DP_MODULE { QED_MSG_CXT = 0x800000, QED_MSG_LL2 = 0x1000000, QED_MSG_ILT = 0x2000000, - QED_MSG_ROCE = 0x4000000, + QED_MSG_RDMA = 0x4000000, QED_MSG_DEBUG = 0x8000000, /* to be added...up to 0x8000000 */ }; diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h new file mode 100644 index 000000000000..0f7d5275e515 --- /dev/null +++ 
b/include/linux/qed/qed_roce_if.h @@ -0,0 +1,345 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_ROCE_IF_H +#define _QED_ROCE_IF_H +#include +#include +#include +#include +#include +#include +#include +#include + +#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) + +/* rdma interface */ +enum qed_rdma_tid_type { + QED_RDMA_TID_REGISTERED_MR, + QED_RDMA_TID_FMR, + QED_RDMA_TID_MW_TYPE1, + QED_RDMA_TID_MW_TYPE2A +}; + +struct qed_rdma_events { + void *context; + void (*affiliated_event)(void *context, u8 fw_event_code, + void *fw_handle); + void (*unaffiliated_event)(void *context, u8 event_code); +}; + +struct qed_rdma_device { + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 fw_ver; + + u64 node_guid; + u64 sys_image_guid; + + u8 max_cnq; + u8 max_sge; + u8 max_srq_sge; + u16 max_inline; + u32 max_wqe; + u32 max_srq_wqe; + u8 max_qp_resp_rd_atomic_resc; + u8 max_qp_req_rd_atomic_resc; + u64 max_dev_resp_rd_atomic_resc; + u32 max_cq; + u32 max_qp; + u32 max_srq; + u32 max_mr; + u64 max_mr_size; + u32 max_cqe; + u32 max_mw; + u32 max_fmr; + u32 max_mr_mw_fmr_pbl; + u64 max_mr_mw_fmr_size; + u32 max_pd; + u32 max_ah; + u8 max_pkey; + u16 max_srq_wr; + u8 max_stats_queues; + u32 dev_caps; + + /* Ability to support RNR-NAK generation */ + +#define QED_RDMA_DEV_CAP_RNR_NAK_MASK 0x1 +#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT 0 + /* Ability to support shutdown port */ +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK 0x1 +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT 1 + /* Ability to support port active event */ +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT 2 + /* Ability to support port change event */ +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT 3 + /* Ability to support system image GUID */ +#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK 0x1 +#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT 4 + /* Ability to support a bad P_Key counter */ +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT 5 + /* Ability to support atomic operations */ +#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK 0x1 +#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT 6 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT 7 + /* Ability to support modifying the maximum number of + * outstanding work requests per QP + */ +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT 8 + /* Ability to support automatic path migration */ +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK 0x1 +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT 9 + /* Ability to support the base memory management extensions */ +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT 10 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT 11 + /* Ability to support multiple page sizes per memory region */ +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT 12 + /* Ability to support block list physical buffer list */ +#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK 0x1 +#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT 13 + /* Ability to support zero based virtual addresses */ +#define QED_RDMA_DEV_CAP_ZBVA_MASK 0x1 +#define QED_RDMA_DEV_CAP_ZBVA_SHIFT 14 + /* Ability to support local invalidate fencing */ +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK 0x1 +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT 15 + /* Ability to support Loopback on QP */ +#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK 0x1 +#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT 16 + u64 page_size_caps; + u8 dev_ack_delay; + u32 reserved_lkey; + u32 bad_pkey_counter; + struct qed_rdma_events events; +}; + +enum qed_port_state { + QED_RDMA_PORT_UP, + QED_RDMA_PORT_DOWN, +}; + +enum qed_roce_capability { + QED_ROCE_V1 = 1 << 0, + QED_ROCE_V2 = 1 << 1, +}; + +struct qed_rdma_port { + enum qed_port_state port_state; + int link_speed; + u64 max_msg_size; + u8 source_gid_table_len; + void *source_gid_table_ptr; + u8 pkey_table_len; + void *pkey_table_ptr; + u32 pkey_bad_counter; + enum qed_roce_capability capability; +}; + +struct qed_rdma_cnq_params { + u8 num_pbl_pages; + u64 pbl_ptr; +}; + +/* The CQ Mode affects the CQ doorbell transaction size. + * 64/32 bit machines should configure to 32/16 bits respectively.
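 + * For example, a 64-bit host would typically pass + * QED_RDMA_CQ_MODE_32_BITS in qed_rdma_start_in_params; + * qed_rdma_init_devinfo() then reports QED_RDMA_MAX_CQE_32_BIT as the + * max_cqe limit.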
+ */ +enum qed_rdma_cq_mode { + QED_RDMA_CQ_MODE_16_BITS, + QED_RDMA_CQ_MODE_32_BITS, +}; + +struct qed_roce_dcqcn_params { + u8 notification_point; + u8 reaction_point; + + /* fields for notification point */ + u32 cnp_send_timeout; + + /* fields for reaction point */ + u32 rl_bc_rate; + u16 rl_max_rate; + u16 rl_r_ai; + u16 rl_r_hai; + u16 dcqcn_g; + u32 dcqcn_k_us; + u32 dcqcn_timeout_us; +}; + +struct qed_rdma_start_in_params { + struct qed_rdma_events *events; + struct qed_rdma_cnq_params cnq_pbl_list[128]; + u8 desired_cnq; + enum qed_rdma_cq_mode cq_mode; + struct qed_roce_dcqcn_params dcqcn_params; + u16 max_mtu; + u8 mac_addr[ETH_ALEN]; + u8 iwarp_flags; +}; + +struct qed_rdma_add_user_out_params { + u16 dpi; + u64 dpi_addr; + u64 dpi_phys_addr; + u32 dpi_size; +}; + +enum roce_mode { + ROCE_V1, + ROCE_V2_IPV4, + ROCE_V2_IPV6, + MAX_ROCE_MODE +}; + +union qed_gid { + u8 bytes[16]; + u16 words[8]; + u32 dwords[4]; + u64 qwords[2]; + u32 ipv4_addr; +}; + +struct qed_rdma_register_tid_in_params { + u32 itid; + enum qed_rdma_tid_type tid_type; + u8 key; + u16 pd; + bool local_read; + bool local_write; + bool remote_read; + bool remote_write; + bool remote_atomic; + bool mw_bind; + u64 pbl_ptr; + bool pbl_two_level; + u8 pbl_page_size_log; + u8 page_size_log; + u32 fbo; + u64 length; + u64 vaddr; + bool zbva; + bool phy_mr; + bool dma_mr; + + bool dif_enabled; + u64 dif_error_addr; + u64 dif_runt_addr; +}; + +struct qed_rdma_create_srq_in_params { + u64 pbl_base_addr; + u64 prod_pair_addr; + u16 num_pages; + u16 pd_id; + u16 page_size; +}; + +struct qed_rdma_create_srq_out_params { + u16 srq_id; +}; + +struct qed_rdma_destroy_srq_in_params { + u16 srq_id; +}; + +struct qed_rdma_modify_srq_in_params { + u32 wqe_limit; + u16 srq_id; +}; + +struct qed_rdma_stats_out_params { + u64 sent_bytes; + u64 sent_pkts; + u64 rcv_bytes; + u64 rcv_pkts; +}; + +struct qed_rdma_counters_out_params { + u64 pd_count; + u64 max_pd; + u64 dpi_count; + u64 max_dpi; + u64 cq_count; + u64 max_cq; + u64 qp_count; + u64 max_qp; + u64 tid_count; + u64 max_tid; +}; + +#define QED_ROCE_TX_HEAD_FAILURE (1) +#define QED_ROCE_TX_FRAG_FAILURE (2) + +enum qed_rdma_type { + QED_RDMA_TYPE_ROCE, +}; + +struct qed_dev_rdma_info { + struct qed_dev_info common; + enum qed_rdma_type rdma_type; +}; + +struct qed_rdma_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_rdma_info *info); + void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev); + + int (*rdma_init)(struct qed_dev *dev, + struct qed_rdma_start_in_params *iparams); + + int (*rdma_add_user)(void *rdma_cxt, + struct qed_rdma_add_user_out_params *oparams); + + void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); + int (*rdma_stop)(void *rdma_cxt); + struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + int (*rdma_get_start_sb)(struct qed_dev *cdev); + int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); + void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); + int (*rdma_get_rdma_int)(struct qed_dev *cdev, + struct qed_int_info *info); + int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); +}; + +const struct qed_rdma_ops *qed_get_rdma_ops(void); + +#endif diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 187991c1f439..7663725faa94 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -28,6 +28,7 @@ #define RDMA_MAX_PDS (64 * 1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS +#define RDMA_NUM_STATISTIC_COUNTERS_BB 
MAX_NUM_VPORTS_BB #define RDMA_TASK_TYPE (PROTOCOLID_ROCE) From c295f86e60f5ba67f0f4bba2bb2c22b3cbf01ec1 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:58 +0300 Subject: [PATCH 1035/1050] qed: PD, PKEY and CQ verb support Add support for the configurations of the protection domain and completion queues. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 324 +++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 21 ++ include/linux/qed/qed_roce_if.h | 30 ++ 3 files changed, 375 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 4c53b857cc1c..f9551643428f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -663,6 +663,22 @@ int qed_rdma_add_user(void *rdma_cxt, return rc; } +struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n"); + + /* Link may have changed */ + p_port->port_state = p_hwfn->mcp_info->link_output.link_up ? + QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + + p_port->link_speed = p_hwfn->mcp_info->link_output.speed; + + return p_port; +} + struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -788,6 +804,309 @@ static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info) return 0; } +int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + u32 returned_id; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD\n"); + + /* Allocates an unused protection domain */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->pd_map, &returned_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + *pd = (u16)returned_id; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD - done, rc = %d\n", rc); + return rc; +} + +void qed_rdma_free_pd(void *rdma_cxt, u16 pd) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "pd = %08x\n", pd); + + /* Returns a previously allocated protection domain for reuse */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->pd_map, pd); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static enum qed_rdma_toggle_bit +qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid) +{ + struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; + enum qed_rdma_toggle_bit toggle_bit; + u32 bmap_id; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", icid); + + /* The function toggles the bit that is related to a given icid + * and returns the new toggle bit's value. + */ + bmap_id = icid - qed_cxt_get_proto_cid_start(p_hwfn, p_info->proto); + + spin_lock_bh(&p_info->lock); + toggle_bit = !test_and_change_bit(bmap_id, + p_info->toggle_bits.bitmap); + spin_unlock_bh(&p_info->lock); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QED_RDMA_TOGGLE_BIT_= %d\n", + toggle_bit); + + return toggle_bit; +} + +int qed_rdma_create_cq(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, u16 *icid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; + struct rdma_create_cq_ramrod_data *p_ramrod; + enum qed_rdma_toggle_bit toggle_bit; + 
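/* toggle_bit: flipped once per create/resize of this icid by + * qed_rdma_toggle_bit_create_resize_cq() above and passed to the FW + * in the ramrod below. + */ + 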
struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + u32 returned_id, start_cid; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "cq_handle = %08x%08x\n", + params->cq_handle_hi, params->cq_handle_lo); + + /* Allocate icid */ + spin_lock_bh(&p_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_info->cq_map, &returned_id); + spin_unlock_bh(&p_info->lock); + + if (rc) { + DP_NOTICE(p_hwfn, "Can't create CQ, rc = %d\n", rc); + return rc; + } + + start_cid = qed_cxt_get_proto_cid_start(p_hwfn, + p_info->proto); + *icid = returned_id + start_cid; + + /* Check if icid requires a page allocation */ + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *icid); + if (rc) + goto err; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = *icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Send create CQ ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_CREATE_CQ, + p_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_create_cq; + + p_ramrod->cq_handle.hi = cpu_to_le32(params->cq_handle_hi); + p_ramrod->cq_handle.lo = cpu_to_le32(params->cq_handle_lo); + p_ramrod->dpi = cpu_to_le16(params->dpi); + p_ramrod->is_two_level_pbl = params->pbl_two_level; + p_ramrod->max_cqes = cpu_to_le32(params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, params->pbl_ptr); + p_ramrod->pbl_num_pages = cpu_to_le16(params->pbl_num_pages); + p_ramrod->cnq_id = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM) + + params->cnq_id; + p_ramrod->int_timeout = params->int_timeout; + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + + p_ramrod->toggle_bit = toggle_bit; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) { + /* restore toggle bit */ + qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + goto err; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Created CQ, rc = %d\n", rc); + return rc; + +err: + /* release allocated icid */ + qed_bmap_release_id(p_hwfn, &p_info->cq_map, returned_id); + DP_NOTICE(p_hwfn, "Create CQ failed, rc = %d\n", rc); + + return rc; +} + +int qed_rdma_resize_cq(void *rdma_cxt, + struct qed_rdma_resize_cq_in_params *in_params, + struct qed_rdma_resize_cq_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_resize_cq_output_params *p_ramrod_res; + struct rdma_resize_cq_ramrod_data *p_ramrod; + enum qed_rdma_toggle_bit toggle_bit; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + u8 fw_return_code; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = + (struct rdma_resize_cq_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed resize cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_RESIZE_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_resize_cq; + + p_ramrod->flags = 0; + + /* toggle the bit for every resize or create cq for a given icid */ + 
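/* (Presumably the FW stamps CQEs with the current toggle value, so + * entries from before and after the resize can be told apart; note + * that, unlike the create path above, a failed resize ramrod is not + * followed by a second toggle to restore the old value.) + */ + 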
toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, + in_params->icid); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL, + in_params->pbl_two_level); + + p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12; + p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages); + p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, in_params->pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) + goto err; + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + rc = -EINVAL; + goto err; + } + + out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod); + out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons); + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc); + + return rc; + +err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + p_ramrod_res, ramrod_res_phys); + DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc); + + return rc; +} + +int qed_rdma_destroy_cq(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *in_params, + struct qed_rdma_destroy_cq_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_destroy_cq_output_params *p_ramrod_res; + struct rdma_destroy_cq_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = + (struct rdma_destroy_cq_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed destroy cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Send destroy CQ ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DESTROY_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_destroy_cq; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + out_params->num_cq_notif = le16_to_cpu(p_ramrod_res->cnq_num); + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + /* Free icid */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + qed_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cq_map, + (in_params->icid - + qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn-> + p_rdma_info->proto))); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroyed CQ, rc = %d\n", rc); + return rc; + +err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { return QED_LEADING_HWFN(cdev); @@ -871,12 +1190,17 @@ static const struct qed_rdma_ops 
qed_rdma_ops_pass = { .rdma_add_user = &qed_rdma_add_user, .rdma_remove_user = &qed_rdma_remove_user, .rdma_stop = &qed_rdma_stop, + .rdma_query_port = &qed_rdma_query_port, .rdma_query_device = &qed_rdma_query_device, .rdma_get_start_sb = &qed_rdma_get_sb_start, .rdma_get_rdma_int = &qed_rdma_get_int, .rdma_set_rdma_int = &qed_rdma_set_int, .rdma_get_min_cnq_msix = &qed_rdma_get_min_cnq_msix, .rdma_cnq_prod_update = &qed_rdma_cnq_prod_update, + .rdma_alloc_pd = &qed_rdma_alloc_pd, + .rdma_dealloc_pd = &qed_rdma_free_pd, + .rdma_create_cq = &qed_rdma_create_cq, + .rdma_destroy_cq = &qed_rdma_destroy_cq, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index e55048106a83..1fe73707e0b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -94,6 +94,26 @@ struct qed_rdma_info { enum protocol_type proto; }; +struct qed_rdma_resize_cq_in_params { + u16 icid; + u32 cq_size; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; +}; + +struct qed_rdma_resize_cq_out_params { + u32 prod; + u32 cons; +}; + +struct qed_rdma_resize_cnq_in_params { + u32 cnq_id; + u32 pbl_page_size_log; + u64 pbl_ptr; +}; + int qed_rdma_add_user(void *rdma_cxt, struct qed_rdma_add_user_out_params *out_params); @@ -102,6 +122,7 @@ int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid); int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid); void qed_rdma_free_tid(void *rdma_cxt, u32 tid); struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt); +struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt); int qed_rdma_register_tid(void *rdma_cxt, struct qed_rdma_register_tid_in_params *params); diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 0f7d5275e515..b559b1c9e76d 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -263,6 +263,19 @@ struct qed_rdma_register_tid_in_params { u64 dif_runt_addr; }; +struct qed_rdma_create_cq_in_params { + u32 cq_handle_lo; + u32 cq_handle_hi; + u32 cq_size; + u16 dpi; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; + u8 cnq_id; + u16 int_timeout; +}; + struct qed_rdma_create_srq_in_params { u64 pbl_base_addr; u64 prod_pair_addr; @@ -271,6 +284,14 @@ struct qed_rdma_create_srq_in_params { u16 page_size; }; +struct qed_rdma_destroy_cq_in_params { + u16 icid; +}; + +struct qed_rdma_destroy_cq_out_params { + u16 num_cq_notif; +}; + struct qed_rdma_create_srq_out_params { u16 srq_id; }; @@ -332,12 +353,21 @@ struct qed_rdma_ops { void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); int (*rdma_stop)(void *rdma_cxt); struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt); int (*rdma_get_start_sb)(struct qed_dev *cdev); int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); int (*rdma_get_rdma_int)(struct qed_dev *cdev, struct qed_int_info *info); int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); + int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); + void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); + int (*rdma_create_cq)(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, + u16 *icid); + int (*rdma_destroy_cq)(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *iparams, + struct qed_rdma_destroy_cq_out_params *oparams); }; const struct qed_rdma_ops 
*qed_get_rdma_ops(void); From f109394033521862f2558df93d9afc4dfa829c6a Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:59 +0300 Subject: [PATCH 1036/1050] qed: Add support for QP verbs Add support for the slowpath configuration of Queue Pair verbs, which add, delete, modify and query Queue Pairs. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.h | 1 + drivers/net/ethernet/qlogic/qed/qed_roce.c | 1197 ++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 71 ++ include/linux/qed/qed_roce_if.h | 144 +++ 4 files changed, 1413 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index d00ad055802b..2b8bdaa77800 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -176,6 +176,7 @@ u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, enum protocol_type type); u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn, enum protocol_type type); +int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto); #define QED_CTX_WORKING_MEM 0 #define QED_CTX_FL_MEM 1 diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index f9551643428f..438a0badc4e1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -1107,6 +1107,1199 @@ err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, return rc; } +static void qed_rdma_set_fw_mac(u16 *p_fw_mac, u8 *p_qed_mac) +{ + p_fw_mac[0] = cpu_to_le16((p_qed_mac[0] << 8) + p_qed_mac[1]); + p_fw_mac[1] = cpu_to_le16((p_qed_mac[2] << 8) + p_qed_mac[3]); + p_fw_mac[2] = cpu_to_le16((p_qed_mac[4] << 8) + p_qed_mac[5]); +} + +static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, + __le32 *dst_gid) +{ + u32 i; + + if (qp->roce_mode == ROCE_V2_IPV4) { + /* The IPv4 addresses shall be aligned to the highest word. + * The lower words must be zero.
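 + * Resulting layout (as implied by the code below, dwords shown in + * little endian): gid[0] = gid[1] = gid[2] = 0 and gid[3] holds the + * IPv4 address, i.e. the address occupies the last 32 bits of the + * 128-bit GID.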
+ */ + memset(src_gid, 0, sizeof(union qed_gid)); + memset(dst_gid, 0, sizeof(union qed_gid)); + src_gid[3] = cpu_to_le32(qp->sgid.ipv4_addr); + dst_gid[3] = cpu_to_le32(qp->dgid.ipv4_addr); + } else { + /* GIDs and IPv6 addresses coincide in location and size */ + for (i = 0; i < ARRAY_SIZE(qp->sgid.dwords); i++) { + src_gid[i] = cpu_to_le32(qp->sgid.dwords[i]); + dst_gid[i] = cpu_to_le32(qp->dgid.dwords[i]); + } + } +} + +static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) +{ + enum roce_flavor flavor; + + switch (roce_mode) { + case ROCE_V1: + flavor = PLAIN_ROCE; + break; + case ROCE_V2_IPV4: + flavor = RROCE_IPV4; + break; + case ROCE_V2_IPV6: + flavor = ROCE_V2_IPV6; + break; + default: + flavor = MAX_ROCE_MODE; + break; + } + return flavor; +} + +int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 responder_icid; + u32 requester_icid; + int rc; + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map, + &responder_icid); + if (rc) { + spin_unlock_bh(&p_rdma_info->lock); + return rc; + } + + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map, + &requester_icid); + + spin_unlock_bh(&p_rdma_info->lock); + if (rc) + goto err; + + /* the two icid's should be adjacent */ + if ((requester_icid - responder_icid) != 1) { + DP_NOTICE(p_hwfn, "Failed to allocate two adjacent qp's'\n"); + rc = -EINVAL; + goto err; + } + + responder_icid += qed_cxt_get_proto_cid_start(p_hwfn, + p_rdma_info->proto); + requester_icid += qed_cxt_get_proto_cid_start(p_hwfn, + p_rdma_info->proto); + + /* If these icids require a new ILT line allocate DMA-able context for + * an ILT page + */ + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, responder_icid); + if (rc) + goto err; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, requester_icid); + if (rc) + goto err; + + *cid = (u16)responder_icid; + return rc; + +err: + spin_lock_bh(&p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, responder_icid); + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, requester_icid); + + spin_unlock_bh(&p_rdma_info->lock); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Allocate CID - failed, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp) +{ + struct roce_create_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + union qed_qm_pq_params qm_params; + enum roce_flavor roce_flavor; + struct qed_spq_entry *p_ent; + u16 physical_queue0 = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for IRQ */ + qp->irq_num_pages = 1; + qp->irq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + RDMA_RING_PAGE_SIZE, + &qp->irq_phys_addr, GFP_KERNEL); + if (!qp->irq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed create responder failed: cannot allocate memory (irq). 
rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_resp; + + p_ramrod->flags = 0; + + roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR, roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG, qp->use_srq); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->irq_num_pages = qp->irq_num_pages; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + p_ramrod->initial_psn = cpu_to_le32(qp->rq_psn); + p_ramrod->pd = cpu_to_le16(qp->pd); + p_ramrod->rq_num_pages = cpu_to_le16(qp->rq_num_pages); + DMA_REGPAIR_LE(p_ramrod->rq_pbl_addr, qp->rq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->irq_pbl_addr, qp->irq_phys_addr); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); + p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; + p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | + qp->rq_cq_id); + + memset(&qm_params, 0, sizeof(qm_params)); + qm_params.roce.qpid = qp->icid >> 1; + physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + + p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); + p_ramrod->dpi = cpu_to_le16(qp->dpi); + + qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id); + p_ramrod->srq_id.srq_idx = cpu_to_le16(qp->srq_id); + p_ramrod->srq_id.opaque_fid = cpu_to_le16(p_hwfn->hw_info.opaque_fid); + + p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d physical_queue0 = 0x%x\n", + rc, physical_queue0); + + if (rc) + goto err; + + qp->resp_offloaded = true; + + return rc; + +err: + DP_NOTICE(p_hwfn, "create responder - failed, rc = %d\n", rc); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->irq_num_pages * RDMA_RING_PAGE_SIZE, + qp->irq, 
qp->irq_phys_addr); + + return rc; +} + +static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp) +{ + struct roce_create_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + union qed_qm_pq_params qm_params; + enum roce_flavor roce_flavor; + struct qed_spq_entry *p_ent; + u16 physical_queue0 = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for ORQ */ + qp->orq_num_pages = 1; + qp->orq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + RDMA_RING_PAGE_SIZE, + &qp->orq_phys_addr, GFP_KERNEL); + if (!qp->orq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed create requester failed: cannot allocate memory (orq). rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_req; + + p_ramrod->flags = 0; + + roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ROCE_FLAVOR, roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_SIGNALED_COMP, qp->signal_all); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->orq_num_pages = qp->orq_num_pages; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp); + p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + p_ramrod->initial_psn = cpu_to_le32(qp->sq_psn); + p_ramrod->pd = cpu_to_le16(qp->pd); + p_ramrod->sq_num_pages = cpu_to_le16(qp->sq_num_pages); + DMA_REGPAIR_LE(p_ramrod->sq_pbl_addr, qp->sq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->orq_pbl_addr, qp->orq_phys_addr); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); + p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; + p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | + qp->sq_cq_id); + + memset(&qm_params, 0, sizeof(qm_params)); + qm_params.roce.qpid = qp->icid >> 1; + physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + + p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); + p_ramrod->dpi = cpu_to_le16(qp->dpi); + + qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id); + p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, 
QED_MSG_RDMA, "rc = %d\n", rc); + + if (rc) + goto err; + + qp->req_offloaded = true; + + return rc; + +err: + DP_NOTICE(p_hwfn, "Create requester - failed, rc = %d\n", rc); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->orq_num_pages * RDMA_RING_PAGE_SIZE, + qp->orq, qp->orq_phys_addr); + return rc; +} + +static int qed_roce_sp_modify_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + bool move_to_err, u32 modify_flags) +{ + struct roce_modify_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !qp->resp_offloaded) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) { + DP_NOTICE(p_hwfn, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_resp; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MAX_IRD_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify responder, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + bool move_to_sqd, + bool move_to_err, u32 modify_flags) +{ + struct roce_modify_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !(qp->req_offloaded)) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + 
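/* the requester rides on the CID right after the responder's; + * qed_roce_alloc_cid() allocates the two icids adjacently + */ + 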
init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) { + DP_NOTICE(p_hwfn, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_req; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_SQD_FLG, move_to_sqd); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_EN_SQD_ASYNC_NOTIFY, + qp->sqd_async); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MAX_ORD_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ACK_TIMEOUT_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt); + + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify requester, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + u32 *num_invalidated_mw) +{ + struct roce_destroy_qp_resp_output_params *p_ramrod_res; + struct roce_destroy_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->resp_offloaded) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_resp; + + p_ramrod_res = (struct roce_destroy_qp_resp_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res), + &ramrod_res_phys, GFP_KERNEL); + + if (!p_ramrod_res) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed destroy responder failed: cannot allocate memory (ramrod). 
rc = %d\n", + rc); + return rc; + } + + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + *num_invalidated_mw = le32_to_cpu(p_ramrod_res->num_invalidated_mw); + + /* Free IRQ - only if ramrod succeeded, in case FW is still using it */ + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->irq_num_pages * RDMA_RING_PAGE_SIZE, + qp->irq, qp->irq_phys_addr); + + qp->resp_offloaded = false; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy responder, rc = %d\n", rc); + +err: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct roce_destroy_qp_resp_output_params), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + +static int qed_roce_sp_destroy_qp_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + u32 *num_bound_mw) +{ + struct roce_destroy_qp_req_output_params *p_ramrod_res; + struct roce_destroy_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->req_offloaded) + return 0; + + p_ramrod_res = (struct roce_destroy_qp_req_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_ramrod_res), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed destroy requester failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_req; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + *num_bound_mw = le32_to_cpu(p_ramrod_res->num_bound_mw); + + /* Free ORQ - only if ramrod succeeded, in case FW is still using it */ + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->orq_num_pages * RDMA_RING_PAGE_SIZE, + qp->orq, qp->orq_phys_addr); + + qp->req_offloaded = false; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy requester, rc = %d\n", rc); + +err: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + +int qed_roce_query_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) +{ + struct roce_query_qp_resp_output_params *p_resp_ramrod_res; + struct roce_query_qp_req_output_params *p_req_ramrod_res; + struct roce_query_qp_resp_ramrod_data *p_resp_ramrod; + struct roce_query_qp_req_ramrod_data *p_req_ramrod; + struct qed_sp_init_data init_data; + dma_addr_t resp_ramrod_res_phys; + dma_addr_t req_ramrod_res_phys; + struct qed_spq_entry *p_ent; + bool rq_err_state; + bool sq_err_state; + bool sq_draining; + int rc = -ENOMEM; + + if ((!(qp->resp_offloaded)) && (!(qp->req_offloaded))) { + /* We can't send ramrod to the fw since this qp wasn't offloaded + * to the fw yet + */ + out_params->draining = false; + out_params->rq_psn = qp->rq_psn; + out_params->sq_psn = qp->sq_psn; + out_params->state = qp->cur_state; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "No QPs as no offload\n"); + return 0; + } + + if (!(qp->resp_offloaded)) { + DP_NOTICE(p_hwfn, + "The responder's qp should be offloded before requester's\n"); + return -EINVAL; + } + + /* Send 
a query responder ramrod to FW to get RQ-PSN and state */ + p_resp_ramrod_res = (struct roce_query_qp_resp_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_resp_ramrod_res), + &resp_ramrod_res_phys, GFP_KERNEL); + if (!p_resp_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed query qp failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err_resp; + + p_resp_ramrod = &p_ent->ramrod.roce_query_qp_resp; + DMA_REGPAIR_LE(p_resp_ramrod->output_params_addr, resp_ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err_resp; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), + p_resp_ramrod_res, resp_ramrod_res_phys); + + out_params->rq_psn = le32_to_cpu(p_resp_ramrod_res->psn); + rq_err_state = GET_FIELD(le32_to_cpu(p_resp_ramrod_res->err_flag), + ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG); + + if (!(qp->req_offloaded)) { + /* Don't send query qp for the requester */ + out_params->sq_psn = qp->sq_psn; + out_params->draining = false; + + if (rq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_ERR; + + out_params->state = qp->cur_state; + + return 0; + } + + /* Send a query requester ramrod to FW to get SQ-PSN and state */ + p_req_ramrod_res = (struct roce_query_qp_req_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_req_ramrod_res), + &req_ramrod_res_phys, + GFP_KERNEL); + if (!p_req_ramrod_res) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed query qp failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + init_data.cid = qp->icid + 1; + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err_req; + + p_req_ramrod = &p_ent->ramrod.roce_query_qp_req; + DMA_REGPAIR_LE(p_req_ramrod->output_params_addr, req_ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err_req; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), + p_req_ramrod_res, req_ramrod_res_phys); + + out_params->sq_psn = le32_to_cpu(p_req_ramrod_res->psn); + sq_err_state = GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_ERR_FLG); + sq_draining = + GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_SQ_DRAINING_FLG); + + out_params->draining = false; + + if (rq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_ERR; + else if (sq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_SQE; + else if (sq_draining) + out_params->draining = true; + out_params->state = qp->cur_state; + + return 0; + +err_req: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), + p_req_ramrod_res, req_ramrod_res_phys); + return rc; +err_resp: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), + p_resp_ramrod_res, resp_ramrod_res_phys); + return rc; +} + +int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +{ + u32 num_invalidated_mw = 0; + u32 num_bound_mw = 0; + u32 start_cid; + int rc; + + /* Destroys the specified QP */ + if ((qp->cur_state != QED_ROCE_QP_STATE_RESET) && + (qp->cur_state != QED_ROCE_QP_STATE_ERR) && + (qp->cur_state != QED_ROCE_QP_STATE_INIT)) { + DP_NOTICE(p_hwfn, + "QP must be in 
error, reset or init state before destroying it\n"); + return -EINVAL; + } + + rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, &num_invalidated_mw); + if (rc) + return rc; + + /* Send destroy requester ramrod */ + rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, &num_bound_mw); + if (rc) + return rc; + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + "number of invalidate memory windows is different from bounded ones\n"); + return -EINVAL; + } + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + start_cid = qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + /* Release responder's icid */ + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, + qp->icid - start_cid); + + /* Release requester's icid */ + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, + qp->icid + 1 - start_cid); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + return 0; +} + +int qed_rdma_query_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* The following fields are filled in from qp and not FW as they can't + * be modified by FW + */ + out_params->mtu = qp->mtu; + out_params->dest_qp = qp->dest_qp; + out_params->incoming_atomic_en = qp->incoming_atomic_en; + out_params->e2e_flow_control_en = qp->e2e_flow_control_en; + out_params->incoming_rdma_read_en = qp->incoming_rdma_read_en; + out_params->incoming_rdma_write_en = qp->incoming_rdma_write_en; + out_params->dgid = qp->dgid; + out_params->flow_label = qp->flow_label; + out_params->hop_limit_ttl = qp->hop_limit_ttl; + out_params->traffic_class_tos = qp->traffic_class_tos; + out_params->timeout = qp->ack_timeout; + out_params->rnr_retry = qp->rnr_retry_cnt; + out_params->retry_cnt = qp->retry_cnt; + out_params->min_rnr_nak_timer = qp->min_rnr_nak_timer; + out_params->pkey_index = 0; + out_params->max_rd_atomic = qp->max_rd_atomic_req; + out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp; + out_params->sqd_async = qp->sqd_async; + + rc = qed_roce_query_qp(p_hwfn, qp, out_params); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc); + return rc; +} + +int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc = 0; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + rc = qed_roce_destroy_qp(p_hwfn, qp); + + /* free qp params struct */ + kfree(qp); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP destroyed\n"); + return rc; +} + +struct qed_rdma_qp * +qed_rdma_create_qp(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *in_params, + struct qed_rdma_create_qp_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_qp *qp; + u8 max_stats_queues; + int rc; + + if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) { + DP_ERR(p_hwfn->cdev, + "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + rdma_cxt, in_params, out_params); + return NULL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "qed rdma create qp called with qp_handle = %08x%08x\n", + in_params->qp_handle_hi, in_params->qp_handle_lo); + + /* Some sanity checks... */ + max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues; + if (in_params->stats_queue >= max_stats_queues) { + DP_ERR(p_hwfn->cdev, + "qed rdma create qp failed due to invalid statistics queue %d. 
maximum is %d\n", + in_params->stats_queue, max_stats_queues); + return NULL; + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + DP_NOTICE(p_hwfn, "Failed to allocate qed_rdma_qp\n"); + return NULL; + } + + rc = qed_roce_alloc_cid(p_hwfn, &qp->icid); + qp->qpid = ((0xFF << 16) | qp->icid); + + DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid); + + if (rc) { + kfree(qp); + return NULL; + } + + qp->cur_state = QED_ROCE_QP_STATE_RESET; + qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi); + qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo); + qp->qp_handle_async.hi = cpu_to_le32(in_params->qp_handle_async_hi); + qp->qp_handle_async.lo = cpu_to_le32(in_params->qp_handle_async_lo); + qp->use_srq = in_params->use_srq; + qp->signal_all = in_params->signal_all; + qp->fmr_and_reserved_lkey = in_params->fmr_and_reserved_lkey; + qp->pd = in_params->pd; + qp->dpi = in_params->dpi; + qp->sq_cq_id = in_params->sq_cq_id; + qp->sq_num_pages = in_params->sq_num_pages; + qp->sq_pbl_ptr = in_params->sq_pbl_ptr; + qp->rq_cq_id = in_params->rq_cq_id; + qp->rq_num_pages = in_params->rq_num_pages; + qp->rq_pbl_ptr = in_params->rq_pbl_ptr; + qp->srq_id = in_params->srq_id; + qp->req_offloaded = false; + qp->resp_offloaded = false; + qp->e2e_flow_control_en = qp->use_srq ? false : true; + qp->stats_queue = in_params->stats_queue; + + out_params->icid = qp->icid; + out_params->qp_id = qp->qpid; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Create QP, rc = %d\n", rc); + return qp; +} + +static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + enum qed_roce_qp_state prev_state, + struct qed_rdma_modify_qp_in_params *params) +{ + u32 num_invalidated_mw = 0, num_bound_mw = 0; + int rc = 0; + + /* Perform additional operations according to the current state and the + * next state + */ + if (((prev_state == QED_ROCE_QP_STATE_INIT) || + (prev_state == QED_ROCE_QP_STATE_RESET)) && + (qp->cur_state == QED_ROCE_QP_STATE_RTR)) { + /* Init->RTR or Reset->RTR */ + rc = qed_roce_sp_create_responder(p_hwfn, qp); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTR) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* RTR-> RTS */ + rc = qed_roce_sp_create_requester(p_hwfn, qp); + if (rc) + return rc; + + /* Send modify responder ramrod */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTS) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* RTS->RTS */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTS) && + (qp->cur_state == QED_ROCE_QP_STATE_SQD)) { + /* RTS->SQD */ + rc = qed_roce_sp_modify_requester(p_hwfn, qp, true, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_SQD) && + (qp->cur_state == QED_ROCE_QP_STATE_SQD)) { + /* SQD->SQD */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_SQD) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* SQD->RTS */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + + 
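+	/* For SQD->RTS both ramrods are posted: the responder is updated
+	 * first, then the requester, before the send queue resumes.
+	 */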
return rc; + } else if (qp->cur_state == QED_ROCE_QP_STATE_ERR || + qp->cur_state == QED_ROCE_QP_STATE_SQE) { + /* ->ERR */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, true, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, true, + params->modify_flags); + return rc; + } else if (qp->cur_state == QED_ROCE_QP_STATE_RESET) { + /* Any state -> RESET */ + + rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, + &num_invalidated_mw); + if (rc) + return rc; + + rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, + &num_bound_mw); + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + "number of invalidate memory windows is different from bounded ones\n"); + return -EINVAL; + } + } else { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "0\n"); + } + + return rc; +} + +int qed_rdma_modify_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + enum qed_roce_qp_state prev_state; + int rc = 0; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x params->new_state=%d\n", + qp->icid, params->new_state); + + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)) { + qp->incoming_rdma_read_en = params->incoming_rdma_read_en; + qp->incoming_rdma_write_en = params->incoming_rdma_write_en; + qp->incoming_atomic_en = params->incoming_atomic_en; + } + + /* Update QP structure with the updated values */ + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE)) + qp->roce_mode = params->roce_mode; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)) + qp->pkey = params->pkey; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN)) + qp->e2e_flow_control_en = params->e2e_flow_control_en; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_DEST_QP)) + qp->dest_qp = params->dest_qp; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)) { + /* Indicates that the following parameters have changed: + * Traffic class, flow label, hop limit, source GID, + * destination GID, loopback indicator + */ + qp->traffic_class_tos = params->traffic_class_tos; + qp->flow_label = params->flow_label; + qp->hop_limit_ttl = params->hop_limit_ttl; + + qp->sgid = params->sgid; + qp->dgid = params->dgid; + qp->udp_src_port = 0; + qp->vlan_id = params->vlan_id; + qp->mtu = params->mtu; + qp->lb_indication = params->lb_indication; + memcpy((u8 *)&qp->remote_mac_addr[0], + (u8 *)¶ms->remote_mac_addr[0], ETH_ALEN); + if (params->use_local_mac) { + memcpy((u8 *)&qp->local_mac_addr[0], + (u8 *)¶ms->local_mac_addr[0], ETH_ALEN); + } else { + memcpy((u8 *)&qp->local_mac_addr[0], + (u8 *)&p_hwfn->hw_info.hw_mac_addr, ETH_ALEN); + } + } + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_RQ_PSN)) + qp->rq_psn = params->rq_psn; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_SQ_PSN)) + qp->sq_psn = params->sq_psn; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)) + qp->max_rd_atomic_req = params->max_rd_atomic_req; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)) + qp->max_rd_atomic_resp = params->max_rd_atomic_resp; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)) + qp->ack_timeout = params->ack_timeout; + if (GET_FIELD(params->modify_flags, 
QED_ROCE_MODIFY_QP_VALID_RETRY_CNT)) + qp->retry_cnt = params->retry_cnt; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)) + qp->rnr_retry_cnt = params->rnr_retry_cnt; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)) + qp->min_rnr_nak_timer = params->min_rnr_nak_timer; + + qp->sqd_async = params->sqd_async; + + prev_state = qp->cur_state; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_NEW_STATE)) { + qp->cur_state = params->new_state; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "qp->cur_state=%d\n", + qp->cur_state); + } + + rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc); + return rc; +} + static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { return QED_LEADING_HWFN(cdev); @@ -1201,6 +2394,10 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_dealloc_pd = &qed_rdma_free_pd, .rdma_create_cq = &qed_rdma_create_cq, .rdma_destroy_cq = &qed_rdma_destroy_cq, + .rdma_create_qp = &qed_rdma_create_qp, + .rdma_modify_qp = &qed_rdma_modify_qp, + .rdma_query_qp = &qed_rdma_query_qp, + .rdma_destroy_qp = &qed_rdma_destroy_qp, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index 1fe73707e0b5..b8ddda456101 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -114,6 +114,72 @@ struct qed_rdma_resize_cnq_in_params { u64 pbl_ptr; }; +struct qed_rdma_qp { + struct regpair qp_handle; + struct regpair qp_handle_async; + u32 qpid; + u16 icid; + enum qed_roce_qp_state cur_state; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + + u16 pd; + u16 pkey; + u32 dest_qp; + u16 mtu; + u16 srq_id; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u16 dpi; + u32 flow_label; + bool lb_indication; + u16 vlan_id; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + union qed_gid sgid; + union qed_gid dgid; + enum roce_mode roce_mode; + u16 udp_src_port; + u8 stats_queue; + + /* requeseter */ + u8 max_rd_atomic_req; + u32 sq_psn; + u16 sq_cq_id; + u16 sq_num_pages; + dma_addr_t sq_pbl_ptr; + void *orq; + dma_addr_t orq_phys_addr; + u8 orq_num_pages; + bool req_offloaded; + + /* responder */ + u8 max_rd_atomic_resp; + u32 rq_psn; + u16 rq_cq_id; + u16 rq_num_pages; + dma_addr_t rq_pbl_ptr; + void *irq; + dma_addr_t irq_phys_addr; + u8 irq_num_pages; + bool resp_offloaded; + + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + + void *shared_queue; + dma_addr_t shared_queue_phys_addr; +}; + int qed_rdma_add_user(void *rdma_cxt, struct qed_rdma_add_user_out_params *out_params); @@ -135,6 +201,11 @@ void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod); void qed_rdma_resc_free(struct qed_hwfn *p_hwfn); void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe); +int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp); +int qed_rdma_modify_qp(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params); +int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params); #if IS_ENABLED(CONFIG_INFINIBAND_QEDR) void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); diff --git 
a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index b559b1c9e76d..02321e3b1716 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -43,6 +43,17 @@ #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) /* rdma interface */ + +enum qed_roce_qp_state { + QED_ROCE_QP_STATE_RESET, + QED_ROCE_QP_STATE_INIT, + QED_ROCE_QP_STATE_RTR, + QED_ROCE_QP_STATE_RTS, + QED_ROCE_QP_STATE_SQD, + QED_ROCE_QP_STATE_ERR, + QED_ROCE_QP_STATE_SQE +}; + enum qed_rdma_tid_type { QED_RDMA_TID_REGISTERED_MR, QED_RDMA_TID_FMR, @@ -292,6 +303,128 @@ struct qed_rdma_destroy_cq_out_params { u16 num_cq_notif; }; +struct qed_rdma_create_qp_in_params { + u32 qp_handle_lo; + u32 qp_handle_hi; + u32 qp_handle_async_lo; + u32 qp_handle_async_hi; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + u16 pd; + u16 dpi; + u16 sq_cq_id; + u16 sq_num_pages; + u64 sq_pbl_ptr; + u8 max_sq_sges; + u16 rq_cq_id; + u16 rq_num_pages; + u64 rq_pbl_ptr; + u16 srq_id; + u8 stats_queue; +}; + +struct qed_rdma_create_qp_out_params { + u32 qp_id; + u16 icid; + void *rq_pbl_virt; + dma_addr_t rq_pbl_phys; + void *sq_pbl_virt; + dma_addr_t sq_pbl_phys; +}; + +struct qed_rdma_modify_qp_in_params { + u32 modify_flags; +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT 0 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT 1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT 2 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT 3 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT 4 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT 5 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT 6 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT 7 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT 8 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT 9 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT 10 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT 11 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT 12 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT 13 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT 14 + + enum qed_roce_qp_state new_state; + u16 pkey; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + u32 dest_qp; + bool lb_indication; + u16 mtu; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u32 flow_label; + union qed_gid sgid; + union qed_gid dgid; + u16 udp_src_port; + + u16 vlan_id; + + u32 rq_psn; + u32 sq_psn; + u8 max_rd_atomic_resp; + u8 max_rd_atomic_req; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + bool use_local_mac; + enum roce_mode roce_mode; +}; + +struct 
qed_rdma_query_qp_out_params { + enum qed_roce_qp_state state; + u32 rq_psn; + u32 sq_psn; + bool draining; + u16 mtu; + u32 dest_qp; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + union qed_gid sgid; + union qed_gid dgid; + u32 flow_label; + u8 hop_limit_ttl; + u8 traffic_class_tos; + u32 timeout; + u8 rnr_retry; + u8 retry_cnt; + u8 min_rnr_nak_timer; + u16 pkey_index; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + bool sqd_async; +}; + struct qed_rdma_create_srq_out_params { u16 srq_id; }; @@ -368,6 +501,17 @@ struct qed_rdma_ops { int (*rdma_destroy_cq)(void *rdma_cxt, struct qed_rdma_destroy_cq_in_params *iparams, struct qed_rdma_destroy_cq_out_params *oparams); + struct qed_rdma_qp * + (*rdma_create_qp)(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *iparams, + struct qed_rdma_create_qp_out_params *oparams); + + int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *iparams); + + int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *oparams); + int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); }; const struct qed_rdma_ops *qed_get_rdma_ops(void);
From ee8eaea30b1368680f4d2f873bc14e1d7b57d021 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 22:00:00 +0300 Subject: [PATCH 1037/1050] qed: Add support for memory registration verbs
Add slowpath configuration support for user, DMA and memory region registration.
Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 240 +++++++++++++++++++++ include/linux/qed/qed_roce_if.h | 6 + 2 files changed, 246 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 438a0badc4e1..8b4854d3b395 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -689,6 +689,17 @@ struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) return p_hwfn->p_rdma_info->dev; } +void qed_rdma_free_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -2300,6 +2311,231 @@ int qed_rdma_modify_qp(void *rdma_cxt, return rc; } +int qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_register_tid_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + enum rdma_tid_type tid_type; + u8 fw_return_code; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", params->itid); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_REGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (p_hwfn->p_rdma_info->last_tid < params->itid) + p_hwfn->p_rdma_info->last_tid = params->itid; + + p_ramrod =
&p_ent->ramrod.rdma_register_tid; + + p_ramrod->flags = 0; + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL, + params->pbl_two_level); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, params->zbva); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr); + + /* Don't initialize D/C field, as it may override other bits. */ + if (!(params->tid_type == QED_RDMA_TID_FMR) && !(params->dma_mr)) + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG, + params->page_size_log - 12); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID, + p_hwfn->p_rdma_info->last_tid); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ, + params->remote_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE, + params->remote_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC, + params->remote_atomic); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE, + params->local_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ, params->local_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND, + params->mw_bind); + + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG, + params->pbl_page_size_log - 12); + + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR, params->dma_mr); + + switch (params->tid_type) { + case QED_RDMA_TID_REGISTERED_MR: + tid_type = RDMA_TID_REGISTERED_MR; + break; + case QED_RDMA_TID_FMR: + tid_type = RDMA_TID_FMR; + break; + case QED_RDMA_TID_MW_TYPE1: + tid_type = RDMA_TID_MW_TYPE1; + break; + case QED_RDMA_TID_MW_TYPE2A: + tid_type = RDMA_TID_MW_TYPE2A; + break; + default: + rc = -EINVAL; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE, tid_type); + + p_ramrod->itid = cpu_to_le32(params->itid); + p_ramrod->key = params->key; + p_ramrod->pd = cpu_to_le16(params->pd); + p_ramrod->length_hi = (u8)(params->length >> 32); + p_ramrod->length_lo = DMA_LO_LE(params->length); + if (params->zbva) { + /* Lower 32 bits of the registered MR address. 
+ * In case of zero based MR, will hold FBO + */ + p_ramrod->va.hi = 0; + p_ramrod->va.lo = cpu_to_le32(params->fbo); + } else { + DMA_REGPAIR_LE(p_ramrod->va, params->vaddr); + } + DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr); + + /* DIF */ + if (params->dif_enabled) { + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG, 1); + DMA_REGPAIR_LE(p_ramrod->dif_error_addr, + params->dif_error_addr); + DMA_REGPAIR_LE(p_ramrod->dif_runt_addr, params->dif_runt_addr); + } + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Register TID, rc = %d\n", rc); + return rc; +} + +int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_deregister_tid_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + struct qed_ptt *p_ptt; + u8 fw_return_code; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.rdma_deregister_tid; + p_ramrod->itid = cpu_to_le32(itid); + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (fw_return_code == RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + return -EINVAL; + } else if (fw_return_code == RDMA_RETURN_NIG_DRAIN_REQ) { + /* Bit indicating that the TID is in use and a nig drain is + * required before sending the ramrod again + */ + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + rc = -EBUSY; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to acquire PTT\n"); + return rc; + } + + rc = qed_mcp_drain(p_hwfn, p_ptt); + if (rc) { + qed_ptt_release(p_hwfn, p_ptt); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Drain failed\n"); + return rc; + } + + qed_ptt_release(p_hwfn, p_ptt); + + /* Resend the ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, + &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to init sp-element\n"); + return rc; + } + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Ramrod failed\n"); + return rc; + } + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", + fw_return_code); + return rc; + } + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "De-registered TID, rc = %d\n", rc); + return rc; +} + static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { return QED_LEADING_HWFN(cdev); @@ -2398,6 +2634,10 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_modify_qp = &qed_rdma_modify_qp, .rdma_query_qp = &qed_rdma_query_qp, .rdma_destroy_qp = &qed_rdma_destroy_qp, + .rdma_alloc_tid = &qed_rdma_alloc_tid, + .rdma_free_tid = &qed_rdma_free_tid, + .rdma_register_tid = &qed_rdma_register_tid, + .rdma_deregister_tid = &qed_rdma_deregister_tid, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/include/linux/qed/qed_roce_if.h 
b/include/linux/qed/qed_roce_if.h index 02321e3b1716..0b6df6eedcf1 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -512,6 +512,12 @@ struct qed_rdma_ops { int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, struct qed_rdma_query_qp_out_params *oparams); int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); + int + (*rdma_register_tid)(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *iparams); + int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); + int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); + void (*rdma_free_tid)(void *rdma_cxt, u32 itid); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); From abd49676c70793ee0a251bc3d8fe1604f9303210 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 22:00:01 +0300 Subject: [PATCH 1038/1050] qed: Add RoCE ll2 & GSI support Add the RoCE-specific LL2 logic [as well as GSI support] over the 'generic' LL2 interface. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 3 + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 115 +++++++- drivers/net/ethernet/qlogic/qed/qed_ll2.h | 29 +- drivers/net/ethernet/qlogic/qed/qed_roce.c | 307 +++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.h | 1 + include/linux/qed/qed_roce_if.h | 79 ++++++ 7 files changed, 523 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index c5a098a2ca2c..653bb5735f0c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -437,6 +437,7 @@ struct qed_hwfn { #endif struct z_stream_s *stream; + struct qed_roce_ll2_info *ll2; }; struct pci_params { diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 2777d5bb4380..72eee29c677f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -727,6 +727,9 @@ struct core_tx_bd_flags { #define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7 +#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK 0x1 +#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT 12 + }; struct core_tx_bd { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index e0ec8ed2f92c..a6db10717d5c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -278,6 +278,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) struct qed_ll2_tx_packet *p_pkt = NULL; struct qed_ll2_info *p_ll2_conn; struct qed_ll2_tx_queue *p_tx; + dma_addr_t tx_frag; p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); if (!p_ll2_conn) @@ -297,11 +298,22 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) p_tx->cur_completing_packet = *p_pkt; p_tx->cur_completing_bd_idx = 1; b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + tx_frag = p_pkt->bds_set[0].tx_frag; + if (p_ll2_conn->gsi_enable) + qed_ll2b_release_tx_gsi_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); + else + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); - qed_ll2b_complete_tx_packet(p_hwfn, p_ll2_conn->my_id, - p_pkt->cookie, - p_pkt->bds_set[0].tx_frag, - 
b_last_frag, b_last_packet); } } @@ -313,6 +325,7 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_tx_packet *p_pkt; bool b_last_frag = false; unsigned long flags; + dma_addr_t tx_frag; int rc = -EINVAL; spin_lock_irqsave(&p_tx->lock, flags); @@ -353,11 +366,19 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); spin_unlock_irqrestore(&p_tx->lock, flags); - qed_ll2b_complete_tx_packet(p_hwfn, - p_ll2_conn->my_id, - p_pkt->cookie, - p_pkt->bds_set[0].tx_frag, - b_last_frag, !num_bds); + tx_frag = p_pkt->bds_set[0].tx_frag; + if (p_ll2_conn->gsi_enable) + qed_ll2b_complete_tx_gsi_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, !num_bds); + else + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, !num_bds); spin_lock_irqsave(&p_tx->lock, flags); } @@ -368,6 +389,54 @@ out: return rc; } +static int +qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, + union core_rx_cqe_union *p_cqe, + unsigned long lock_flags, bool b_last_cqe) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_info->rx_queue; + struct qed_ll2_rx_packet *p_pkt = NULL; + u16 packet_length, parse_flags, vlan; + u32 src_mac_addrhi; + u16 src_mac_addrlo; + + if (!list_empty(&p_rx->active_descq)) + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, + "GSI Rx completion but active_descq is empty\n"); + return -EIO; + } + + list_del(&p_pkt->list_entry); + parse_flags = le16_to_cpu(p_cqe->rx_cqe_gsi.parse_flags.flags); + packet_length = le16_to_cpu(p_cqe->rx_cqe_gsi.data_length); + vlan = le16_to_cpu(p_cqe->rx_cqe_gsi.vlan); + src_mac_addrhi = le32_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrhi); + src_mac_addrlo = le16_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrlo); + if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) + DP_NOTICE(p_hwfn, + "Mismatch between active_descq and the LL2 Rx chain\n"); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + spin_unlock_irqrestore(&p_rx->lock, lock_flags); + qed_ll2b_complete_rx_gsi_packet(p_hwfn, + p_ll2_info->my_id, + p_pkt->cookie, + p_pkt->rx_buf_addr, + packet_length, + p_cqe->rx_cqe_gsi.data_length_error, + parse_flags, + vlan, + src_mac_addrhi, + src_mac_addrlo, b_last_cqe); + spin_lock_irqsave(&p_rx->lock, lock_flags); + + return 0; +} + static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, union core_rx_cqe_union *p_cqe, @@ -429,6 +498,10 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n"); rc = -EINVAL; break; + case CORE_RX_CQE_TYPE_GSI_OFFLOAD: + rc = qed_ll2_rxq_completion_gsi(p_hwfn, p_ll2_conn, + cqe, flags, b_last_cqe); + break; case CORE_RX_CQE_TYPE_REGULAR: rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, cqe, flags, b_last_cqe); @@ -527,6 +600,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, } p_ramrod->action_on_error.error_type = action_on_error; + p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -589,6 +663,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); } + p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -775,6 +850,7 @@ int 
qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, p_ll2_info->tx_dest = p_params->tx_dest; p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big; p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf; + p_ll2_info->gsi_enable = p_params->gsi_enable; rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc); if (rc) @@ -1026,6 +1102,7 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, u16 vlan, u8 bd_flags, u16 l4_hdr_offset_w, + enum core_roce_flavor_type type, dma_addr_t first_frag, u16 first_frag_len) { @@ -1046,6 +1123,9 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(start_bd->addr, first_frag); start_bd->nbytes = cpu_to_le16(first_frag_len); + SET_FIELD(start_bd->bd_flags.as_bitfield, CORE_TX_BD_FLAGS_ROCE_FLAV, + type); + DP_VERBOSE(p_hwfn, (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", @@ -1137,11 +1217,13 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, u16 vlan, u8 bd_flags, u16 l4_hdr_offset_w, + enum qed_ll2_roce_flavor_type qed_roce_flavor, dma_addr_t first_frag, u16 first_frag_len, void *cookie, u8 notify_fw) { struct qed_ll2_tx_packet *p_curp = NULL; struct qed_ll2_info *p_ll2_conn = NULL; + enum core_roce_flavor_type roce_flavor; struct qed_ll2_tx_queue *p_tx; struct qed_chain *p_tx_chain; unsigned long flags; @@ -1174,6 +1256,15 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, goto out; } + if (qed_roce_flavor == QED_LL2_ROCE) { + roce_flavor = CORE_ROCE; + } else if (qed_roce_flavor == QED_LL2_RROCE) { + roce_flavor = CORE_RROCE; + } else { + rc = -EINVAL; + goto out; + } + /* Prepare packet and BD, and perhaps send a doorbell to FW */ qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp, num_of_bds, first_frag, @@ -1181,6 +1272,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, num_of_bds, CORE_TX_DEST_NW, vlan, bd_flags, l4_hdr_offset_w, + roce_flavor, first_frag, first_frag_len); qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); @@ -1476,6 +1568,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping; ll2_info.tx_tc = 0; ll2_info.tx_dest = CORE_TX_DEST_NW; + ll2_info.gsi_enable = 1; rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info, QED_LL2_RX_SIZE, QED_LL2_TX_SIZE, @@ -1625,8 +1718,8 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), cdev->ll2->handle, 1 + skb_shinfo(skb)->nr_frags, - vlan, flags, 0, mapping, - skb->len, skb, 1); + vlan, flags, 0, 0 /* RoCE FLAVOR */, + mapping, skb->len, skb, 1); if (rc) goto err; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index a037c4845928..80a5dc2d652d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -24,6 +24,12 @@ #define QED_MAX_NUM_OF_LL2_CONNECTIONS (4) +enum qed_ll2_roce_flavor_type { + QED_LL2_ROCE, + QED_LL2_RROCE, + MAX_QED_LL2_ROCE_FLAVOR_TYPE +}; + enum qed_ll2_conn_type { QED_LL2_TYPE_RESERVED, QED_LL2_TYPE_ISCSI, @@ -119,6 +125,7 @@ struct qed_ll2_info { u8 tx_stats_en; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; + u8 gsi_enable; }; /** @@ -199,6 +206,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, u16 vlan, u8 bd_flags, u16 
l4_hdr_offset_w, + enum qed_ll2_roce_flavor_type qed_roce_flavor, dma_addr_t first_frag, u16 first_frag_len, void *cookie, u8 notify_fw); @@ -285,5 +293,24 @@ void qed_ll2_setup(struct qed_hwfn *p_hwfn, */ void qed_ll2_free(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_connections); - +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet); +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 8b4854d3b395..23430059471c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -64,6 +64,7 @@ #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_roce.h" +#include "qed_ll2.h" void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) @@ -2611,6 +2612,306 @@ void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + struct qed_roce_ll2_packet *packet = cookie; + struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2; + + roce_ll2->cbs.tx_cb(roce_ll2->cb_cookie, packet); +} + +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + qed_ll2b_complete_tx_gsi_packet(p_hwfn, connection_handle, + cookie, first_frag_addr, + b_last_fragment, b_last_packet); +} + +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet) +{ + struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2; + struct qed_roce_ll2_rx_params params; + struct qed_dev *cdev = p_hwfn->cdev; + struct qed_roce_ll2_packet pkt; + + DP_VERBOSE(cdev, + QED_MSG_LL2, + "roce ll2 rx complete: bus_addr=%p, len=%d, data_len_err=%d\n", + (void *)(uintptr_t)rx_buf_addr, + data_length, data_length_error); + + memset(&pkt, 0, sizeof(pkt)); + pkt.n_seg = 1; + pkt.payload[0].baddr = rx_buf_addr; + pkt.payload[0].len = data_length; + + memset(¶ms, 0, sizeof(params)); + params.vlan_id = vlan; + *((u32 *)¶ms.smac[0]) = ntohl(src_mac_addr_hi); + *((u16 *)¶ms.smac[4]) = ntohs(src_mac_addr_lo); + + if (data_length_error) { + DP_ERR(cdev, + "roce ll2 rx complete: data length error %d, length=%d\n", + data_length_error, data_length); + params.rc = -EINVAL; + } + + roce_ll2->cbs.rx_cb(roce_ll2->cb_cookie, &pkt, ¶ms); +} + +static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, + u8 *old_mac_address, + u8 *new_mac_address) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt; + int rc = 0; + + if (!hwfn->ll2 || hwfn->ll2->handle == QED_LL2_UNUSED_HANDLE) { + DP_ERR(cdev, + "qed roce mac filter 
failed - roce_info/ll2 NULL\n"); + return -EINVAL; + } + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_ERR(cdev, + "qed roce ll2 mac filter set: failed to acquire PTT\n"); + return -EINVAL; + } + + mutex_lock(&hwfn->ll2->lock); + if (old_mac_address) + qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + old_mac_address); + if (new_mac_address) + rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + new_mac_address); + mutex_unlock(&hwfn->ll2->lock); + + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + + if (rc) + DP_ERR(cdev, + "qed roce ll2 mac filter set: failed to add mac filter\n"); + + return rc; +} + +static int qed_roce_ll2_start(struct qed_dev *cdev, + struct qed_roce_ll2_params *params) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2; + struct qed_ll2_info ll2_params; + int rc; + + if (!params) { + DP_ERR(cdev, "qed roce ll2 start: failed due to NULL params\n"); + return -EINVAL; + } + if (!params->cbs.tx_cb || !params->cbs.rx_cb) { + DP_ERR(cdev, + "qed roce ll2 start: failed due to NULL tx/rx. tx_cb=%p, rx_cb=%p\n", + params->cbs.tx_cb, params->cbs.rx_cb); + return -EINVAL; + } + if (!is_valid_ether_addr(params->mac_address)) { + DP_ERR(cdev, + "qed roce ll2 start: failed due to invalid Ethernet address %pM\n", + params->mac_address); + return -EINVAL; + } + + /* Initialize */ + roce_ll2 = kzalloc(sizeof(*roce_ll2), GFP_ATOMIC); + if (!roce_ll2) { + DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n"); + return -ENOMEM; + } + memset(roce_ll2, 0, sizeof(*roce_ll2)); + roce_ll2->handle = QED_LL2_UNUSED_HANDLE; + roce_ll2->cbs = params->cbs; + roce_ll2->cb_cookie = params->cb_cookie; + mutex_init(&roce_ll2->lock); + + memset(&ll2_params, 0, sizeof(ll2_params)); + ll2_params.conn_type = QED_LL2_TYPE_ROCE; + ll2_params.mtu = params->mtu; + ll2_params.rx_drop_ttl0_flg = true; + ll2_params.rx_vlan_removal_en = false; + ll2_params.tx_dest = CORE_TX_DEST_NW; + ll2_params.ai_err_packet_too_big = LL2_DROP_PACKET; + ll2_params.ai_err_no_buf = LL2_DROP_PACKET; + ll2_params.gsi_enable = true; + + rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_params, + params->max_rx_buffers, + params->max_tx_buffers, + &roce_ll2->handle); + if (rc) { + DP_ERR(cdev, + "qed roce ll2 start: failed to acquire LL2 connection (rc=%d)\n", + rc); + goto err; + } + + rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), + roce_ll2->handle); + if (rc) { + DP_ERR(cdev, + "qed roce ll2 start: failed to establish LL2 connection (rc=%d)\n", + rc); + goto err1; + } + + hwfn->ll2 = roce_ll2; + + rc = qed_roce_ll2_set_mac_filter(cdev, NULL, params->mac_address); + if (rc) { + hwfn->ll2 = NULL; + goto err2; + } + ether_addr_copy(roce_ll2->mac_address, params->mac_address); + + return 0; + +err2: + qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); +err1: + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); +err: + kfree(roce_ll2); + return rc; +} + +static int qed_roce_ll2_stop(struct qed_dev *cdev) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + int rc; + + if (!cdev) { + DP_ERR(cdev, "qed roce ll2 stop: invalid cdev\n"); + return -EINVAL; + } + + if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) { + DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n"); + return -EINVAL; + } + + /* remove LL2 MAC address filter */ + rc = qed_roce_ll2_set_mac_filter(cdev, roce_ll2->mac_address, NULL); + 
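+	/* The filter-removal status is not checked; teardown continues
+	 * and rc is overwritten by the terminate call below.
+	 */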
eth_zero_addr(roce_ll2->mac_address); + + rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), + roce_ll2->handle); + if (rc) + DP_ERR(cdev, + "qed roce ll2 stop: failed to terminate LL2 connection (rc=%d)\n", + rc); + + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); + + roce_ll2->handle = QED_LL2_UNUSED_HANDLE; + + kfree(roce_ll2); + + return rc; +} + +static int qed_roce_ll2_tx(struct qed_dev *cdev, + struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_tx_params *params) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + enum qed_ll2_roce_flavor_type qed_roce_flavor; + u8 flags = 0; + int rc; + int i; + + if (!cdev || !pkt || !params) { + DP_ERR(cdev, + "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n", + cdev, pkt, params); + return -EINVAL; + } + + qed_roce_flavor = (pkt->roce_mode == ROCE_V1) ? QED_LL2_ROCE + : QED_LL2_RROCE; + + if (pkt->roce_mode == ROCE_V2_IPV4) + flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT); + + /* Tx header */ + rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle, + 1 + pkt->n_seg, 0, flags, 0, + qed_roce_flavor, pkt->header.baddr, + pkt->header.len, pkt, 1); + if (rc) { + DP_ERR(cdev, "roce ll2 tx: header failed (rc=%d)\n", rc); + return QED_ROCE_TX_HEAD_FAILURE; + } + + /* Tx payload */ + for (i = 0; i < pkt->n_seg; i++) { + rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + roce_ll2->handle, + pkt->payload[i].baddr, + pkt->payload[i].len); + if (rc) { + /* If failed not much to do here, partial packet has + * been posted * we can't free memory, will need to wait + * for completion + */ + DP_ERR(cdev, + "roce ll2 tx: payload failed (rc=%d)\n", rc); + return QED_ROCE_TX_FRAG_FAILURE; + } + } + + return 0; +} + +static int qed_roce_ll2_post_rx_buffer(struct qed_dev *cdev, + struct qed_roce_ll2_buffer *buf, + u64 cookie, u8 notify_fw) +{ + return qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + QED_LEADING_HWFN(cdev)->ll2->handle, + buf->baddr, buf->len, + (void *)(uintptr_t)cookie, notify_fw); +} + +static int qed_roce_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + + return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + roce_ll2->handle, stats); +} + static const struct qed_rdma_ops qed_rdma_ops_pass = { .common = &qed_common_ops_pass, .fill_dev_info = &qed_fill_rdma_dev_info, @@ -2638,6 +2939,12 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_free_tid = &qed_rdma_free_tid, .rdma_register_tid = &qed_rdma_register_tid, .rdma_deregister_tid = &qed_rdma_deregister_tid, + .roce_ll2_start = &qed_roce_ll2_start, + .roce_ll2_stop = &qed_roce_ll2_stop, + .roce_ll2_tx = &qed_roce_ll2_tx, + .roce_ll2_post_rx_buffer = &qed_roce_ll2_post_rx_buffer, + .roce_ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter, + .roce_ll2_stats = &qed_roce_ll2_stats, }; const struct qed_rdma_ops *qed_get_rdma_ops() diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index b8ddda456101..2f091e8a0f40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -42,6 +42,7 @@ #include "qed.h" #include "qed_dev_api.h" #include "qed_hsi.h" +#include "qed_ll2.h" #define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) #define QED_RDMA_MAX_P_KEY (1) diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 
0b6df6eedcf1..53047d3fa678 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -39,6 +39,16 @@ #include #include #include +#include + +enum qed_roce_ll2_tx_dest { + /* Light L2 TX Destination to the Network */ + QED_ROCE_LL2_TX_DEST_NW, + + /* Light L2 TX Destination to the Loopback */ + QED_ROCE_LL2_TX_DEST_LB, + QED_ROCE_LL2_TX_DEST_MAX +}; #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) @@ -461,6 +471,61 @@ struct qed_rdma_counters_out_params { #define QED_ROCE_TX_HEAD_FAILURE (1) #define QED_ROCE_TX_FRAG_FAILURE (2) +struct qed_roce_ll2_header { + void *vaddr; + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_buffer { + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_packet { + struct qed_roce_ll2_header header; + int n_seg; + struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; + int roce_mode; + enum qed_roce_ll2_tx_dest tx_dest; +}; + +struct qed_roce_ll2_tx_params { + int reserved; +}; + +struct qed_roce_ll2_rx_params { + u16 vlan_id; + u8 smac[ETH_ALEN]; + int rc; +}; + +struct qed_roce_ll2_cbs { + void (*tx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt); + + void (*rx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_rx_params *params); +}; + +struct qed_roce_ll2_params { + u16 max_rx_buffers; + u16 max_tx_buffers; + u16 mtu; + u8 mac_address[ETH_ALEN]; + struct qed_roce_ll2_cbs cbs; + void *cb_cookie; +}; + +struct qed_roce_ll2_info { + u8 handle; + struct qed_roce_ll2_cbs cbs; + u8 mac_address[ETH_ALEN]; + void *cb_cookie; + + /* Lock to protect ll2 */ + struct mutex lock; +}; + enum qed_rdma_type { QED_RDMA_TYPE_ROCE, }; @@ -518,6 +583,20 @@ struct qed_rdma_ops { int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); void (*rdma_free_tid)(void *rdma_cxt, u32 itid); + int (*roce_ll2_start)(struct qed_dev *cdev, + struct qed_roce_ll2_params *params); + int (*roce_ll2_stop)(struct qed_dev *cdev); + int (*roce_ll2_tx)(struct qed_dev *cdev, + struct qed_roce_ll2_packet *packet, + struct qed_roce_ll2_tx_params *params); + int (*roce_ll2_post_rx_buffer)(struct qed_dev *cdev, + struct qed_roce_ll2_buffer *buf, + u64 cookie, u8 notify_fw); + int (*roce_ll2_set_mac_filter)(struct qed_dev *cdev, + u8 *old_mac_address, + u8 *new_mac_address); + int (*roce_ll2_stats)(struct qed_dev *cdev, + struct qed_ll2_stats *stats); }; const struct qed_rdma_ops *qed_get_rdma_ops(void);
From edfc23ee3e0ebbb6713d7574ab1b00abff178f6c Mon Sep 17 00:00:00 2001 From: Guilherme G Piccoli Date: Mon, 3 Oct 2016 00:31:12 -0700 Subject: [PATCH 1039/1050] i40e: avoid NULL pointer dereference and recursive errors on early PCI error
Although rare, it's possible to hit a PCI error early during device probe, meaning possibly some structs are not entirely initialized, and some might even be completely uninitialized, leading to a NULL pointer dereference. The i40e driver currently presents "bad" behavior if the device hits such an early PCI error: firstly, the struct i40e_pf might not be attached to the pci_dev yet, leading to a NULL pointer dereference on access to pf->state. Even checking if the struct is NULL and avoiding the access in that case isn't enough, since the driver cannot recover from a PCI error that early; in our experiments we saw multiple failures in the kernel log, like: [549.664] i40e 0007:01:00.1: Initial pf_reset failed: -15 [549.664] i40e: probe of 0007:01:00.1 failed with error -15 [...] [871.644] i40e 0007:01:00.1: The driver for the device stopped because the device firmware failed to init.
Try updating your NVM image. [871.644] i40e: probe of 0007:01:00.1 failed with error -32 [...] [872.516] i40e 0007:01:00.0: ARQ: Unknown event 0x0000 ignored Between the first probe failure (error -15) and the second (error -32) another PCI error happened due to the first bad probe. Also, driver started to flood console with those ARQ event messages. This patch will prevent these issues by allowing error recovery mechanism to remove the failed device from the system instead of trying to recover from early PCI errors during device probe. CC: Signed-off-by: Guilherme G Piccoli Acked-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0044c29ca31e..ac1faee2a5b8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11400,6 +11400,12 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s: error %d\n", __func__, error); + if (!pf) { + dev_info(&pdev->dev, + "Cannot recover - error happened during device probe\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + /* shutdown all operations */ if (!test_bit(__I40E_SUSPENDED, &pf->state)) { rtnl_lock(); From 277964e19e1416ca31301e113edb2580c81a8b66 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 3 Oct 2016 10:47:50 +0800 Subject: [PATCH 1040/1050] vmxnet3: Wake queue from reset work vmxnet3_reset_work() expects tx queues to be stopped (via vmxnet3_quiesce_dev -> netif_tx_disable). However, this races with the netif_wake_queue() call in netif_tx_timeout() such that the driver's start_xmit routine may be called unexpectedly, triggering one of the BUG_ON in vmxnet3_map_pkt with a stack trace like this: RIP: 0010:[] vmxnet3_map_pkt+0x3ac/0x4c0 [vmxnet3] [] vmxnet3_tq_xmit+0x210/0x4e0 [vmxnet3] [] dev_hard_start_xmit+0x2e4/0x4c0 [] sch_direct_xmit+0x17e/0x1e0 [] __qdisc_run+0xd7/0x130 [] net_tx_action+0x10a/0x200 [] __do_softirq+0x11f/0x260 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] local_bh_enable_ip+0x99/0xa0 [] destroy_conntrack+0x96/0x110 [nf_conntrack] [] nf_conntrack_destroy+0x12/0x20 [] skb_release_head_state+0xb5/0xf0 [] skb_release_all+0x9/0x20 [] __kfree_skb+0x9/0x90 [] vmxnet3_quiesce_dev+0x209/0x340 [vmxnet3] [] vmxnet3_reset_work+0x6a/0xa0 [vmxnet3] [] process_one_work+0x16c/0x350 [] worker_thread+0x17a/0x410 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 2fd93b4c759a..b5554f2ebee4 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3186,7 +3186,6 @@ vmxnet3_tx_timeout(struct net_device *netdev) netdev_err(adapter->netdev, "tx hang\n"); schedule_work(&adapter->work); - netif_wake_queue(adapter->netdev); } @@ -3213,6 +3212,7 @@ vmxnet3_reset_work(struct work_struct *data) } rtnl_unlock(); + netif_wake_queue(adapter->netdev); clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); } From a4cc96d1f0170b779c32c6b2cc58764f5d2cdef0 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Mon, 3 Oct 2016 12:53:13 +0530 Subject: [PATCH 1041/1050] net: phy: Add Edge-rate driver for Microsemi PHYs. 
Edge-rate: As system and networking speeds increase, a signal's output transition, also known as the edge rate or slew rate (V/ns), takes on greater importance because high-speed signals come with a price. That price is an assortment of interference problems like ringing on the line, signal overshoot and undershoot, extended signal settling times, crosstalk noise, transmission line reflections, false signal detection by the receiving device and electromagnetic interference (EMI) -- all of which can negate the potential gains designers are seeking when they try to increase system speeds through the use of higher performance logic devices. The fact is, faster signaling edge rates can cause a higher level of electrical noise or other types of interference that can actually lead to slower line speeds and lower maximum system frequencies. This parameter allows board designers to change the drive strength, and thereby change the EMI behavior. The edge-rate parameters (vddmac, edge-slowdown) are read from the Device Tree. Tested on Beaglebone Black with VSC 8531 PHY. Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- .../bindings/net/mscc-phy-vsc8531.txt | 58 ++++++++ drivers/net/phy/mscc.c | 125 ++++++++++++++++++ include/dt-bindings/net/mscc-phy-vsc8531.h | 21 +++ 3 files changed, 204 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt create mode 100644 include/dt-bindings/net/mscc-phy-vsc8531.h diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt new file mode 100644 index 000000000000..99c7eb0a00c8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt @@ -0,0 +1,58 @@ +* Microsemi - vsc8531 Gigabit Ethernet PHY + +Required properties: +- compatible : Should contain phy id as "ethernet-phy-idAAAA.BBBB" + The PHY device uses the binding described in + Documentation/devicetree/bindings/net/phy.txt + +Optional properties: +- vsc8531,vddmac : The vddmac in mV. +- vsc8531,edge-slowdown : The % by which the edge should be slowed down + relative to the fastest possible edge time; + the value is given without a negative sign. + Edge rate sets the drive strength of the MAC + interface output signals. Changing the drive + strength will affect the edge rate of the output + signal. The goal of this setting is to help + reduce electrical emission (EMI) by being able + to reprogram drive strength and in effect slow + down the edge rate if desired. Table 1 shows the + impact on the edge rate per VDDMAC supply for each + drive strength setting. + Ref: Table:1 - Edge rate change below.
+ +Note: see dt-bindings/net/mscc-phy-vsc8531.h for applicable values
+
+Table: 1 - Edge rate change
+----------------------------------------------------------------|
+|                  Edge Rate Change (VDDMAC)                     |
+|                                                                |
+| 3300 mV        2500 mV        1800 mV        1500 mV           |
+|---------------------------------------------------------------|
+| Default        Default        Default        Default           |
+| (Fastest)                     (recommended)  (recommended)     |
+|---------------------------------------------------------------|
+| -2%            -3%            -5%            -6%               |
+|---------------------------------------------------------------|
+| -4%            -6%            -9%            -14%              |
+|---------------------------------------------------------------|
+| -7%            -10%           -16%           -21%              |
+| (recommended)  (recommended)                                   |
+|---------------------------------------------------------------|
+| -10%           -14%           -23%           -29%              |
+|---------------------------------------------------------------|
+| -17%           -23%           -35%           -42%              |
+|---------------------------------------------------------------|
+| -29%           -37%           -52%           -58%              |
+|---------------------------------------------------------------|
+| -53%           -63%           -76%           -77%              |
+| (slowest)                                                      |
+|---------------------------------------------------------------|
+ +Example: + + vsc8531_0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0570"; + vsc8531,vddmac = <3300>; + vsc8531,edge-slowdown = <21>; + }; diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index d350debd174a..a17573e3bd8a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include enum rgmii_rx_clock_delay { RGMII_RX_CLK_DELAY_0_2_NS = 0, @@ -37,6 +39,10 @@ enum rgmii_rx_clock_delay { #define MII_VSC85XX_INT_MASK_MASK 0xa000 #define MII_VSC85XX_INT_STATUS 26 +#define MSCC_PHY_WOL_MAC_CONTROL 27 +#define EDGE_RATE_CNTL_POS 5 +#define EDGE_RATE_CNTL_MASK 0x00E0 + #define MSCC_EXT_PAGE_ACCESS 31 #define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ #define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ @@ -50,6 +56,23 @@ enum rgmii_rx_clock_delay { #define PHY_ID_VSC8531 0x00070570 #define PHY_ID_VSC8541 0x00070770 +struct edge_rate_table { + u16 vddmac; + int slowdown[MSCC_SLOWDOWN_MAX]; +}; + +struct edge_rate_table edge_table[MSCC_VDDMAC_MAX] = { + {3300, { 0, -2, -4, -7, -10, -17, -29, -53} }, + {2500, { 0, -3, -6, -10, -14, -23, -37, -63} }, + {1800, { 0, -5, -9, -16, -23, -35, -52, -76} }, + {1500, { 0, -6, -14, -21, -29, -42, -58, -77} }, +}; + +struct vsc8531_private { + u8 edge_slowdown; + u16 vddmac; +}; + static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) { int rc; @@ -58,6 +81,51 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) return rc; } +static u8 edge_rate_magic_get(u16 vddmac, + int slowdown) +{ + int rc = (MSCC_SLOWDOWN_MAX - 1); + u8 vdd; + u8 sd; + + for (vdd = 0; vdd < MSCC_VDDMAC_MAX; vdd++) { + if (edge_table[vdd].vddmac == vddmac) { + for (sd = 0; sd < MSCC_SLOWDOWN_MAX; sd++) { + if (edge_table[vdd].slowdown[sd] <= slowdown) { + rc = (MSCC_SLOWDOWN_MAX - sd - 1); + break; + } + } + } + } + + return rc; +} + +static int vsc85xx_edge_rate_cntl_set(struct phy_device *phydev, + u8 edge_rate) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + reg_val = phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL); + reg_val &= ~(EDGE_RATE_CNTL_MASK); + reg_val |= (edge_rate << EDGE_RATE_CNTL_POS); + rc = phy_write(phydev, MSCC_PHY_WOL_MAC_CONTROL, reg_val); + if (rc != 0) + goto out_unlock; + rc =
vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + static int vsc85xx_mac_if_set(struct phy_device *phydev, phy_interface_t interface) { @@ -116,9 +184,45 @@ out_unlock: return rc; } +#ifdef CONFIG_OF_MDIO +static int vsc8531_of_init(struct phy_device *phydev) +{ + int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + + if (!of_node) + return -ENODEV; + + rc = of_property_read_u16(of_node, "vsc8531,vddmac", + &vsc8531->vddmac); + if (rc == -EINVAL) + vsc8531->vddmac = MSCC_VDDMAC_3300; + rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", + &vsc8531->edge_slowdown); + if (rc == -EINVAL) + vsc8531->edge_slowdown = 0; + + rc = 0; + return rc; +} +#else +static int vsc8531_of_init(struct phy_device *phydev) +{ + return 0; +} +#endif /* CONFIG_OF_MDIO */ + static int vsc85xx_config_init(struct phy_device *phydev) { int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + u8 edge_rate; + + rc = vsc8531_of_init(phydev); + if (rc) + return rc; rc = vsc85xx_default_config(phydev); if (rc) @@ -128,6 +232,12 @@ static int vsc85xx_config_init(struct phy_device *phydev) if (rc) return rc; + edge_rate = edge_rate_magic_get(vsc8531->vddmac, + -(int)vsc8531->edge_slowdown); + rc = vsc85xx_edge_rate_cntl_set(phydev, edge_rate); + if (rc) + return rc; + rc = genphy_config_init(phydev); return rc; @@ -160,6 +270,19 @@ static int vsc85xx_config_intr(struct phy_device *phydev) return rc; } +static int vsc85xx_probe(struct phy_device *phydev) +{ + struct vsc8531_private *vsc8531; + + vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); + if (!vsc8531) + return -ENOMEM; + + phydev->priv = vsc8531; + + return 0; +} + /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { @@ -177,6 +300,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .probe = &vsc85xx_probe, }, { .phy_id = PHY_ID_VSC8541, @@ -193,6 +317,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .probe = &vsc85xx_probe, } }; diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h new file mode 100644 index 000000000000..2383dd20ff43 --- /dev/null +++ b/include/dt-bindings/net/mscc-phy-vsc8531.h @@ -0,0 +1,21 @@ +/* + * Device Tree constants for Microsemi VSC8531 PHY + * + * Author: Nagaraju Lakkaraju + * + * License: Dual MIT/GPL + * Copyright (c) 2016 Microsemi Corporation + */ + +#ifndef _DT_BINDINGS_MSCC_VSC8531_H +#define _DT_BINDINGS_MSCC_VSC8531_H + +/* MAC interface Edge rate control VDDMAC in milli Volts */ +#define MSCC_VDDMAC_3300 3300 +#define MSCC_VDDMAC_2500 2500 +#define MSCC_VDDMAC_1800 1800 +#define MSCC_VDDMAC_1500 1500 +#define MSCC_VDDMAC_MAX 4 +#define MSCC_SLOWDOWN_MAX 8 + +#endif From 93409033ae653f1c9a949202fb537ab095b2092f Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Mon, 3 Oct 2016 13:43:02 -0600 Subject: [PATCH 1042/1050] net: Add netdev all_adj_list refcnt propagation to fix panic This is a respin of a patch to fix a relatively easily reproducible kernel panic related to the all_adj_list handling for netdevs in recent kernels. 
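Before the reproduction steps, a minimal, self-contained sketch of the reference-count propagation idea this patch implements may help. This is illustrative userspace code with made-up names (adj_insert/adj_remove), not the kernel's __netdev_adjacent_dev_* helpers: an adjacency can be reachable through several lower paths, so linking must add as many references as the caller's path count and unlinking must drop the same amount, otherwise one path's teardown frees an entry another path still uses.

/* Illustrative only: a single ref-counted adjacency entry. */
#include <stdio.h>
#include <stdlib.h>

struct adj {
	const char *name;
	unsigned int ref_nr;	/* how many paths reach this neighbour */
};

static struct adj *adj_insert(struct adj *a, const char *name, unsigned int ref_nr)
{
	if (a) {			/* already linked: account the new paths */
		a->ref_nr += ref_nr;
		return a;
	}
	a = malloc(sizeof(*a));
	if (!a)
		return NULL;
	a->name = name;
	a->ref_nr = ref_nr;
	return a;
}

static struct adj *adj_remove(struct adj *a, unsigned int ref_nr)
{
	if (a->ref_nr > ref_nr) {	/* other paths still reach it */
		a->ref_nr -= ref_nr;
		return a;
	}
	free(a);			/* last path gone: safe to release */
	return NULL;
}

int main(void)
{
	/* eth0 is reachable from mac0 via two VLAN paths. */
	struct adj *eth0 = NULL;

	eth0 = adj_insert(eth0, "eth0", 1);	/* via eth0.100 */
	eth0 = adj_insert(eth0, "eth0", 1);	/* via eth0.200 */
	eth0 = adj_remove(eth0, 2);		/* bridge deleted: both paths drop at once */
	printf("eth0 adjacency %s\n", eth0 ? "still linked" : "fully released");
	return 0;
}

The pre-patch bug corresponds to inserting with a hard-coded count of 1 while removal walks every path: the second removal then frees an already-freed entry.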
The following sequence of commands will reproduce the issue:

ip link add link eth0 name eth0.100 type vlan id 100
ip link add link eth0 name eth0.200 type vlan id 200
ip link add name testbr type bridge
ip link set eth0.100 master testbr
ip link set eth0.200 master testbr
ip link add link testbr mac0 type macvlan
ip link delete dev testbr

This creates an upper/lower tree of (excuse the poor ASCII art):

              /---eth0.100---eth0
mac0---testbr-
              \---eth0.200---eth0

When testbr is deleted, the all_adj_lists are walked, and eth0 is deleted twice from the mac0 list. Unfortunately, during setup in __netdev_upper_dev_link, only one reference to eth0 is added, so this results in a panic. This change adds reference count propagation so things are handled properly. Matthias Schiffer reported a similar crash in batman-adv: https://github.com/freifunk-gluon/gluon/issues/680 https://www.open-mesh.org/issues/247 which this patch also seems to resolve. Signed-off-by: Andrew Collins Signed-off-by: David S. Miller --- net/core/dev.c | 68 +++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index c0c291f721d6..f1fe26f66458 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5589,6 +5589,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -5598,7 +5599,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr++; + adj->ref_nr += ref_nr; return 0; } @@ -5608,7 +5609,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = 1; + adj->ref_nr = ref_nr; adj->private = private; dev_hold(adj_dev); @@ -5647,6 +5648,7 @@ free_adj: static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; @@ -5659,10 +5661,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, BUG(); } - if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); - adj->ref_nr--; + if (adj->ref_nr > ref_nr) { + pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, + ref_nr, adj->ref_nr-ref_nr); + adj->ref_nr -= ref_nr; return; } @@ -5681,21 +5683,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); return ret; } @@ -5703,9 +5706,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, } static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { -
return __netdev_adjacent_dev_link_lists(dev, upper_dev, + return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower, NULL, false); @@ -5713,17 +5717,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev, static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } @@ -5732,17 +5738,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); + int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); if (ret) return ret; - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower, private, master); if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); return ret; } @@ -5752,8 +5758,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } @@ -5806,7 +5812,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { pr_debug("Interlinking %s with %s, non-neighbour\n", i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); + ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); if (ret) goto rollback_mesh; } @@ -5816,7 +5822,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { pr_debug("linking %s's upper device %s with %s\n", upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); + ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); if (ret) goto rollback_upper_mesh; } @@ -5825,7 +5831,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &dev->all_adj_list.lower, list) { pr_debug("linking %s's lower device %s with %s\n", dev->name, i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev); + ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); if (ret) goto rollback_lower_mesh; } @@ -5843,7 +5849,7 @@ rollback_lower_mesh: list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); } i = NULL; @@ -5853,7 +5859,7 @@ rollback_upper_mesh: list_for_each_entry(i, 
&upper_dev->all_adj_list.upper, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); } i = j = NULL; @@ -5865,7 +5871,7 @@ rollback_mesh: list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); } if (i == to_i) break; @@ -5945,16 +5951,16 @@ void netdev_upper_dev_unlink(struct net_device *dev, */ list_for_each_entry(i, &dev->all_adj_list.lower, list) list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); /* remove also the devices itself from lower/upper device * list */ list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); From d8cedaabe71236d27da1ff03d32ab1da06ed041f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:47 +1100 Subject: [PATCH 1043/1050] net/ncsi: Avoid unused-value build warning from ia64-linux-gcc xchg() is used to set the NCSI channel's state so that accesses to the state are consistent. xchg()'s return value should be used; otherwise, a build warning will be raised (with -Wunused-value), as the message below indicates. It is reported by ia64-linux-gcc (GCC) 4.9.0. net/ncsi/ncsi-manage.c: In function 'ncsi_channel_monitor': arch/ia64/include/uapi/asm/cmpxchg.h:56:2: warning: value computed is \ not used [-Wunused-value] ((__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)))) ^ net/ncsi/ncsi-manage.c:202:3: note: in expansion of macro 'xchg' xchg(&nc->state, NCSI_CHANNEL_INACTIVE); This removes the atomic access to the NCSI channel's state to avoid the above build warning. We have to hold the channel's lock when its state is read or updated. No functional changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S.
Miller --- net/ncsi/ncsi-aen.c | 37 ++++++++++++++++------ net/ncsi/ncsi-manage.c | 71 ++++++++++++++++++++++++++++++++---------- 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index d463468442ae..b41a6617d498 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -53,7 +53,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, struct ncsi_aen_lsc_pkt *lsc; struct ncsi_channel *nc; struct ncsi_channel_mode *ncm; - unsigned long old_data; + bool chained; + int state; + unsigned long old_data, data; unsigned long flags; /* Find the NCSI channel */ @@ -62,20 +64,27 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, return -ENODEV; /* Update the link status */ - ncm = &nc->modes[NCSI_MODE_LINK]; lsc = (struct ncsi_aen_lsc_pkt *)h; + + spin_lock_irqsave(&nc->lock, flags); + ncm = &nc->modes[NCSI_MODE_LINK]; old_data = ncm->data[2]; - ncm->data[2] = ntohl(lsc->status); + data = ntohl(lsc->status); + ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); - if (!((old_data ^ ncm->data[2]) & 0x1) || - !list_empty(&nc->link)) + + chained = !list_empty(&nc->link); + state = nc->state; + spin_unlock_irqrestore(&nc->lock, flags); + + if (!((old_data ^ data) & 0x1) || chained) return 0; - if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) && - !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1))) + if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) && + !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1))) return 0; if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) ndp->flags |= NCSI_DEV_RESHUFFLE; ncsi_stop_channel_monitor(nc); @@ -97,13 +106,21 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp, if (!nc) return -ENODEV; + spin_lock_irqsave(&nc->lock, flags); if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); return 0; + } + spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index ef017b871857..a26ce5132549 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -132,6 +132,7 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc; + unsigned long flags; nd->state = ncsi_dev_state_functional; if (force_down) { @@ -142,14 +143,21 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) nd->link_up = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); nd->link_up = 1; goto report; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -163,20 +171,22 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; struct 
ncsi_cmd_arg nca; - bool enabled; + bool enabled, chained; unsigned int timeout; unsigned long flags; - int ret; + int state, ret; spin_lock_irqsave(&nc->lock, flags); + state = nc->state; + chained = !list_empty(&nc->link); timeout = nc->timeout; enabled = nc->enabled; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || !list_empty(&nc->link)) + if (!enabled || chained) return; - if (nc->state != NCSI_CHANNEL_INACTIVE && - nc->state != NCSI_CHANNEL_ACTIVE) + if (state != NCSI_CHANNEL_INACTIVE && + state != NCSI_CHANNEL_ACTIVE) return; if (!(timeout % 2)) { @@ -195,11 +205,15 @@ static void ncsi_channel_monitor(unsigned long data) if (timeout + 1 >= 3) { if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) ncsi_report_link(ndp, true); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); @@ -508,6 +522,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; + unsigned long flags; int ret; nca.ndp = ndp; @@ -556,7 +571,9 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_suspend_done: - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_process_next_channel(ndp); break; @@ -574,6 +591,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; unsigned char index; + unsigned long flags; int ret; nca.ndp = ndp; @@ -675,10 +693,12 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; break; case ncsi_dev_state_config_done: + spin_lock_irqsave(&nc->lock, flags); if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) - xchg(&nc->state, NCSI_CHANNEL_ACTIVE); + nc->state = NCSI_CHANNEL_ACTIVE; else - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); @@ -707,18 +727,25 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) found = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_INACTIVE) + nc->state != NCSI_CHANNEL_INACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (!found) found = nc; ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); found = nc; goto out; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -987,11 +1014,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) goto out; } - old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE); list_del_init(&nc->link); - spin_unlock_irqrestore(&ndp->lock, flags); + spin_lock_irqsave(&nc->lock, flags); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + ndp->active_channel = nc; ndp->active_package = nc->package; @@ -1006,7 +1036,7 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) break; default: 
netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n", - nc->state, nc->package->id, nc->id); + old_state, nc->package->id, nc->id); ncsi_report_link(ndp, false); return -EINVAL; } @@ -1151,6 +1181,8 @@ int ncsi_start_dev(struct ncsi_dev *nd) struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_package *np; struct ncsi_channel *nc; + unsigned long flags; + bool chained; int old_state, ret; if (nd->state != ncsi_dev_state_registered && @@ -1166,8 +1198,13 @@ int ncsi_start_dev(struct ncsi_dev *nd) /* Reset channel's state and start over */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { - old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE); - WARN_ON_ONCE(!list_empty(&nc->link) || + spin_lock_irqsave(&nc->lock, flags); + chained = !list_empty(&nc->link); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + + WARN_ON_ONCE(chained || old_state == NCSI_CHANNEL_INVISIBLE); } } From bc7e0f50aa6958676115bffc1e5e58703579e04b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:48 +1100 Subject: [PATCH 1044/1050] net/ncsi: Introduce NCSI_RESERVED_CHANNEL This defines NCSI_RESERVED_CHANNEL as the reserved NCSI channel ID (0x1f). No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 33738c060547..66dc851d49ee 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -170,6 +170,7 @@ struct ncsi_package; #define NCSI_PACKAGE_SHIFT 5 #define NCSI_PACKAGE_INDEX(c) (((c) >> NCSI_PACKAGE_SHIFT) & 0x7) +#define NCSI_RESERVED_CHANNEL 0x1f #define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1)) #define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c)) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index a26ce5132549..97c99bee8b68 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -542,7 +542,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; if (nd->state == ncsi_dev_state_suspend_select) { nca.type = NCSI_PKT_CMD_SP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else @@ -559,7 +559,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_suspend_deselect; } else if (nd->state == ncsi_dev_state_suspend_deselect) { nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; nd->state = ncsi_dev_state_suspend_done; } @@ -608,7 +608,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) else nca.bytes[0] = 1; nca.package = np->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -834,7 +834,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect all possible packages */ nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -850,7 +850,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Select all possible packages */ nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -903,7 +903,7 @@ 
static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -960,7 +960,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect the active package */ nca.type = NCSI_PKT_CMD_DP; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; From 55e02d0837fb4cf023832252847bfbff453603cc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:49 +1100 Subject: [PATCH 1045/1050] net/ncsi: Don't probe on the reserved channel ID (0x1f) We needn't send a CIS (Clear Initial State) command to the NCSI reserved channel (0x1f) during enumeration, as we shouldn't receive a valid CIS response on NCSI channel 0x1f anyway. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 97c99bee8b68..8c5e0160d578 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -911,12 +911,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; case ncsi_dev_state_probe_cis: - ndp->pending_req_num = 32; + ndp->pending_req_num = NCSI_RESERVED_CHANNEL; /* Clear initial state */ nca.type = NCSI_PKT_CMD_CIS; nca.package = ndp->active_package->id; - for (index = 0; index < 0x20; index++) { + for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) { nca.channel = index; ret = ncsi_xmit_cmd(&nca); if (ret) From a15af54f8f2a32d629781417503843bfbd02a004 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:50 +1100 Subject: [PATCH 1046/1050] net/ncsi: Rework request index allocation The NCSI request index (struct ncsi_request::id) is put into the instance ID (IID) field when sending an NCSI command packet. The design intent was for the available IDs to be handed out in round-robin fashion. @ndp->request_id was introduced to represent the next available ID, but it has been used as the number of successively allocated IDs, which breaks the round-robin design. Besides, we shouldn't put 0 into the NCSI command packet's IID field, meaning ID#0 should be reserved according to section 6.3.1.1 of the NCSI spec (v1.1.0). This fixes the above two issues. With it applied, the available IDs will be assigned in round-robin fashion and ID#0 won't be assigned. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S.
Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 66dc851d49ee..c956fe8d80c3 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -259,6 +259,7 @@ struct ncsi_dev_priv { struct list_head packages; /* List of packages */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ +#define NCSI_REQ_START_IDX 1 unsigned int pending_req_num; /* Number of pending requests */ struct ncsi_package *active_package; /* Currently handled package */ struct ncsi_channel *active_channel; /* Currently handled channel */ diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 8c5e0160d578..00ce2c7fdb15 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -427,30 +427,31 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) /* Check if there is one available request until the ceiling */ spin_lock_irqsave(&ndp->lock, flags); - for (i = ndp->request_id; !nr && i < limit; i++) { + for (i = ndp->request_id; i < limit; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + ndp->request_id = i + 1; + goto found; } /* Fail back to check from the starting cursor */ - for (i = 0; !nr && i < ndp->request_id; i++) { + for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + ndp->request_id = i + 1; + goto found; } - spin_unlock_irqrestore(&ndp->lock, flags); +found: + spin_unlock_irqrestore(&ndp->lock, flags); return nr; } @@ -1148,7 +1149,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, /* Initialize private NCSI device */ spin_lock_init(&ndp->lock); INIT_LIST_HEAD(&ndp->packages); - ndp->request_id = 0; + ndp->request_id = NCSI_REQ_START_IDX; for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { ndp->requests[i].id = i; ndp->requests[i].ndp = ndp; From a0509cbeef5dafbab42c42622e012bcc94c3eb9e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:51 +1100 Subject: [PATCH 1047/1050] net/ncsi: Allow to extend NCSI request properties There is only one NCSI request property for now: whether the response to the sent command needs to drive the workqueue or not. So we had one field (@driven) for that purpose, which left no flexibility to extend the NCSI request properties. This replaces @driven with @flags and @req_flags in the NCSI request and NCSI command argument structs. Each bit of the newly introduced field can be used for one property. No functional changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S.
Miller --- net/ncsi/internal.h | 8 +++++--- net/ncsi/ncsi-cmd.c | 2 +- net/ncsi/ncsi-manage.c | 19 ++++++++++--------- net/ncsi/ncsi-rsp.c | 2 +- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index c956fe8d80c3..26e929595b5e 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -207,7 +207,8 @@ struct ncsi_package { struct ncsi_request { unsigned char id; /* Request ID - 0 to 255 */ bool used; /* Request that has been assigned */ - bool driven; /* Drive state machine */ + unsigned int flags; /* NCSI request property */ +#define NCSI_REQ_FLAG_EVENT_DRIVEN 1 struct ncsi_dev_priv *ndp; /* Associated NCSI device */ struct sk_buff *cmd; /* Associated NCSI command packet */ struct sk_buff *rsp; /* Associated NCSI response packet */ @@ -276,7 +277,7 @@ struct ncsi_cmd_arg { unsigned char package; /* Destination package ID */ unsigned char channel; /* Detination channel ID or 0x1f */ unsigned short payload; /* Command packet payload length */ - bool driven; /* Drive the state machine? */ + unsigned int req_flags; /* NCSI request properties */ union { unsigned char bytes[16]; /* Command packet specific data */ unsigned short words[8]; @@ -315,7 +316,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, unsigned char id, struct ncsi_package **np, struct ncsi_channel **nc); -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven); +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags); void ncsi_free_request(struct ncsi_request *nr); struct ncsi_dev *ncsi_find_dev(struct net_device *dev); int ncsi_process_next_channel(struct ncsi_dev_priv *ndp); diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 21057a8ceeac..db7083bfd476 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -272,7 +272,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) struct sk_buff *skb; struct ncsi_request *nr; - nr = ncsi_alloc_request(ndp, nca->driven); + nr = ncsi_alloc_request(ndp, nca->req_flags); if (!nr) return NULL; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 00ce2c7fdb15..adf5401817c2 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -194,7 +194,7 @@ static void ncsi_channel_monitor(unsigned long data) nca.package = np->id; nca.channel = nc->id; nca.type = NCSI_PKT_CMD_GLS; - nca.driven = false; + nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", @@ -419,7 +419,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, * be same. Otherwise, the bogus response might be replied. So * the available IDs are allocated in round-robin fashion. 
*/ -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags) { struct ncsi_request *nr = NULL; int i, limit = ARRAY_SIZE(ndp->requests); @@ -433,7 +434,7 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; + nr->flags = req_flags; ndp->request_id = i + 1; goto found; } @@ -445,7 +446,7 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; + nr->flags = req_flags; ndp->request_id = i + 1; goto found; } @@ -473,7 +474,7 @@ void ncsi_free_request(struct ncsi_request *nr) nr->cmd = NULL; nr->rsp = NULL; nr->used = false; - driven = nr->driven; + driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN); spin_unlock_irqrestore(&ndp->lock, flags); if (driven && cmd && --ndp->pending_req_num == 0) @@ -527,7 +528,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_suspend: nd->state = ncsi_dev_state_suspend_select; @@ -596,7 +597,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_config: case ncsi_dev_state_config_sp: @@ -825,7 +826,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_probe: nd->state = ncsi_dev_state_probe_deselect; @@ -1101,7 +1102,7 @@ static int ncsi_inet6addr_event(struct notifier_block *this, return NOTIFY_OK; nca.ndp = ndp; - nca.driven = false; + nca.req_flags = 0; nca.package = np->id; nca.channel = nc->id; nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap; diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index af84389a6bf1..86cdaebd8d9e 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -317,7 +317,7 @@ static int ncsi_rsp_handler_gls(struct ncsi_request *nr) ncm->data[3] = ntohl(rsp->other); ncm->data[4] = ntohl(rsp->oem_status); - if (nr->driven) + if (nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN) return 0; /* Reset the channel monitor if it has been enabled */ From 83afdc6aad9d767cae271df1ca15641b9cbe3bfe Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:52 +1100 Subject: [PATCH 1048/1050] net/ncsi: Rework the channel monitoring The original NCSI channel monitoring was implemented based on a backoff algorithm: the GLS response should be received within the specified interval. Otherwise, the channel is regarded as dead and failover should be performed if the current channel is an active one. There are several problems in the implementation: (A) On BCM5718, we found that when the IID (Instance ID) in the GLS command packet changes from 255 to 1, the response corresponding to IID#1 never comes in. This means we cannot fairly judge the channel to be dead just because a single response is missed. (B) The code's readability should be improved. (C) We should fail over when the current channel is an active one, and channel monitoring should be marked as disabled before doing the failover. This reworks the channel monitoring to address all of the above issues; a simplified sketch of the resulting monitor state machine follows.
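To make the new monitor behaviour easier to follow before reading the diff, here is a minimal, self-contained userspace sketch of the reworked state machine. It is illustrative only: the channel struct and the send_gls()/report_link_down() stubs are made-up stand-ins, not the kernel code, and the case range uses the same GNU C extension the patch itself relies on.

#include <stdbool.h>
#include <stdio.h>

enum {
	MONITOR_START = 0,	/* send the first GLS */
	MONITOR_RETRY = 1,	/* send the second GLS */
	MONITOR_WAIT  = 2,	/* wait out the rest of the window */
	MONITOR_WAIT_MAX = 5
};

struct channel {
	unsigned int monitor_state;
	bool enabled;
};

static void send_gls(struct channel *c)
{
	printf("state %u: send GLS\n", c->monitor_state);
}

static void report_link_down(struct channel *c)
{
	printf("no GLS response within the window: channel dead, fail over\n");
	c->enabled = false;
}

/* Called once per second while monitoring is enabled; the GLS response
 * handler (not shown) would reset monitor_state to MONITOR_START. */
static void monitor_tick(struct channel *c)
{
	switch (c->monitor_state) {
	case MONITOR_START:
	case MONITOR_RETRY:
		send_gls(c);	/* two chances for the controller to answer */
		break;
	case MONITOR_WAIT ... MONITOR_WAIT_MAX:
		break;		/* sit out the remainder of the 5-second window */
	default:
		report_link_down(c);
		return;
	}
	c->monitor_state++;
}

int main(void)
{
	struct channel c = { MONITOR_START, true };

	while (c.enabled)	/* no responses ever arrive in this demo */
		monitor_tick(&c);
	return 0;
}

The point of the two send states is that a single lost response (as seen with IID 255 -> 1 on BCM5718) no longer condemns the channel; only a full window with no answer to either GLS does.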
The fields for channel monitoring are put into a separate struct and the states of channel monitoring are predefined. The channel is regarded as alive if the network controller responds to one of the two GLS commands, or both of them, within 5 seconds. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/internal.h | 12 +++++++++--- net/ncsi/ncsi-manage.c | 44 ++++++++++++++++++++++++------------------ net/ncsi/ncsi-rsp.c | 2 +- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 26e929595b5e..13290a70fa71 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -187,9 +187,15 @@ struct ncsi_channel { struct ncsi_channel_mode modes[NCSI_MODE_MAX]; struct ncsi_channel_filter *filters[NCSI_FILTER_MAX]; struct ncsi_channel_stats stats; - struct timer_list timer; /* Link monitor timer */ - bool enabled; /* Timer is enabled */ - unsigned int timeout; /* Times of timeout */ + struct { + struct timer_list timer; + bool enabled; + unsigned int state; +#define NCSI_CHANNEL_MONITOR_START 0 +#define NCSI_CHANNEL_MONITOR_RETRY 1 +#define NCSI_CHANNEL_MONITOR_WAIT 2 +#define NCSI_CHANNEL_MONITOR_WAIT_MAX 5 + } monitor; struct list_head node; struct list_head link; }; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index adf5401817c2..4742c7c6c748 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -172,15 +172,15 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_cmd_arg nca; bool enabled, chained; - unsigned int timeout; + unsigned int monitor_state; unsigned long flags; int state, ret; spin_lock_irqsave(&nc->lock, flags); state = nc->state; chained = !list_empty(&nc->link); - timeout = nc->timeout; - enabled = nc->enabled; + enabled = nc->monitor.enabled; + monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); if (!enabled || chained) @@ -189,7 +189,9 @@ static void ncsi_channel_monitor(unsigned long data) state != NCSI_CHANNEL_ACTIVE) return; - if (!(timeout % 2)) { + switch (monitor_state) { + case NCSI_CHANNEL_MONITOR_START: + case NCSI_CHANNEL_MONITOR_RETRY: nca.ndp = ndp; nca.package = np->id; nca.channel = nc->id; @@ -201,12 +203,16 @@ static void ncsi_channel_monitor(unsigned long data) ret); return; } - } - if (timeout + 1 >= 3) { + break; + case NCSI_CHANNEL_MONITOR_WAIT ...
NCSI_CHANNEL_MONITOR_WAIT_MAX: + break; + default: if (!(ndp->flags & NCSI_DEV_HWA) && - state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) { ncsi_report_link(ndp, true); + ndp->flags |= NCSI_DEV_RESHUFFLE; + } spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INVISIBLE; @@ -221,10 +227,9 @@ static void ncsi_channel_monitor(unsigned long data) } spin_lock_irqsave(&nc->lock, flags); - nc->timeout = timeout + 1; - nc->enabled = true; + nc->monitor.state++; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_start_channel_monitor(struct ncsi_channel *nc) @@ -232,12 +237,12 @@ void ncsi_start_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - WARN_ON_ONCE(nc->enabled); - nc->timeout = 0; - nc->enabled = true; + WARN_ON_ONCE(nc->monitor.enabled); + nc->monitor.enabled = true; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_stop_channel_monitor(struct ncsi_channel *nc) @@ -245,14 +250,14 @@ void ncsi_stop_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - if (!nc->enabled) { + if (!nc->monitor.enabled) { spin_unlock_irqrestore(&nc->lock, flags); return; } - nc->enabled = false; + nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); - del_timer_sync(&nc->timer); + del_timer_sync(&nc->monitor.timer); } struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, @@ -281,8 +286,9 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) nc->id = id; nc->package = np; nc->state = NCSI_CHANNEL_INACTIVE; - nc->enabled = false; - setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc); + nc->monitor.enabled = false; + setup_timer(&nc->monitor.timer, + ncsi_channel_monitor, (unsigned long)nc); spin_lock_init(&nc->lock); INIT_LIST_HEAD(&nc->link); for (index = 0; index < NCSI_CAP_MAX; index++) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 86cdaebd8d9e..087db775b3dc 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -322,7 +322,7 @@ static int ncsi_rsp_handler_gls(struct ncsi_request *nr) /* Reset the channel monitor if it has been enabled */ spin_lock_irqsave(&nc->lock, flags); - nc->timeout = 0; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); return 0; From c0cd1ba4f8bd8b5fef43bc51a2983673b8f086ff Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:53 +1100 Subject: [PATCH 1049/1050] net/ncsi: Introduce ncsi_stop_dev() This introduces ncsi_stop_dev(), as counterpart to ncsi_start_dev(), to stop the NCSI device so that it can be reenabled in future. This API should be called when the network device driver is going to shutdown the device. There are 3 things done in the function: Stop the channel monitoring; Reset channels to inactive state; Report NCSI link down. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. 
Miller --- include/net/ncsi.h | 5 +++++ net/ncsi/ncsi-manage.c | 37 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 1dbf42f79750..68680baac0fd 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -31,6 +31,7 @@ struct ncsi_dev { struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); +void ncsi_stop_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, @@ -44,6 +45,10 @@ static inline int ncsi_start_dev(struct ncsi_dev *nd) return -ENOTTY; } +static void ncsi_stop_dev(struct ncsi_dev *nd) +{ +} + static inline void ncsi_unregister_dev(struct ncsi_dev *nd) { } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 4742c7c6c748..5e509e547c2d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1187,11 +1187,7 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev); int ncsi_start_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); - struct ncsi_package *np; - struct ncsi_channel *nc; - unsigned long flags; - bool chained; - int old_state, ret; + int ret; if (nd->state != ncsi_dev_state_registered && nd->state != ncsi_dev_state_functional) @@ -1203,9 +1199,29 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - /* Reset channel's state and start over */ + if (ndp->flags & NCSI_DEV_HWA) + ret = ncsi_enable_hwa(ndp); + else + ret = ncsi_choose_active_channel(ndp); + + return ret; +} +EXPORT_SYMBOL_GPL(ncsi_start_dev); + +void ncsi_stop_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_package *np; + struct ncsi_channel *nc; + bool chained; + int old_state; + unsigned long flags; + + /* Stop the channel monitor and reset channel's state */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); chained = !list_empty(&nc->link); old_state = nc->state; @@ -1217,14 +1233,9 @@ int ncsi_start_dev(struct ncsi_dev *nd) } } - if (ndp->flags & NCSI_DEV_HWA) - ret = ncsi_enable_hwa(ndp); - else - ret = ncsi_choose_active_channel(ndp); - - return ret; + ncsi_report_link(ndp, true); } -EXPORT_SYMBOL_GPL(ncsi_start_dev); +EXPORT_SYMBOL_GPL(ncsi_stop_dev); void ncsi_unregister_dev(struct ncsi_dev *nd) { From 2c15f25b2923435515298589dcaa2eace6a948c1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:54 +1100 Subject: [PATCH 1050/1050] net/faraday: Stop NCSI device on shutdown This stops NCSI device when closing the network device so that the NCSI device can be reenabled later. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 90f9c5481290..262587240c86 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1190,6 +1190,8 @@ static int ftgmac100_stop(struct net_device *netdev) napi_disable(&priv->napi); if (netdev->phydev) phy_stop(netdev->phydev); + else if (priv->use_ncsi) + ncsi_stop_dev(priv->ndev); ftgmac100_stop_hw(priv); free_irq(priv->irq, netdev);
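To show how a NIC driver is expected to pair these NCSI entry points, here is a rough sketch following the same pattern as ftgmac100 above. It is illustrative only: my_priv, my_open, my_stop and the handler body are made-up names, while the ncsi_* calls and the ncsi_dev fields are the ones declared in include/net/ncsi.h and net/ncsi above.

/* Sketch of the open/stop pairing for the NCSI API (not a real driver). */
#include <linux/netdevice.h>
#include <net/ncsi.h>

struct my_priv {
	struct ncsi_dev *ndev;	/* from ncsi_register_dev(netdev, my_ncsi_handler) at probe time */
	bool use_ncsi;
};

/* Notifier passed to ncsi_register_dev(); called when NCSI state changes. */
static void my_ncsi_handler(struct ncsi_dev *nd)
{
	if (nd->state != ncsi_dev_state_functional)
		return;
	pr_info("NCSI link is %s\n", nd->link_up ? "up" : "down");
}

static int my_open(struct net_device *netdev)
{
	struct my_priv *priv = netdev_priv(netdev);

	/* ... enable MAC, IRQs and NAPI first ... */
	return priv->use_ncsi ? ncsi_start_dev(priv->ndev) : 0;
}

static int my_stop(struct net_device *netdev)
{
	struct my_priv *priv = netdev_priv(netdev);

	/* Quiesce the NCSI channels before killing the MAC, mirroring the
	 * ftgmac100_stop() hunk above, so a later open can re-enable them. */
	if (priv->use_ncsi)
		ncsi_stop_dev(priv->ndev);
	/* ... stop hardware, free IRQs ... */
	return 0;
}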