mirror of
https://github.com/torvalds/linux.git
synced 2025-01-01 07:42:07 +00:00
5bbcc0f595
Pull networking updates from David Miller: "Highlights: 1) Maintain the TCP retransmit queue using an rbtree, with 1GB windows at 100Gb this really has become necessary. From Eric Dumazet. 2) Multi-program support for cgroup+bpf, from Alexei Starovoitov. 3) Perform broadcast flooding in hardware in mv88e6xxx, from Andrew Lunn. 4) Add meter action support to openvswitch, from Andy Zhou. 5) Add a data meta pointer for BPF accessible packets, from Daniel Borkmann. 6) Namespace-ify almost all TCP sysctl knobs, from Eric Dumazet. 7) Turn on Broadcom Tags in b53 driver, from Florian Fainelli. 8) More work to move the RTNL mutex down, from Florian Westphal. 9) Add 'bpftool' utility, to help with bpf program introspection. From Jakub Kicinski. 10) Add new 'cpumap' type for XDP_REDIRECT action, from Jesper Dangaard Brouer. 11) Support 'blocks' of transformations in the packet scheduler which can span multiple network devices, from Jiri Pirko. 12) TC flower offload support in cxgb4, from Kumar Sanghvi. 13) Priority based stream scheduler for SCTP, from Marcelo Ricardo Leitner. 14) Thunderbolt networking driver, from Amir Levy and Mika Westerberg. 15) Add RED qdisc offloadability, and use it in mlxsw driver. From Nogah Frankel. 16) eBPF based device controller for cgroup v2, from Roman Gushchin. 17) Add some fundamental tracepoints for TCP, from Song Liu. 18) Remove garbage collection from ipv6 route layer, this is a significant accomplishment. From Wei Wang. 
19) Add multicast route offload support to mlxsw, from Yotam Gigi" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2177 commits) tcp: highest_sack fix geneve: fix fill_info when link down bpf: fix lockdep splat net: cdc_ncm: GetNtbFormat endian fix openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start netem: remove unnecessary 64 bit modulus netem: use 64 bit divide by rate tcp: Namespace-ify sysctl_tcp_default_congestion_control net: Protect iterations over net::fib_notifier_ops in fib_seq_sum() ipv6: set all.accept_dad to 0 by default uapi: fix linux/tls.h userspace compilation error usbnet: ipheth: prevent TX queue timeouts when device not ready vhost_net: conditionally enable tx polling uapi: fix linux/rxrpc.h userspace compilation errors net: stmmac: fix LPI transitioning for dwmac4 atm: horizon: Fix irq release error net-sysfs: trigger netlink notification on ifalias change via sysfs openvswitch: Using kfree_rcu() to simplify the code openvswitch: Make local function ovs_nsh_key_attr_size() static openvswitch: Fix return value check in ovs_meter_cmd_features() ...
139 lines
4.3 KiB
C
139 lines
4.3 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Dynamic byte queue limits. See include/linux/dynamic_queue_limits.h
|
|
*
|
|
* Copyright (c) 2011, Tom Herbert <therbert@google.com>
|
|
*/
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/dynamic_queue_limits.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/export.h>
|
|
|
|
/*
 * POSDIFF(A, B) - A minus B if that difference, reinterpreted as a signed
 * int, is strictly positive; otherwise 0.
 *
 * Note that both arguments are expanded more than once, so they must not
 * have side effects.
 */
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
|
|
/*
 * AFTER_EQ(A, B) - true when A minus B, reinterpreted as a signed int, is
 * non-negative. The comparison is done on the difference rather than on the
 * raw values (presumably so that it keeps working once the unsigned counters
 * wrap around).
 */
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
|
|
|
|
/* Records completed count and recalculates the queue limit */
|
|
void dql_completed(struct dql *dql, unsigned int count)
|
|
{
|
|
unsigned int inprogress, prev_inprogress, limit;
|
|
unsigned int ovlimit, completed, num_queued;
|
|
bool all_prev_completed;
|
|
|
|
num_queued = READ_ONCE(dql->num_queued);
|
|
|
|
/* Can't complete more than what's in queue */
|
|
BUG_ON(count > num_queued - dql->num_completed);
|
|
|
|
completed = dql->num_completed + count;
|
|
limit = dql->limit;
|
|
ovlimit = POSDIFF(num_queued - dql->num_completed, limit);
|
|
inprogress = num_queued - completed;
|
|
prev_inprogress = dql->prev_num_queued - dql->num_completed;
|
|
all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued);
|
|
|
|
if ((ovlimit && !inprogress) ||
|
|
(dql->prev_ovlimit && all_prev_completed)) {
|
|
/*
|
|
* Queue considered starved if:
|
|
* - The queue was over-limit in the last interval,
|
|
* and there is no more data in the queue.
|
|
* OR
|
|
* - The queue was over-limit in the previous interval and
|
|
* when enqueuing it was possible that all queued data
|
|
* had been consumed. This covers the case when queue
|
|
* may have becomes starved between completion processing
|
|
* running and next time enqueue was scheduled.
|
|
*
|
|
* When queue is starved increase the limit by the amount
|
|
* of bytes both sent and completed in the last interval,
|
|
* plus any previous over-limit.
|
|
*/
|
|
limit += POSDIFF(completed, dql->prev_num_queued) +
|
|
dql->prev_ovlimit;
|
|
dql->slack_start_time = jiffies;
|
|
dql->lowest_slack = UINT_MAX;
|
|
} else if (inprogress && prev_inprogress && !all_prev_completed) {
|
|
/*
|
|
* Queue was not starved, check if the limit can be decreased.
|
|
* A decrease is only considered if the queue has been busy in
|
|
* the whole interval (the check above).
|
|
*
|
|
* If there is slack, the amount of execess data queued above
|
|
* the the amount needed to prevent starvation, the queue limit
|
|
* can be decreased. To avoid hysteresis we consider the
|
|
* minimum amount of slack found over several iterations of the
|
|
* completion routine.
|
|
*/
|
|
unsigned int slack, slack_last_objs;
|
|
|
|
/*
|
|
* Slack is the maximum of
|
|
* - The queue limit plus previous over-limit minus twice
|
|
* the number of objects completed. Note that two times
|
|
* number of completed bytes is a basis for an upper bound
|
|
* of the limit.
|
|
* - Portion of objects in the last queuing operation that
|
|
* was not part of non-zero previous over-limit. That is
|
|
* "round down" by non-overlimit portion of the last
|
|
* queueing operation.
|
|
*/
|
|
slack = POSDIFF(limit + dql->prev_ovlimit,
|
|
2 * (completed - dql->num_completed));
|
|
slack_last_objs = dql->prev_ovlimit ?
|
|
POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
|
|
|
|
slack = max(slack, slack_last_objs);
|
|
|
|
if (slack < dql->lowest_slack)
|
|
dql->lowest_slack = slack;
|
|
|
|
if (time_after(jiffies,
|
|
dql->slack_start_time + dql->slack_hold_time)) {
|
|
limit = POSDIFF(limit, dql->lowest_slack);
|
|
dql->slack_start_time = jiffies;
|
|
dql->lowest_slack = UINT_MAX;
|
|
}
|
|
}
|
|
|
|
/* Enforce bounds on limit */
|
|
limit = clamp(limit, dql->min_limit, dql->max_limit);
|
|
|
|
if (limit != dql->limit) {
|
|
dql->limit = limit;
|
|
ovlimit = 0;
|
|
}
|
|
|
|
dql->adj_limit = limit + completed;
|
|
dql->prev_ovlimit = ovlimit;
|
|
dql->prev_last_obj_cnt = dql->last_obj_cnt;
|
|
dql->num_completed = completed;
|
|
dql->prev_num_queued = num_queued;
|
|
}
|
|
EXPORT_SYMBOL(dql_completed);
|
|
|
|
void dql_reset(struct dql *dql)
|
|
{
|
|
/* Reset all dynamic values */
|
|
dql->limit = 0;
|
|
dql->num_queued = 0;
|
|
dql->num_completed = 0;
|
|
dql->last_obj_cnt = 0;
|
|
dql->prev_num_queued = 0;
|
|
dql->prev_last_obj_cnt = 0;
|
|
dql->prev_ovlimit = 0;
|
|
dql->lowest_slack = UINT_MAX;
|
|
dql->slack_start_time = jiffies;
|
|
}
|
|
EXPORT_SYMBOL(dql_reset);
|
|
|
|
void dql_init(struct dql *dql, unsigned int hold_time)
|
|
{
|
|
dql->max_limit = DQL_MAX_LIMIT;
|
|
dql->min_limit = 0;
|
|
dql->slack_hold_time = hold_time;
|
|
dql_reset(dql);
|
|
}
|
|
EXPORT_SYMBOL(dql_init);
|