tcp: allow for bigger reordering level
While testing the upcoming patch from Yaogong (converting the out-of-order queue into an RB tree), I hit the max reordering level of the Linux TCP stack. The reordering level was limited to 127 for no good reason, and some network setups [1] can easily reach this limit and get limited throughput. Allow a new max limit of 300, and add a sysctl so that admins can set even bigger (or lower) values if needed. [1] Aggregation of links, per-packet load balancing, fabrics not doing deep packet inspection, alternative TCP congestion modules... Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Yaogong Wang <wygivan@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
7aef06db0f
commit
dca145ffaa
@ -2230,11 +2230,8 @@ balance-rr: This mode is the only mode that will permit a single
|
|||||||
|
|
||||||
It is possible to adjust TCP/IP's congestion limits by
|
It is possible to adjust TCP/IP's congestion limits by
|
||||||
altering the net.ipv4.tcp_reordering sysctl parameter. The
|
altering the net.ipv4.tcp_reordering sysctl parameter. The
|
||||||
usual default value is 3, and the maximum useful value is 127.
|
usual default value is 3. But keep in mind that the TCP stack is able
|
||||||
For a four interface balance-rr bond, expect that a single
|
to automatically increase this when it detects reorders.
|
||||||
TCP/IP stream will utilize no more than approximately 2.3
|
|
||||||
interface's worth of throughput, even after adjusting
|
|
||||||
tcp_reordering.
|
|
||||||
|
|
||||||
Note that the fraction of packets that will be delivered out of
|
Note that the fraction of packets that will be delivered out of
|
||||||
order is highly variable, and is unlikely to be zero. The level
|
order is highly variable, and is unlikely to be zero. The level
|
||||||
|
@ -376,9 +376,17 @@ tcp_orphan_retries - INTEGER
|
|||||||
may consume significant resources. Cf. tcp_max_orphans.
|
may consume significant resources. Cf. tcp_max_orphans.
|
||||||
|
|
||||||
tcp_reordering - INTEGER
|
tcp_reordering - INTEGER
|
||||||
Maximal reordering of packets in a TCP stream.
|
Initial reordering level of packets in a TCP stream.
|
||||||
|
TCP stack can then dynamically adjust flow reordering level
|
||||||
|
between this initial value and tcp_max_reordering.
|
||||||
Default: 3
|
Default: 3
|
||||||
|
|
||||||
|
tcp_max_reordering - INTEGER
|
||||||
|
Maximal reordering level of packets in a TCP stream.
|
||||||
|
300 is a fairly conservative value, but you might increase it
|
||||||
|
if paths are using per-packet load balancing (like bonding rr mode).
|
||||||
|
Default: 300
|
||||||
|
|
||||||
tcp_retrans_collapse - BOOLEAN
|
tcp_retrans_collapse - BOOLEAN
|
||||||
Bug-to-bug compatibility with some broken printers.
|
Bug-to-bug compatibility with some broken printers.
|
||||||
On retransmit try to send bigger packets to work around bugs in
|
On retransmit try to send bigger packets to work around bugs in
|
||||||
|
@ -204,10 +204,10 @@ struct tcp_sock {
|
|||||||
|
|
||||||
u16 urg_data; /* Saved octet of OOB data and control flags */
|
u16 urg_data; /* Saved octet of OOB data and control flags */
|
||||||
u8 ecn_flags; /* ECN status bits. */
|
u8 ecn_flags; /* ECN status bits. */
|
||||||
u8 reordering; /* Packet reordering metric. */
|
u8 keepalive_probes; /* num of allowed keep alive probes */
|
||||||
|
u32 reordering; /* Packet reordering metric. */
|
||||||
u32 snd_up; /* Urgent pointer */
|
u32 snd_up; /* Urgent pointer */
|
||||||
|
|
||||||
u8 keepalive_probes; /* num of allowed keep alive probes */
|
|
||||||
/*
|
/*
|
||||||
* Options received (usually on last packet, some only on SYN packets).
|
* Options received (usually on last packet, some only on SYN packets).
|
||||||
*/
|
*/
|
||||||
|
@ -70,9 +70,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
|
|||||||
/* After receiving this amount of duplicate ACKs fast retransmit starts. */
|
/* After receiving this amount of duplicate ACKs fast retransmit starts. */
|
||||||
#define TCP_FASTRETRANS_THRESH 3
|
#define TCP_FASTRETRANS_THRESH 3
|
||||||
|
|
||||||
/* Maximal reordering. */
|
|
||||||
#define TCP_MAX_REORDERING 127
|
|
||||||
|
|
||||||
/* Maximal number of ACKs sent quickly to accelerate slow-start. */
|
/* Maximal number of ACKs sent quickly to accelerate slow-start. */
|
||||||
#define TCP_MAX_QUICKACKS 16U
|
#define TCP_MAX_QUICKACKS 16U
|
||||||
|
|
||||||
@ -252,6 +249,7 @@ extern int sysctl_tcp_abort_on_overflow;
|
|||||||
extern int sysctl_tcp_max_orphans;
|
extern int sysctl_tcp_max_orphans;
|
||||||
extern int sysctl_tcp_fack;
|
extern int sysctl_tcp_fack;
|
||||||
extern int sysctl_tcp_reordering;
|
extern int sysctl_tcp_reordering;
|
||||||
|
extern int sysctl_tcp_max_reordering;
|
||||||
extern int sysctl_tcp_dsack;
|
extern int sysctl_tcp_dsack;
|
||||||
extern long sysctl_tcp_mem[3];
|
extern long sysctl_tcp_mem[3];
|
||||||
extern int sysctl_tcp_wmem[3];
|
extern int sysctl_tcp_wmem[3];
|
||||||
|
@ -495,6 +495,13 @@ static struct ctl_table ipv4_table[] = {
|
|||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_dointvec
|
.proc_handler = proc_dointvec
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.procname = "tcp_max_reordering",
|
||||||
|
.data = &sysctl_tcp_max_reordering,
|
||||||
|
.maxlen = sizeof(int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec
|
||||||
|
},
|
||||||
{
|
{
|
||||||
.procname = "tcp_dsack",
|
.procname = "tcp_dsack",
|
||||||
.data = &sysctl_tcp_dsack,
|
.data = &sysctl_tcp_dsack,
|
||||||
|
@ -81,6 +81,7 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
|
|||||||
int sysctl_tcp_sack __read_mostly = 1;
|
int sysctl_tcp_sack __read_mostly = 1;
|
||||||
int sysctl_tcp_fack __read_mostly = 1;
|
int sysctl_tcp_fack __read_mostly = 1;
|
||||||
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
|
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
|
||||||
|
int sysctl_tcp_max_reordering __read_mostly = 300;
|
||||||
EXPORT_SYMBOL(sysctl_tcp_reordering);
|
EXPORT_SYMBOL(sysctl_tcp_reordering);
|
||||||
int sysctl_tcp_dsack __read_mostly = 1;
|
int sysctl_tcp_dsack __read_mostly = 1;
|
||||||
int sysctl_tcp_app_win __read_mostly = 31;
|
int sysctl_tcp_app_win __read_mostly = 31;
|
||||||
@ -833,7 +834,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
|
|||||||
if (metric > tp->reordering) {
|
if (metric > tp->reordering) {
|
||||||
int mib_idx;
|
int mib_idx;
|
||||||
|
|
||||||
tp->reordering = min(TCP_MAX_REORDERING, metric);
|
tp->reordering = min(sysctl_tcp_max_reordering, metric);
|
||||||
|
|
||||||
/* This exciting event is worth to be remembered. 8) */
|
/* This exciting event is worth to be remembered. 8) */
|
||||||
if (ts)
|
if (ts)
|
||||||
|
Loading…
Reference in New Issue
Block a user