Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: [SUNHME]: Fix for sunhme failures on x86 [XFRM] xfrm_user: Better validation of user templates. [DCCP] tfrc: Binary search for reverse TFRC lookup [DCCP] ccid3: Deprecate TFRC_SMALLEST_P [DCCP] tfrc: Identify TFRC table limits and simplify code [DCCP] tfrc: Add protection against invalid parameters to TFRC routines [DCCP] tfrc: Fix small error in reverse lookup of p for given f(p) [DCCP] tfrc: Document boundaries and limits of the TFRC lookup table [DCCP] ccid3: Fix warning message about illegal ACK [DCCP] ccid3: Fix bug in calculation of send rate [DCCP]: Fix BUG in retransmission delay calculation [DCCP]: Use higher RTO default for CCID3
This commit is contained in:
commit
12472b4165
@ -3012,6 +3012,11 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev,
|
||||
#endif
|
||||
|
||||
err = -ENODEV;
|
||||
|
||||
if (pci_enable_device(pdev))
|
||||
goto err_out;
|
||||
pci_set_master(pdev);
|
||||
|
||||
if (!strcmp(prom_name, "SUNW,qfe") || !strcmp(prom_name, "qfe")) {
|
||||
qp = quattro_pci_find(pdev);
|
||||
if (qp == NULL)
|
||||
|
@ -89,4 +89,37 @@ config IP_DCCP_CCID3_DEBUG
|
||||
parameter to 0 or 1.
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
config IP_DCCP_CCID3_RTO
|
||||
int "Use higher bound for nofeedback timer"
|
||||
default 100
|
||||
depends on IP_DCCP_CCID3 && EXPERIMENTAL
|
||||
---help---
|
||||
Use higher lower bound for nofeedback timer expiration.
|
||||
|
||||
The TFRC nofeedback timer normally expires after the maximum of 4
|
||||
RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
|
||||
with a small RTT this can mean a high processing load and reduced
|
||||
performance, since then the nofeedback timer is triggered very
|
||||
frequently.
|
||||
|
||||
This option enables to set a higher lower bound for the nofeedback
|
||||
value. Values in units of milliseconds can be set here.
|
||||
|
||||
A value of 0 disables this feature by enforcing the value specified
|
||||
in RFC 3448. The following values have been suggested as bounds for
|
||||
experimental use:
|
||||
* 16-20ms to match the typical multimedia inter-frame interval
|
||||
* 100ms as a reasonable compromise [default]
|
||||
* 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
|
||||
|
||||
The default of 100ms is a compromise between a large value for
|
||||
efficient DCCP implementations, and a small value to avoid disrupting
|
||||
the network in times of congestion.
|
||||
|
||||
The purpose of the nofeedback timer is to slow DCCP down when there
|
||||
is serious network congestion: experimenting with larger values should
|
||||
therefore not be performed on WANs.
|
||||
|
||||
|
||||
endmenu
|
||||
|
@ -121,12 +121,15 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
|
||||
/*
|
||||
* Update X by
|
||||
* If (p > 0)
|
||||
* x_calc = calcX(s, R, p);
|
||||
* X_calc = calcX(s, R, p);
|
||||
* X = max(min(X_calc, 2 * X_recv), s / t_mbi);
|
||||
* Else
|
||||
* If (now - tld >= R)
|
||||
* X = max(min(2 * X, 2 * X_recv), s / R);
|
||||
* tld = now;
|
||||
*
|
||||
* If X has changed, we also update the scheduled send time t_now,
|
||||
* the inter-packet interval t_ipi, and the delta value.
|
||||
*/
|
||||
static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
|
||||
|
||||
@ -134,8 +137,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
|
||||
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
|
||||
const __u32 old_x = hctx->ccid3hctx_x;
|
||||
|
||||
/* To avoid large error in calcX */
|
||||
if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
|
||||
if (hctx->ccid3hctx_p > 0) {
|
||||
hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
|
||||
hctx->ccid3hctx_rtt,
|
||||
hctx->ccid3hctx_p);
|
||||
@ -223,16 +225,14 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
|
||||
ccid3_tx_state_name(hctx->ccid3hctx_state));
|
||||
/* Halve sending rate */
|
||||
|
||||
/* If (X_calc > 2 * X_recv)
|
||||
/* If (p == 0 || X_calc > 2 * X_recv)
|
||||
* X_recv = max(X_recv / 2, s / (2 * t_mbi));
|
||||
* Else
|
||||
* X_recv = X_calc / 4;
|
||||
*/
|
||||
BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
|
||||
hctx->ccid3hctx_x_calc == 0);
|
||||
BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
|
||||
|
||||
/* check also if p is zero -> x_calc is infinity? */
|
||||
if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
|
||||
if (hctx->ccid3hctx_p == 0 ||
|
||||
hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
|
||||
hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
|
||||
hctx->ccid3hctx_s / (2 * TFRC_T_MBI));
|
||||
@ -245,9 +245,10 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
|
||||
}
|
||||
/*
|
||||
* Schedule no feedback timer to expire in
|
||||
* max(4 * R, 2 * s/X) = max(4 * R, 2 * t_ipi)
|
||||
* max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
|
||||
* See comments in packet_recv() regarding the value of t_RTO.
|
||||
*/
|
||||
t_nfb = max(4 * hctx->ccid3hctx_rtt, 2 * hctx->ccid3hctx_t_ipi);
|
||||
t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
|
||||
break;
|
||||
case TFRC_SSTATE_NO_SENT:
|
||||
DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk);
|
||||
@ -338,7 +339,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
|
||||
* else
|
||||
* // send the packet in (t_nom - t_now) milliseconds.
|
||||
*/
|
||||
if (delay >= hctx->ccid3hctx_delta)
|
||||
if (delay - (long)hctx->ccid3hctx_delta >= 0)
|
||||
return delay / 1000L;
|
||||
break;
|
||||
case TFRC_SSTATE_TERM:
|
||||
@ -412,10 +413,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
struct dccp_tx_hist_entry *packet;
|
||||
struct timeval now;
|
||||
unsigned long t_nfb;
|
||||
u32 t_elapsed;
|
||||
u32 pinv;
|
||||
u32 x_recv;
|
||||
u32 r_sample;
|
||||
long r_sample, t_elapsed;
|
||||
|
||||
BUG_ON(hctx == NULL);
|
||||
|
||||
@ -426,31 +425,44 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
|
||||
opt_recv = &hctx->ccid3hctx_options_received;
|
||||
|
||||
t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
|
||||
x_recv = opt_recv->ccid3or_receive_rate;
|
||||
pinv = opt_recv->ccid3or_loss_event_rate;
|
||||
|
||||
switch (hctx->ccid3hctx_state) {
|
||||
case TFRC_SSTATE_NO_FBACK:
|
||||
case TFRC_SSTATE_FBACK:
|
||||
/* Calculate new round trip sample by
|
||||
* R_sample = (now - t_recvdata) - t_delay */
|
||||
/* get t_recvdata from history */
|
||||
/* get packet from history to look up t_recvdata */
|
||||
packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
|
||||
DCCP_SKB_CB(skb)->dccpd_ack_seq);
|
||||
if (unlikely(packet == NULL)) {
|
||||
DCCP_WARN("%s, sk=%p, seqno %llu(%s) does't exist "
|
||||
DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
|
||||
"in history!\n", dccp_role(sk), sk,
|
||||
(unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
|
||||
dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Update RTT */
|
||||
/* Update receive rate */
|
||||
hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
|
||||
|
||||
/* Update loss event rate */
|
||||
pinv = opt_recv->ccid3or_loss_event_rate;
|
||||
if (pinv == ~0U || pinv == 0)
|
||||
hctx->ccid3hctx_p = 0;
|
||||
else
|
||||
hctx->ccid3hctx_p = 1000000 / pinv;
|
||||
|
||||
dccp_timestamp(sk, &now);
|
||||
r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
|
||||
if (unlikely(r_sample <= t_elapsed))
|
||||
DCCP_WARN("r_sample=%uus,t_elapsed=%uus\n",
|
||||
|
||||
/*
|
||||
* Calculate new round trip sample as per [RFC 3448, 4.3] by
|
||||
* R_sample = (now - t_recvdata) - t_elapsed
|
||||
*/
|
||||
r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
|
||||
t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
|
||||
|
||||
if (unlikely(r_sample <= 0)) {
|
||||
DCCP_WARN("WARNING: R_sample (%ld) <= 0!\n", r_sample);
|
||||
r_sample = 0;
|
||||
} else if (unlikely(r_sample <= t_elapsed))
|
||||
DCCP_WARN("WARNING: r_sample=%ldus <= t_elapsed=%ldus\n",
|
||||
r_sample, t_elapsed);
|
||||
else
|
||||
r_sample -= t_elapsed;
|
||||
@ -473,31 +485,25 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
hctx->ccid3hctx_t_ld = now;
|
||||
|
||||
ccid3_update_send_time(hctx);
|
||||
|
||||
ccid3_pr_debug("%s(%p), s=%u, w_init=%u, "
|
||||
"R_sample=%ldus, X=%u\n", dccp_role(sk),
|
||||
sk, hctx->ccid3hctx_s, w_init, r_sample,
|
||||
hctx->ccid3hctx_x);
|
||||
|
||||
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
|
||||
} else {
|
||||
hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
|
||||
r_sample / 10;
|
||||
hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
|
||||
(u32)r_sample ) / 10;
|
||||
|
||||
ccid3_hc_tx_update_x(sk, &now);
|
||||
}
|
||||
|
||||
ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
|
||||
"r_sample=%us\n", dccp_role(sk), sk,
|
||||
hctx->ccid3hctx_rtt, r_sample);
|
||||
|
||||
/* Update receive rate */
|
||||
hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
|
||||
|
||||
/* Update loss event rate */
|
||||
if (pinv == ~0 || pinv == 0)
|
||||
hctx->ccid3hctx_p = 0;
|
||||
else {
|
||||
hctx->ccid3hctx_p = 1000000 / pinv;
|
||||
|
||||
if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
|
||||
hctx->ccid3hctx_p = TFRC_SMALLEST_P;
|
||||
ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
|
||||
dccp_role(sk), sk);
|
||||
}
|
||||
ccid3_pr_debug("%s(%p), RTT=%uus (sample=%ldus), s=%u, "
|
||||
"p=%u, X_calc=%u, X=%u\n", dccp_role(sk),
|
||||
sk, hctx->ccid3hctx_rtt, r_sample,
|
||||
hctx->ccid3hctx_s, hctx->ccid3hctx_p,
|
||||
hctx->ccid3hctx_x_calc,
|
||||
hctx->ccid3hctx_x);
|
||||
}
|
||||
|
||||
/* unschedule no feedback timer */
|
||||
@ -512,16 +518,20 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
*/
|
||||
sk->sk_write_space(sk);
|
||||
|
||||
/* Update timeout interval. We use the alternative variant of
|
||||
* [RFC 3448, 3.1] which sets the upper bound of t_rto to one
|
||||
* second, as it is suggested for TCP (see RFC 2988, 2.4). */
|
||||
/*
|
||||
* Update timeout interval for the nofeedback timer.
|
||||
* We use a configuration option to increase the lower bound.
|
||||
* This can help avoid triggering the nofeedback timer too often
|
||||
* ('spinning') on LANs with small RTTs.
|
||||
*/
|
||||
hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
|
||||
USEC_PER_SEC );
|
||||
CONFIG_IP_DCCP_CCID3_RTO *
|
||||
(USEC_PER_SEC/1000) );
|
||||
/*
|
||||
* Schedule no feedback timer to expire in
|
||||
* max(4 * R, 2 * s/X) = max(4 * R, 2 * t_ipi)
|
||||
* max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
|
||||
*/
|
||||
t_nfb = max(4 * hctx->ccid3hctx_rtt, 2 * hctx->ccid3hctx_t_ipi);
|
||||
t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
|
||||
|
||||
ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
|
||||
"expire in %lu jiffies (%luus)\n",
|
||||
@ -535,7 +545,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
hctx->ccid3hctx_idle = 1;
|
||||
break;
|
||||
case TFRC_SSTATE_NO_SENT:
|
||||
DCCP_WARN("Illegal ACK received - no packet has been sent\n");
|
||||
if (dccp_sk(sk)->dccps_role == DCCP_ROLE_CLIENT)
|
||||
DCCP_WARN("Illegal ACK received - no packet sent\n");
|
||||
/* fall through */
|
||||
case TFRC_SSTATE_TERM: /* ignore feedback when closing */
|
||||
break;
|
||||
|
@ -51,8 +51,6 @@
|
||||
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
|
||||
#define TFRC_T_MBI 64
|
||||
|
||||
#define TFRC_SMALLEST_P 40
|
||||
|
||||
enum ccid3_options {
|
||||
TFRC_OPT_LOSS_EVENT_RATE = 192,
|
||||
TFRC_OPT_LOSS_INTERVALS = 193,
|
||||
|
@ -18,10 +18,79 @@
|
||||
#include "tfrc.h"
|
||||
|
||||
#define TFRC_CALC_X_ARRSIZE 500
|
||||
#define TFRC_CALC_X_SPLIT 50000 /* 0.05 * 1000000, details below */
|
||||
#define TFRC_SMALLEST_P (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE)
|
||||
|
||||
#define TFRC_CALC_X_SPLIT 50000
|
||||
/* equivalent to 0.05 */
|
||||
/*
|
||||
TFRC TCP Reno Throughput Equation Lookup Table for f(p)
|
||||
|
||||
The following two-column lookup table implements a part of the TCP throughput
|
||||
equation from [RFC 3448, sec. 3.1]:
|
||||
|
||||
s
|
||||
X_calc = --------------------------------------------------------------
|
||||
R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3))
|
||||
|
||||
Where:
|
||||
X is the transmit rate in bytes/second
|
||||
s is the packet size in bytes
|
||||
R is the round trip time in seconds
|
||||
p is the loss event rate, between 0 and 1.0, of the number of loss
|
||||
events as a fraction of the number of packets transmitted
|
||||
t_RTO is the TCP retransmission timeout value in seconds
|
||||
b is the number of packets acknowledged by a single TCP ACK
|
||||
|
||||
We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes:
|
||||
|
||||
s
|
||||
X_calc = -------------------------------------------------------
|
||||
R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3))
|
||||
|
||||
which we can break down into:
|
||||
|
||||
s
|
||||
X_calc = ---------
|
||||
R * f(p)
|
||||
|
||||
where f(p) is given for 0 < p <= 1 by:
|
||||
|
||||
f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (p + 32*p^3)
|
||||
|
||||
Since this is kernel code, floating-point arithmetic is avoided in favour of
|
||||
integer arithmetic. This means that nearly all fractional parameters are
|
||||
scaled by 1000000:
|
||||
* the parameters p and R
|
||||
* the return result f(p)
|
||||
The lookup table therefore actually tabulates the following function g(q):
|
||||
|
||||
g(q) = 1000000 * f(q/1000000)
|
||||
|
||||
Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer
|
||||
granularity for the practically more relevant case of small values of p (up to
|
||||
5%), the second column is used; the first one ranges up to 100%. This split
|
||||
corresponds to the value of q = TFRC_CALC_X_SPLIT. At the same time this also
|
||||
determines the smallest resolution possible with this lookup table:
|
||||
|
||||
TFRC_SMALLEST_P = TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE
|
||||
|
||||
The entire table is generated by:
|
||||
for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) {
|
||||
lookup[i][0] = g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE);
|
||||
lookup[i][1] = g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE);
|
||||
}
|
||||
|
||||
With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1,
|
||||
lookup[0][0] = g(1000000/(M+1)) = 1000000 * f(0.2%)
|
||||
lookup[M][0] = g(1000000) = 1000000 * f(100%)
|
||||
lookup[0][1] = g(TFRC_SMALLEST_P) = 1000000 * f(0.01%)
|
||||
lookup[M][1] = g(TFRC_CALC_X_SPLIT) = 1000000 * f(5%)
|
||||
|
||||
In summary, the two columns represent f(p) for the following ranges:
|
||||
* The first column is for 0.002 <= p <= 1.0
|
||||
* The second column is for 0.0001 <= p <= 0.05
|
||||
Where the columns overlap, the second (finer-grained) is given preference,
|
||||
i.e. the first column is used only for p >= 0.05.
|
||||
*/
|
||||
static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
|
||||
{ 37172, 8172 },
|
||||
{ 53499, 11567 },
|
||||
@ -525,85 +594,69 @@ static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
|
||||
{ 243315981, 271305 }
|
||||
};
|
||||
|
||||
/* Calculate the send rate as per section 3.1 of RFC3448
|
||||
/* return largest index i such that fval <= lookup[i][small] */
|
||||
static inline u32 tfrc_binsearch(u32 fval, u8 small)
|
||||
{
|
||||
u32 try, low = 0, high = TFRC_CALC_X_ARRSIZE - 1;
|
||||
|
||||
Returns send rate in bytes per second
|
||||
while (low < high) {
|
||||
try = (low + high) / 2;
|
||||
if (fval <= tfrc_calc_x_lookup[try][small])
|
||||
high = try;
|
||||
else
|
||||
low = try + 1;
|
||||
}
|
||||
return high;
|
||||
}
|
||||
|
||||
Integer maths and lookups are used as not allowed floating point in kernel
|
||||
|
||||
The function for Xcalc as per section 3.1 of RFC3448 is:
|
||||
|
||||
X = s
|
||||
-------------------------------------------------------------
|
||||
R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
|
||||
|
||||
where
|
||||
X is the trasmit rate in bytes/second
|
||||
s is the packet size in bytes
|
||||
R is the round trip time in seconds
|
||||
p is the loss event rate, between 0 and 1.0, of the number of loss events
|
||||
as a fraction of the number of packets transmitted
|
||||
t_RTO is the TCP retransmission timeout value in seconds
|
||||
b is the number of packets acknowledged by a single TCP acknowledgement
|
||||
|
||||
we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
|
||||
|
||||
X = s
|
||||
-----------------------------------------------------------------------
|
||||
R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
|
||||
|
||||
|
||||
which we can break down into:
|
||||
|
||||
X = s
|
||||
--------
|
||||
R * f(p)
|
||||
|
||||
where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
|
||||
|
||||
Function parameters:
|
||||
s - bytes
|
||||
R - RTT in usecs
|
||||
p - loss rate (decimal fraction multiplied by 1,000,000)
|
||||
|
||||
Returns Xcalc in bytes per second
|
||||
|
||||
DON'T alter this code unless you run test cases against it as the code
|
||||
has been manipulated to stop underflow/overlow.
|
||||
|
||||
*/
|
||||
/**
|
||||
* tfrc_calc_x - Calculate the send rate as per section 3.1 of RFC3448
|
||||
*
|
||||
* @s: packet size in bytes
|
||||
* @R: RTT scaled by 1000000 (i.e., microseconds)
|
||||
* @p: loss ratio estimate scaled by 1000000
|
||||
* Returns X_calc in bytes per second (not scaled).
|
||||
*
|
||||
* Note: DO NOT alter this code unless you run test cases against it,
|
||||
* as the code has been optimized to stop underflow/overflow.
|
||||
*/
|
||||
u32 tfrc_calc_x(u16 s, u32 R, u32 p)
|
||||
{
|
||||
int index;
|
||||
u32 f;
|
||||
u64 tmp1, tmp2;
|
||||
|
||||
if (p < TFRC_CALC_X_SPLIT)
|
||||
index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
|
||||
else
|
||||
index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
|
||||
/* check against invalid parameters and divide-by-zero */
|
||||
BUG_ON(p > 1000000); /* p must not exceed 100% */
|
||||
BUG_ON(p == 0); /* f(0) = 0, divide by zero */
|
||||
if (R == 0) { /* possible divide by zero */
|
||||
DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc.");
|
||||
return ~0U;
|
||||
}
|
||||
|
||||
if (index < 0)
|
||||
/* p should be 0 unless there is a bug in my code */
|
||||
index = 0;
|
||||
if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
|
||||
if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
|
||||
DCCP_WARN("Value of p (%d) below resolution. "
|
||||
"Substituting %d\n", p, TFRC_SMALLEST_P);
|
||||
index = 0;
|
||||
} else /* 0.0001 <= p <= 0.05 */
|
||||
index = p/TFRC_SMALLEST_P - 1;
|
||||
|
||||
if (R == 0) {
|
||||
DCCP_WARN("RTT==0, setting to 1\n");
|
||||
R = 1; /* RTT can't be zero or else divide by zero */
|
||||
f = tfrc_calc_x_lookup[index][1];
|
||||
|
||||
} else { /* 0.05 < p <= 1.00 */
|
||||
index = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1;
|
||||
|
||||
f = tfrc_calc_x_lookup[index][0];
|
||||
}
|
||||
|
||||
BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
|
||||
|
||||
if (p >= TFRC_CALC_X_SPLIT)
|
||||
f = tfrc_calc_x_lookup[index][0];
|
||||
else
|
||||
f = tfrc_calc_x_lookup[index][1];
|
||||
|
||||
/* The following computes X = s/(R*f(p)) in bytes per second. Since f(p)
|
||||
* and R are both scaled by 1000000, we need to multiply by 1000000^2.
|
||||
* ==> DO NOT alter this unless you test against overflow on 32 bit */
|
||||
tmp1 = ((u64)s * 100000000);
|
||||
tmp2 = ((u64)R * (u64)f);
|
||||
do_div(tmp2, 10000);
|
||||
do_div(tmp1, tmp2);
|
||||
/* Don't alter above math unless you test due to overflow on 32 bit */
|
||||
|
||||
return (u32)tmp1;
|
||||
}
|
||||
@ -611,33 +664,36 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x);
|
||||
|
||||
/*
|
||||
* args: fvalue - function value to match
|
||||
* returns: p closest to that value
|
||||
* tfrc_calc_x_reverse_lookup - try to find p given f(p)
|
||||
*
|
||||
* both fvalue and p are multiplied by 1,000,000 to use ints
|
||||
* @fvalue: function value to match, scaled by 1000000
|
||||
* Returns closest match for p, also scaled by 1000000
|
||||
*/
|
||||
u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
|
||||
{
|
||||
int ctr = 0;
|
||||
int small;
|
||||
int index;
|
||||
|
||||
if (fvalue < tfrc_calc_x_lookup[0][1])
|
||||
if (fvalue == 0) /* f(p) = 0 whenever p = 0 */
|
||||
return 0;
|
||||
|
||||
if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
|
||||
small = 1;
|
||||
else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
|
||||
/* Error cases. */
|
||||
if (fvalue < tfrc_calc_x_lookup[0][1]) {
|
||||
DCCP_WARN("fvalue %d smaller than resolution\n", fvalue);
|
||||
return tfrc_calc_x_lookup[0][1];
|
||||
}
|
||||
if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
|
||||
DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue);
|
||||
return 1000000;
|
||||
else
|
||||
small = 0;
|
||||
}
|
||||
|
||||
while (fvalue > tfrc_calc_x_lookup[ctr][small])
|
||||
ctr++;
|
||||
if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) {
|
||||
index = tfrc_binsearch(fvalue, 1);
|
||||
return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE;
|
||||
}
|
||||
|
||||
if (small)
|
||||
return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE;
|
||||
else
|
||||
return 1000000 * ctr / TFRC_CALC_X_ARRSIZE;
|
||||
/* else ... it must be in the coarse-grained column */
|
||||
index = tfrc_binsearch(fvalue, 0);
|
||||
return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
|
||||
|
@ -858,7 +858,6 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
|
||||
int i;
|
||||
|
||||
xp->xfrm_nr = nr;
|
||||
xp->family = ut->family;
|
||||
for (i = 0; i < nr; i++, ut++) {
|
||||
struct xfrm_tmpl *t = &xp->xfrm_vec[i];
|
||||
|
||||
@ -876,19 +875,53 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
|
||||
}
|
||||
}
|
||||
|
||||
static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (nr > XFRM_MAX_DEPTH)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
/* We never validated the ut->family value, so many
|
||||
* applications simply leave it at zero. The check was
|
||||
* never made and ut->family was ignored because all
|
||||
* templates could be assumed to have the same family as
|
||||
* the policy itself. Now that we will have ipv4-in-ipv6
|
||||
* and ipv6-in-ipv4 tunnels, this is no longer true.
|
||||
*/
|
||||
if (!ut[i].family)
|
||||
ut[i].family = family;
|
||||
|
||||
switch (ut[i].family) {
|
||||
case AF_INET:
|
||||
break;
|
||||
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
|
||||
case AF_INET6:
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return -EINVAL;
|
||||
};
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
|
||||
{
|
||||
struct rtattr *rt = xfrma[XFRMA_TMPL-1];
|
||||
struct xfrm_user_tmpl *utmpl;
|
||||
int nr;
|
||||
|
||||
if (!rt) {
|
||||
pol->xfrm_nr = 0;
|
||||
} else {
|
||||
nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
|
||||
struct xfrm_user_tmpl *utmpl = RTA_DATA(rt);
|
||||
int nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
|
||||
int err;
|
||||
|
||||
if (nr > XFRM_MAX_DEPTH)
|
||||
return -EINVAL;
|
||||
err = validate_tmpl(nr, utmpl, pol->family);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
copy_templates(pol, RTA_DATA(rt), nr);
|
||||
}
|
||||
@ -1530,7 +1563,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xf
|
||||
}
|
||||
|
||||
/* build an XP */
|
||||
xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err); if (!xp) {
|
||||
xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err);
|
||||
if (!xp) {
|
||||
kfree(x);
|
||||
return err;
|
||||
}
|
||||
@ -1979,7 +2013,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
|
||||
return NULL;
|
||||
|
||||
nr = ((len - sizeof(*p)) / sizeof(*ut));
|
||||
if (nr > XFRM_MAX_DEPTH)
|
||||
if (validate_tmpl(nr, ut, p->sel.family))
|
||||
return NULL;
|
||||
|
||||
if (p->dir > XFRM_POLICY_OUT)
|
||||
|
Loading…
Reference in New Issue
Block a user