mirror of
https://github.com/torvalds/linux.git
synced 2024-11-28 07:01:32 +00:00
Merge branch 'master' of git://eden-feed.erg.abdn.ac.uk/net-next-2.6
This commit is contained in:
commit
ded67c0e2f
@ -45,25 +45,6 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
|
||||
|
||||
Socket options
|
||||
==============
|
||||
DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
|
||||
a policy ID as argument and can only be set before the connection (i.e. changes
|
||||
during an established connection are not supported). Currently, two policies are
|
||||
defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
|
||||
and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows passing a
|
||||
u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
|
||||
a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
|
||||
be formatted using a cmsg(3) message header filled in as follows:
|
||||
cmsg->cmsg_level = SOL_DCCP;
|
||||
cmsg->cmsg_type = DCCP_SCM_PRIORITY;
|
||||
cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */
|
||||
|
||||
DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
|
||||
value is always interpreted as unbounded queue length. If different from zero,
|
||||
the interpretation of this parameter depends on the current dequeuing policy
|
||||
(see above): the "simple" policy will enforce a fixed queue size by returning
|
||||
EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
|
||||
lowest-priority packet first. The default value for this parameter is
|
||||
initialised from /proc/sys/net/dccp/default/tx_qlen.
|
||||
|
||||
DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
|
||||
service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
|
||||
@ -76,24 +57,6 @@ can be set before calling bind().
|
||||
DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
|
||||
size (application payload size) in bytes, see RFC 4340, section 14.
|
||||
|
||||
DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
|
||||
supported by the endpoint (see include/linux/dccp.h for symbolic constants).
|
||||
The caller needs to provide a sufficiently large (> 2) array of type uint8_t.
|
||||
|
||||
DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
|
||||
time, combining the operation of the next two socket options. This option is
|
||||
preferable to the latter two, since often applications will use the same
|
||||
type of CCID for both directions; and mixed use of CCIDs is not currently well
|
||||
understood. This socket option takes as argument at least one uint8_t value, or
|
||||
an array of uint8_t values, which must match available CCIDS (see above). CCIDs
|
||||
must be registered on the socket before calling connect() or listen().
|
||||
|
||||
DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
|
||||
the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
|
||||
Please note that the getsockopt argument type here is `int', not uint8_t.
|
||||
|
||||
DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
|
||||
|
||||
DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
|
||||
timewait state when closing the connection (RFC 4340, 8.3). The usual case is
|
||||
that the closing server sends a CloseReq, whereupon the client holds timewait
|
||||
@ -152,16 +115,23 @@ retries2
|
||||
importance for retransmitted acknowledgments and feature negotiation,
|
||||
data packets are never retransmitted. Analogue of tcp_retries2.
|
||||
|
||||
send_ndp = 1
|
||||
Whether or not to send NDP count options (sec. 7.7.2).
|
||||
|
||||
send_ackvec = 1
|
||||
Whether or not to send Ack Vector options (sec. 11.5).
|
||||
|
||||
ack_ratio = 2
|
||||
The default Ack Ratio (sec. 11.3) to use.
|
||||
|
||||
tx_ccid = 2
|
||||
Default CCID for the sender-receiver half-connection. Depending on the
|
||||
choice of CCID, the Send Ack Vector feature is enabled automatically.
|
||||
Default CCID for the sender-receiver half-connection.
|
||||
|
||||
rx_ccid = 2
|
||||
Default CCID for the receiver-sender half-connection; see tx_ccid.
|
||||
Default CCID for the receiver-sender half-connection.
|
||||
|
||||
seq_window = 100
|
||||
The initial sequence window (sec. 7.5.2) of the sender. This influences
|
||||
the local ackno validity and the remote seqno validity windows (7.5.1).
|
||||
The initial sequence window (sec. 7.5.2).
|
||||
|
||||
tx_qlen = 5
|
||||
The size of the transmit buffer in packets. A value of 0 corresponds
|
||||
|
@ -165,13 +165,9 @@ enum {
|
||||
DCCPO_TIMESTAMP_ECHO = 42,
|
||||
DCCPO_ELAPSED_TIME = 43,
|
||||
DCCPO_MAX = 45,
|
||||
DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */
|
||||
DCCPO_MAX_RX_CCID_SPECIFIC = 191,
|
||||
DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */
|
||||
DCCPO_MAX_TX_CCID_SPECIFIC = 255,
|
||||
DCCPO_MIN_CCID_SPECIFIC = 128,
|
||||
DCCPO_MAX_CCID_SPECIFIC = 255,
|
||||
};
|
||||
/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */
|
||||
#define DCCP_SINGLE_OPT_MAXLEN 253
|
||||
|
||||
/* DCCP CCIDS */
|
||||
enum {
|
||||
@ -180,36 +176,27 @@ enum {
|
||||
};
|
||||
|
||||
/* DCCP features (RFC 4340 section 6.4) */
|
||||
enum dccp_feature_numbers {
|
||||
enum {
|
||||
DCCPF_RESERVED = 0,
|
||||
DCCPF_CCID = 1,
|
||||
DCCPF_SHORT_SEQNOS = 2,
|
||||
DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */
|
||||
DCCPF_SEQUENCE_WINDOW = 3,
|
||||
DCCPF_ECN_INCAPABLE = 4,
|
||||
DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */
|
||||
DCCPF_ACK_RATIO = 5,
|
||||
DCCPF_SEND_ACK_VECTOR = 6,
|
||||
DCCPF_SEND_NDP_COUNT = 7,
|
||||
DCCPF_MIN_CSUM_COVER = 8,
|
||||
DCCPF_DATA_CHECKSUM = 9,
|
||||
DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */
|
||||
/* 10-127 reserved */
|
||||
DCCPF_MIN_CCID_SPECIFIC = 128,
|
||||
DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */
|
||||
DCCPF_MAX_CCID_SPECIFIC = 255,
|
||||
};
|
||||
|
||||
/* DCCP socket control message types for cmsg */
|
||||
enum dccp_cmsg_type {
|
||||
DCCP_SCM_PRIORITY = 1,
|
||||
DCCP_SCM_QPOLICY_MAX = 0xFFFF,
|
||||
/* ^-- Up to here reserved exclusively for qpolicy parameters */
|
||||
DCCP_SCM_MAX
|
||||
};
|
||||
|
||||
/* DCCP priorities for outgoing/queued packets */
|
||||
enum dccp_packet_dequeueing_policy {
|
||||
DCCPQ_POLICY_SIMPLE,
|
||||
DCCPQ_POLICY_PRIO,
|
||||
DCCPQ_POLICY_MAX
|
||||
/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */
|
||||
struct dccp_so_feat {
|
||||
__u8 dccpsf_feat;
|
||||
__u8 __user *dccpsf_val;
|
||||
__u8 dccpsf_len;
|
||||
};
|
||||
|
||||
/* DCCP socket options */
|
||||
@ -221,12 +208,6 @@ enum dccp_packet_dequeueing_policy {
|
||||
#define DCCP_SOCKOPT_SERVER_TIMEWAIT 6
|
||||
#define DCCP_SOCKOPT_SEND_CSCOV 10
|
||||
#define DCCP_SOCKOPT_RECV_CSCOV 11
|
||||
#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12
|
||||
#define DCCP_SOCKOPT_CCID 13
|
||||
#define DCCP_SOCKOPT_TX_CCID 14
|
||||
#define DCCP_SOCKOPT_RX_CCID 15
|
||||
#define DCCP_SOCKOPT_QPOLICY_ID 16
|
||||
#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17
|
||||
#define DCCP_SOCKOPT_CCID_RX_INFO 128
|
||||
#define DCCP_SOCKOPT_CCID_TX_INFO 192
|
||||
|
||||
@ -374,13 +355,62 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
|
||||
return __dccp_hdr_len(dccp_hdr(skb));
|
||||
}
|
||||
|
||||
|
||||
/* initial values for each feature */
|
||||
#define DCCPF_INITIAL_SEQUENCE_WINDOW 100
|
||||
#define DCCPF_INITIAL_ACK_RATIO 2
|
||||
#define DCCPF_INITIAL_CCID DCCPC_CCID2
|
||||
#define DCCPF_INITIAL_SEND_ACK_VECTOR 1
|
||||
/* FIXME: for now we default to 1 but it should really be 0 */
|
||||
#define DCCPF_INITIAL_SEND_NDP_COUNT 1
|
||||
|
||||
/**
|
||||
* struct dccp_minisock - Minimal DCCP connection representation
|
||||
*
|
||||
* Will be used to pass the state from dccp_request_sock to dccp_sock.
|
||||
*
|
||||
* @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
|
||||
* @dccpms_ccid - Congestion Control Id (CCID) (section 10)
|
||||
* @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
|
||||
* @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
|
||||
* @dccpms_ack_ratio - Ack Ratio Feature (section 11.3)
|
||||
* @dccpms_pending - List of features being negotiated
|
||||
* @dccpms_conf -
|
||||
*/
|
||||
struct dccp_minisock {
|
||||
__u64 dccpms_sequence_window;
|
||||
__u8 dccpms_rx_ccid;
|
||||
__u8 dccpms_tx_ccid;
|
||||
__u8 dccpms_send_ack_vector;
|
||||
__u8 dccpms_send_ndp_count;
|
||||
__u8 dccpms_ack_ratio;
|
||||
struct list_head dccpms_pending;
|
||||
struct list_head dccpms_conf;
|
||||
};
|
||||
|
||||
struct dccp_opt_conf {
|
||||
__u8 *dccpoc_val;
|
||||
__u8 dccpoc_len;
|
||||
};
|
||||
|
||||
struct dccp_opt_pend {
|
||||
struct list_head dccpop_node;
|
||||
__u8 dccpop_type;
|
||||
__u8 dccpop_feat;
|
||||
__u8 *dccpop_val;
|
||||
__u8 dccpop_len;
|
||||
int dccpop_conf;
|
||||
struct dccp_opt_conf *dccpop_sc;
|
||||
};
|
||||
|
||||
extern void dccp_minisock_init(struct dccp_minisock *dmsk);
|
||||
|
||||
/**
|
||||
* struct dccp_request_sock - represent DCCP-specific connection request
|
||||
* @dreq_inet_rsk: structure inherited from
|
||||
* @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1)
|
||||
* @dreq_isr: initial sequence number received on the Request
|
||||
* @dreq_service: service code present on the Request (there is just one)
|
||||
* @dreq_featneg: feature negotiation options for this connection
|
||||
* The following two fields are analogous to the ones in dccp_sock:
|
||||
* @dreq_timestamp_echo: last received timestamp to echo (13.1)
|
||||
* @dreq_timestamp_time: the time of receiving the last @dreq_timestamp_echo
|
||||
@ -390,7 +420,6 @@ struct dccp_request_sock {
|
||||
__u64 dreq_iss;
|
||||
__u64 dreq_isr;
|
||||
__be32 dreq_service;
|
||||
struct list_head dreq_featneg;
|
||||
__u32 dreq_timestamp_echo;
|
||||
__u32 dreq_timestamp_time;
|
||||
};
|
||||
@ -462,28 +491,21 @@ struct dccp_ackvec;
|
||||
* @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
|
||||
* @dccps_l_ack_ratio - feature-local Ack Ratio
|
||||
* @dccps_r_ack_ratio - feature-remote Ack Ratio
|
||||
* @dccps_l_seq_win - local Sequence Window (influences ack number validity)
|
||||
* @dccps_r_seq_win - remote Sequence Window (influences seq number validity)
|
||||
* @dccps_pcslen - sender partial checksum coverage (via sockopt)
|
||||
* @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
|
||||
* @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
|
||||
* @dccps_ndp_count - number of Non Data Packets since last data packet
|
||||
* @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
|
||||
* @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
|
||||
* @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
|
||||
* @dccps_minisock - associated minisock (accessed via dccp_msk)
|
||||
* @dccps_hc_rx_ackvec - rx half connection ack vector
|
||||
* @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
|
||||
* @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
|
||||
* @dccps_options_received - parsed set of retrieved options
|
||||
* @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
|
||||
* @dccps_tx_qlen - maximum length of the TX queue
|
||||
* @dccps_role - role of this sock, one of %dccp_role
|
||||
* @dccps_hc_rx_insert_options - receiver wants to add options when acking
|
||||
* @dccps_hc_tx_insert_options - sender wants to add options when sending
|
||||
* @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
|
||||
* @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
|
||||
* @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
|
||||
* @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
|
||||
* @dccps_xmit_timer - timer for when CCID is not ready to send
|
||||
* @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
|
||||
*/
|
||||
struct dccp_sock {
|
||||
@ -507,26 +529,19 @@ struct dccp_sock {
|
||||
__u32 dccps_timestamp_time;
|
||||
__u16 dccps_l_ack_ratio;
|
||||
__u16 dccps_r_ack_ratio;
|
||||
__u64 dccps_l_seq_win:48;
|
||||
__u64 dccps_r_seq_win:48;
|
||||
__u8 dccps_pcslen:4;
|
||||
__u8 dccps_pcrlen:4;
|
||||
__u8 dccps_send_ndp_count:1;
|
||||
__u16 dccps_pcslen;
|
||||
__u16 dccps_pcrlen;
|
||||
__u64 dccps_ndp_count:48;
|
||||
unsigned long dccps_rate_last;
|
||||
struct list_head dccps_featneg;
|
||||
struct dccp_minisock dccps_minisock;
|
||||
struct dccp_ackvec *dccps_hc_rx_ackvec;
|
||||
struct ccid *dccps_hc_rx_ccid;
|
||||
struct ccid *dccps_hc_tx_ccid;
|
||||
struct dccp_options_received dccps_options_received;
|
||||
__u8 dccps_qpolicy;
|
||||
__u32 dccps_tx_qlen;
|
||||
enum dccp_role dccps_role:2;
|
||||
__u8 dccps_hc_rx_insert_options:1;
|
||||
__u8 dccps_hc_tx_insert_options:1;
|
||||
__u8 dccps_server_timewait:1;
|
||||
__u8 dccps_sync_scheduled:1;
|
||||
struct tasklet_struct dccps_xmitlet;
|
||||
struct timer_list dccps_xmit_timer;
|
||||
};
|
||||
|
||||
@ -535,6 +550,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk)
|
||||
return (struct dccp_sock *)sk;
|
||||
}
|
||||
|
||||
static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
|
||||
{
|
||||
return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
|
||||
}
|
||||
|
||||
static inline const char *dccp_role(const struct sock *sk)
|
||||
{
|
||||
switch (dccp_sk(sk)->dccps_role) {
|
||||
|
@ -782,21 +782,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
|
||||
/* Use define here intentionally to get WARN_ON location shown at the caller */
|
||||
#define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)
|
||||
|
||||
/*
|
||||
* Convert RFC3390 larger initial windows into an equivalent number of packets.
|
||||
*
|
||||
* John Heffner states:
|
||||
*
|
||||
* The RFC specifies a window of no more than 4380 bytes
|
||||
* unless 2*MSS > 4380. Reading the pseudocode in the RFC
|
||||
* is a bit misleading because they use a clamp at 4380 bytes
|
||||
* rather than a multiplier in the relevant range.
|
||||
*/
|
||||
static inline u32 rfc3390_bytes_to_packets(const u32 bytes)
|
||||
{
|
||||
return bytes <= 1095 ? 4 : (bytes > 1460 ? 2 : 3);
|
||||
}
|
||||
|
||||
extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
|
||||
extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
|
||||
|
||||
|
@ -25,6 +25,9 @@ config INET_DCCP_DIAG
|
||||
def_tristate y if (IP_DCCP = y && INET_DIAG = y)
|
||||
def_tristate m
|
||||
|
||||
config IP_DCCP_ACKVEC
|
||||
bool
|
||||
|
||||
source "net/dccp/ccids/Kconfig"
|
||||
|
||||
menu "DCCP Kernel Hacking"
|
||||
|
@ -1,7 +1,6 @@
|
||||
obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
|
||||
|
||||
dccp-y := ccid.o feat.o input.o minisocks.o options.o \
|
||||
qpolicy.o output.o proto.o timer.o ackvec.o
|
||||
dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
|
||||
|
||||
dccp_ipv4-y := ipv4.o
|
||||
|
||||
@ -9,6 +8,8 @@ dccp_ipv4-y := ipv4.o
|
||||
obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
|
||||
dccp_ipv6-y := ipv6.o
|
||||
|
||||
dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
|
||||
|
||||
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
|
||||
obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
|
||||
|
||||
|
@ -1,375 +1,445 @@
|
||||
/*
|
||||
* net/dccp/ackvec.c
|
||||
*
|
||||
* An implementation of Ack Vectors for the DCCP protocol
|
||||
* Copyright (c) 2007 University of Aberdeen, Scotland, UK
|
||||
* An implementation of the DCCP protocol
|
||||
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; version 2 of the License;
|
||||
*/
|
||||
|
||||
#include "ackvec.h"
|
||||
#include "dccp.h"
|
||||
|
||||
#include <linux/dccp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <net/sock.h>
|
||||
|
||||
static struct kmem_cache *dccp_ackvec_slab;
|
||||
static struct kmem_cache *dccp_ackvec_record_slab;
|
||||
|
||||
struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
|
||||
static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
|
||||
{
|
||||
struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
|
||||
struct dccp_ackvec_record *avr =
|
||||
kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
|
||||
|
||||
if (av != NULL) {
|
||||
av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
|
||||
INIT_LIST_HEAD(&av->av_records);
|
||||
}
|
||||
return av;
|
||||
if (avr != NULL)
|
||||
INIT_LIST_HEAD(&avr->avr_node);
|
||||
|
||||
return avr;
|
||||
}
|
||||
|
||||
static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
|
||||
static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
|
||||
{
|
||||
struct dccp_ackvec_record *cur, *next;
|
||||
if (unlikely(avr == NULL))
|
||||
return;
|
||||
/* Check if deleting a linked record */
|
||||
WARN_ON(!list_empty(&avr->avr_node));
|
||||
kmem_cache_free(dccp_ackvec_record_slab, avr);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
|
||||
kmem_cache_free(dccp_ackvec_record_slab, cur);
|
||||
INIT_LIST_HEAD(&av->av_records);
|
||||
static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
|
||||
struct dccp_ackvec_record *avr)
|
||||
{
|
||||
/*
|
||||
* AVRs are sorted by seqno. Since we are sending them in order, we
|
||||
* just add the AVR at the head of the list.
|
||||
* -sorbo.
|
||||
*/
|
||||
if (!list_empty(&av->av_records)) {
|
||||
const struct dccp_ackvec_record *head =
|
||||
list_entry(av->av_records.next,
|
||||
struct dccp_ackvec_record,
|
||||
avr_node);
|
||||
BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
|
||||
}
|
||||
|
||||
list_add(&avr->avr_node, &av->av_records);
|
||||
}
|
||||
|
||||
int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
|
||||
/* Figure out how many options we need to represent the ackvec */
|
||||
const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
|
||||
u16 len = av->av_vec_len + 2 * nr_opts, i;
|
||||
u32 elapsed_time;
|
||||
const unsigned char *tail, *from;
|
||||
unsigned char *to;
|
||||
struct dccp_ackvec_record *avr;
|
||||
suseconds_t delta;
|
||||
|
||||
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
|
||||
return -1;
|
||||
|
||||
delta = ktime_us_delta(ktime_get_real(), av->av_time);
|
||||
elapsed_time = delta / 10;
|
||||
|
||||
if (elapsed_time != 0 &&
|
||||
dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
|
||||
return -1;
|
||||
|
||||
avr = dccp_ackvec_record_new();
|
||||
if (avr == NULL)
|
||||
return -1;
|
||||
|
||||
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
|
||||
|
||||
to = skb_push(skb, len);
|
||||
len = av->av_vec_len;
|
||||
from = av->av_buf + av->av_buf_head;
|
||||
tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
|
||||
|
||||
for (i = 0; i < nr_opts; ++i) {
|
||||
int copylen = len;
|
||||
|
||||
if (len > DCCP_MAX_ACKVEC_OPT_LEN)
|
||||
copylen = DCCP_MAX_ACKVEC_OPT_LEN;
|
||||
|
||||
*to++ = DCCPO_ACK_VECTOR_0;
|
||||
*to++ = copylen + 2;
|
||||
|
||||
/* Check if buf_head wraps */
|
||||
if (from + copylen > tail) {
|
||||
const u16 tailsize = tail - from;
|
||||
|
||||
memcpy(to, from, tailsize);
|
||||
to += tailsize;
|
||||
len -= tailsize;
|
||||
copylen -= tailsize;
|
||||
from = av->av_buf;
|
||||
}
|
||||
|
||||
memcpy(to, from, copylen);
|
||||
from += copylen;
|
||||
to += copylen;
|
||||
len -= copylen;
|
||||
}
|
||||
|
||||
/*
|
||||
* From RFC 4340, A.2:
|
||||
*
|
||||
* For each acknowledgement it sends, the HC-Receiver will add an
|
||||
* acknowledgement record. ack_seqno will equal the HC-Receiver
|
||||
* sequence number it used for the ack packet; ack_ptr will equal
|
||||
* buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
|
||||
* equal buf_nonce.
|
||||
*/
|
||||
avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
avr->avr_ack_ptr = av->av_buf_head;
|
||||
avr->avr_ack_ackno = av->av_buf_ackno;
|
||||
avr->avr_ack_nonce = av->av_buf_nonce;
|
||||
avr->avr_sent_len = av->av_vec_len;
|
||||
|
||||
dccp_ackvec_insert_avr(av, avr);
|
||||
|
||||
dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
|
||||
"ack_ackno=%llu\n",
|
||||
dccp_role(sk), avr->avr_sent_len,
|
||||
(unsigned long long)avr->avr_ack_seqno,
|
||||
(unsigned long long)avr->avr_ack_ackno);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
|
||||
{
|
||||
struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
|
||||
|
||||
if (av != NULL) {
|
||||
av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
|
||||
av->av_buf_ackno = UINT48_MAX + 1;
|
||||
av->av_buf_nonce = 0;
|
||||
av->av_time = ktime_set(0, 0);
|
||||
av->av_vec_len = 0;
|
||||
INIT_LIST_HEAD(&av->av_records);
|
||||
}
|
||||
|
||||
return av;
|
||||
}
|
||||
|
||||
void dccp_ackvec_free(struct dccp_ackvec *av)
|
||||
{
|
||||
if (likely(av != NULL)) {
|
||||
dccp_ackvec_purge_records(av);
|
||||
kmem_cache_free(dccp_ackvec_slab, av);
|
||||
if (unlikely(av == NULL))
|
||||
return;
|
||||
|
||||
if (!list_empty(&av->av_records)) {
|
||||
struct dccp_ackvec_record *avr, *next;
|
||||
|
||||
list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
|
||||
list_del_init(&avr->avr_node);
|
||||
dccp_ackvec_record_delete(avr);
|
||||
}
|
||||
}
|
||||
|
||||
kmem_cache_free(dccp_ackvec_slab, av);
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_ackvec_update_records - Record information about sent Ack Vectors
|
||||
* @av: Ack Vector records to update
|
||||
* @seqno: Sequence number of the packet carrying the Ack Vector just sent
|
||||
* @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
|
||||
*/
|
||||
int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
|
||||
static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
|
||||
const u32 index)
|
||||
{
|
||||
struct dccp_ackvec_record *avr;
|
||||
|
||||
avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
|
||||
if (avr == NULL)
|
||||
return -ENOBUFS;
|
||||
|
||||
avr->avr_ack_seqno = seqno;
|
||||
avr->avr_ack_ptr = av->av_buf_head;
|
||||
avr->avr_ack_ackno = av->av_buf_ackno;
|
||||
avr->avr_ack_nonce = nonce_sum;
|
||||
avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
|
||||
/*
|
||||
* When the buffer overflows, we keep no more than one record. This is
|
||||
* the simplest way of disambiguating sender-Acks dating from before the
|
||||
* overflow from sender-Acks which refer to after the overflow; a simple
|
||||
* solution is preferable here since we are handling an exception.
|
||||
*/
|
||||
if (av->av_overflow)
|
||||
dccp_ackvec_purge_records(av);
|
||||
/*
|
||||
* Since GSS is incremented for each packet, the list is automatically
|
||||
* arranged in descending order of @ack_seqno.
|
||||
*/
|
||||
list_add(&avr->avr_node, &av->av_records);
|
||||
|
||||
dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
|
||||
(unsigned long long)avr->avr_ack_seqno,
|
||||
(unsigned long long)avr->avr_ack_ackno,
|
||||
avr->avr_ack_runlen);
|
||||
return 0;
|
||||
return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
|
||||
}
|
||||
|
||||
static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
|
||||
const u64 ackno)
|
||||
static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
|
||||
const u32 index)
|
||||
{
|
||||
struct dccp_ackvec_record *avr;
|
||||
/*
|
||||
* Exploit that records are inserted in descending order of sequence
|
||||
* number, start with the oldest record first. If @ackno is `before'
|
||||
* the earliest ack_ackno, the packet is too old to be considered.
|
||||
*/
|
||||
list_for_each_entry_reverse(avr, av_list, avr_node) {
|
||||
if (avr->avr_ack_seqno == ackno)
|
||||
return avr;
|
||||
if (before48(ackno, avr->avr_ack_seqno))
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Buffer index and length computation using modulo-buffersize arithmetic.
|
||||
* Note that, as pointers move from right to left, head is `before' tail.
|
||||
* If several packets are missing, the HC-Receiver may prefer to enter multiple
|
||||
* bytes with run length 0, rather than a single byte with a larger run length;
|
||||
* this simplifies table updates if one of the missing packets arrives.
|
||||
*/
|
||||
static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
|
||||
static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
|
||||
const unsigned int packets,
|
||||
const unsigned char state)
|
||||
{
|
||||
return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
|
||||
}
|
||||
unsigned int gap;
|
||||
long new_head;
|
||||
|
||||
static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
|
||||
{
|
||||
return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
|
||||
}
|
||||
if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
|
||||
return -ENOBUFS;
|
||||
|
||||
u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
|
||||
{
|
||||
if (unlikely(av->av_overflow))
|
||||
return DCCPAV_MAX_ACKVEC_LEN;
|
||||
return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
|
||||
}
|
||||
gap = packets - 1;
|
||||
new_head = av->av_buf_head - packets;
|
||||
|
||||
/**
|
||||
* dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1
|
||||
* @av: non-empty buffer to update
|
||||
* @distance: negative or zero distance of @seqno from buf_ackno downward
|
||||
* @seqno: the (old) sequence number whose record is to be updated
|
||||
* @state: state in which packet carrying @seqno was received
|
||||
*/
|
||||
static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
|
||||
u64 seqno, enum dccp_ackvec_states state)
|
||||
{
|
||||
u16 ptr = av->av_buf_head;
|
||||
|
||||
BUG_ON(distance > 0);
|
||||
if (unlikely(dccp_ackvec_is_empty(av)))
|
||||
return;
|
||||
|
||||
do {
|
||||
u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
|
||||
|
||||
if (distance + runlen >= 0) {
|
||||
/*
|
||||
* Only update the state if packet has not been received
|
||||
* yet. This is OK as per the second table in RFC 4340,
|
||||
* 11.4.1; i.e. here we are using the following table:
|
||||
* RECEIVED
|
||||
* 0 1 3
|
||||
* S +---+---+---+
|
||||
* T 0 | 0 | 0 | 0 |
|
||||
* O +---+---+---+
|
||||
* R 1 | 1 | 1 | 1 |
|
||||
* E +---+---+---+
|
||||
* D 3 | 0 | 1 | 3 |
|
||||
* +---+---+---+
|
||||
* The "Not Received" state was set by reserve_seats().
|
||||
*/
|
||||
if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
|
||||
av->av_buf[ptr] = state;
|
||||
else
|
||||
dccp_pr_debug("Not changing %llu state to %u\n",
|
||||
(unsigned long long)seqno, state);
|
||||
break;
|
||||
}
|
||||
|
||||
distance += runlen + 1;
|
||||
ptr = __ackvec_idx_add(ptr, 1);
|
||||
|
||||
} while (ptr != av->av_buf_tail);
|
||||
}
|
||||
|
||||
/* Mark @num entries after buf_head as "Not yet received". */
|
||||
static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
|
||||
{
|
||||
u16 start = __ackvec_idx_add(av->av_buf_head, 1),
|
||||
len = DCCPAV_MAX_ACKVEC_LEN - start;
|
||||
|
||||
/* check for buffer wrap-around */
|
||||
if (num > len) {
|
||||
memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
|
||||
start = 0;
|
||||
num -= len;
|
||||
}
|
||||
if (num)
|
||||
memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer
|
||||
* @av: container of buffer to update (can be empty or non-empty)
|
||||
* @num_packets: number of packets to register (must be >= 1)
|
||||
* @seqno: sequence number of the first packet in @num_packets
|
||||
* @state: state in which packet carrying @seqno was received
|
||||
*/
|
||||
static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
|
||||
u64 seqno, enum dccp_ackvec_states state)
|
||||
{
|
||||
u32 num_cells = num_packets;
|
||||
|
||||
if (num_packets > DCCPAV_BURST_THRESH) {
|
||||
u32 lost_packets = num_packets - 1;
|
||||
|
||||
DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
|
||||
/*
|
||||
* We received 1 packet and have a loss of size "num_packets-1"
|
||||
* which we squeeze into num_cells-1 rather than reserving an
|
||||
* entire byte for each lost packet.
|
||||
* The reason is that the vector grows in O(burst_length); when
|
||||
* it grows too large there will be no room left for the payload.
|
||||
* This is a trade-off: if a few packets out of the burst show
|
||||
* up later, their state will not be changed; it is simply too
|
||||
* costly to reshuffle/reallocate/copy the buffer each time.
|
||||
* Should such problems persist, we will need to switch to a
|
||||
* different underlying data structure.
|
||||
*/
|
||||
for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
|
||||
u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
|
||||
|
||||
av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
|
||||
av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
|
||||
|
||||
lost_packets -= len;
|
||||
if (new_head < 0) {
|
||||
if (gap > 0) {
|
||||
memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
|
||||
gap + new_head + 1);
|
||||
gap = -new_head;
|
||||
}
|
||||
new_head += DCCP_MAX_ACKVEC_LEN;
|
||||
}
|
||||
|
||||
if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
|
||||
DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
|
||||
av->av_overflow = true;
|
||||
}
|
||||
av->av_buf_head = new_head;
|
||||
|
||||
av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
|
||||
if (av->av_overflow)
|
||||
av->av_buf_tail = av->av_buf_head;
|
||||
if (gap > 0)
|
||||
memset(av->av_buf + av->av_buf_head + 1,
|
||||
DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
|
||||
|
||||
av->av_buf[av->av_buf_head] = state;
|
||||
av->av_buf_ackno = seqno;
|
||||
|
||||
if (num_packets > 1)
|
||||
dccp_ackvec_reserve_seats(av, num_packets - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_ackvec_input - Register incoming packet in the buffer
|
||||
*/
|
||||
void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
|
||||
{
|
||||
u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
enum dccp_ackvec_states state = DCCPAV_RECEIVED;
|
||||
|
||||
if (dccp_ackvec_is_empty(av)) {
|
||||
dccp_ackvec_add_new(av, 1, seqno, state);
|
||||
av->av_tail_ackno = seqno;
|
||||
|
||||
} else {
|
||||
s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
|
||||
u8 *current_head = av->av_buf + av->av_buf_head;
|
||||
|
||||
if (num_packets == 1 &&
|
||||
dccp_ackvec_state(current_head) == state &&
|
||||
dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
|
||||
|
||||
*current_head += 1;
|
||||
av->av_buf_ackno = seqno;
|
||||
|
||||
} else if (num_packets > 0) {
|
||||
dccp_ackvec_add_new(av, num_packets, seqno, state);
|
||||
} else {
|
||||
dccp_ackvec_update_old(av, num_packets, seqno, state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
|
||||
* This routine is called when the peer acknowledges the receipt of Ack Vectors
|
||||
* up to and including @ackno. While based on section A.3 of RFC 4340, here
|
||||
* are additional precautions to prevent corrupted buffer state. In particular,
|
||||
* we use tail_ackno to identify outdated records; it always marks the earliest
|
||||
* packet of group (2) in 11.4.2.
|
||||
*/
|
||||
void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
|
||||
{
|
||||
struct dccp_ackvec_record *avr, *next;
|
||||
u8 runlen_now, eff_runlen;
|
||||
s64 delta;
|
||||
|
||||
avr = dccp_ackvec_lookup(&av->av_records, ackno);
|
||||
if (avr == NULL)
|
||||
return;
|
||||
/*
|
||||
* Deal with outdated acknowledgments: this arises when e.g. there are
|
||||
* several old records and the acks from the peer come in slowly. In
|
||||
* that case we may still have records that pre-date tail_ackno.
|
||||
*/
|
||||
delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
|
||||
if (delta < 0)
|
||||
goto free_records;
|
||||
/*
|
||||
* Deal with overlapping Ack Vectors: don't subtract more than the
|
||||
* number of packets between tail_ackno and ack_ackno.
|
||||
*/
|
||||
eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
|
||||
|
||||
runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
|
||||
/*
|
||||
* The run length of Ack Vector cells does not decrease over time. If
|
||||
* the run length is the same as at the time the Ack Vector was sent, we
|
||||
* free the ack_ptr cell. That cell can however not be freed if the run
|
||||
* length has increased: in this case we need to move the tail pointer
|
||||
* backwards (towards higher indices), to its next-oldest neighbour.
|
||||
*/
|
||||
if (runlen_now > eff_runlen) {
|
||||
|
||||
av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
|
||||
av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
|
||||
|
||||
/* This move may not have cleared the overflow flag. */
|
||||
if (av->av_overflow)
|
||||
av->av_overflow = (av->av_buf_head == av->av_buf_tail);
|
||||
} else {
|
||||
av->av_buf_tail = avr->avr_ack_ptr;
|
||||
/*
|
||||
* We have made sure that avr points to a valid cell within the
|
||||
* buffer. This cell is either older than head, or equals head
|
||||
* (empty buffer): in both cases we no longer have any overflow.
|
||||
*/
|
||||
av->av_overflow = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The peer has acknowledged up to and including ack_ackno. Hence the
|
||||
* first packet in group (2) of 11.4.2 is the successor of ack_ackno.
|
||||
*/
|
||||
av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
|
||||
|
||||
free_records:
|
||||
list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
|
||||
list_del(&avr->avr_node);
|
||||
kmem_cache_free(dccp_ackvec_record_slab, avr);
|
||||
}
|
||||
av->av_vec_len += packets;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Routines to keep track of Ack Vectors received in an skb
|
||||
* Implements the RFC 4340, Appendix A
|
||||
*/
|
||||
int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
|
||||
int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
|
||||
const u64 ackno, const u8 state)
|
||||
{
|
||||
struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
|
||||
/*
|
||||
* Check at the right places if the buffer is full, if it is, tell the
|
||||
* caller to start dropping packets till the HC-Sender acks our ACK
|
||||
* vectors, when we will free up space in av_buf.
|
||||
*
|
||||
* We may well decide to do buffer compression, etc, but for now lets
|
||||
* just drop.
|
||||
*
|
||||
* From Appendix A.1.1 (`New Packets'):
|
||||
*
|
||||
* Of course, the circular buffer may overflow, either when the
|
||||
* HC-Sender is sending data at a very high rate, when the
|
||||
* HC-Receiver's acknowledgements are not reaching the HC-Sender,
|
||||
* or when the HC-Sender is forgetting to acknowledge those acks
|
||||
* (so the HC-Receiver is unable to clean up old state). In this
|
||||
* case, the HC-Receiver should either compress the buffer (by
|
||||
* increasing run lengths when possible), transfer its state to
|
||||
* a larger buffer, or, as a last resort, drop all received
|
||||
* packets, without processing them whatsoever, until its buffer
|
||||
* shrinks again.
|
||||
*/
|
||||
|
||||
if (new == NULL)
|
||||
return -ENOBUFS;
|
||||
new->vec = vec;
|
||||
new->len = len;
|
||||
new->nonce = nonce;
|
||||
/* See if this is the first ackno being inserted */
|
||||
if (av->av_vec_len == 0) {
|
||||
av->av_buf[av->av_buf_head] = state;
|
||||
av->av_vec_len = 1;
|
||||
} else if (after48(ackno, av->av_buf_ackno)) {
|
||||
const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
|
||||
|
||||
list_add_tail(&new->node, head);
|
||||
/*
|
||||
* Look if the state of this packet is the same as the
|
||||
* previous ackno and if so if we can bump the head len.
|
||||
*/
|
||||
if (delta == 1 &&
|
||||
dccp_ackvec_state(av, av->av_buf_head) == state &&
|
||||
dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
|
||||
av->av_buf[av->av_buf_head]++;
|
||||
else if (dccp_ackvec_set_buf_head_state(av, delta, state))
|
||||
return -ENOBUFS;
|
||||
} else {
|
||||
/*
|
||||
* A.1.2. Old Packets
|
||||
*
|
||||
* When a packet with Sequence Number S <= buf_ackno
|
||||
* arrives, the HC-Receiver will scan the table for
|
||||
* the byte corresponding to S. (Indexing structures
|
||||
* could reduce the complexity of this scan.)
|
||||
*/
|
||||
u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
|
||||
u32 index = av->av_buf_head;
|
||||
|
||||
while (1) {
|
||||
const u8 len = dccp_ackvec_len(av, index);
|
||||
const u8 av_state = dccp_ackvec_state(av, index);
|
||||
/*
|
||||
* valid packets not yet in av_buf have a reserved
|
||||
* entry, with a len equal to 0.
|
||||
*/
|
||||
if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
|
||||
len == 0 && delta == 0) { /* Found our
|
||||
reserved seat! */
|
||||
dccp_pr_debug("Found %llu reserved seat!\n",
|
||||
(unsigned long long)ackno);
|
||||
av->av_buf[index] = state;
|
||||
goto out;
|
||||
}
|
||||
/* len == 0 means one packet */
|
||||
if (delta < len + 1)
|
||||
goto out_duplicate;
|
||||
|
||||
delta -= len + 1;
|
||||
if (++index == DCCP_MAX_ACKVEC_LEN)
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
av->av_buf_ackno = ackno;
|
||||
av->av_time = ktime_get_real();
|
||||
out:
|
||||
return 0;
|
||||
|
||||
out_duplicate:
|
||||
/* Duplicate packet */
|
||||
dccp_pr_debug("Received a dup or already considered lost "
|
||||
"packet: %llu\n", (unsigned long long)ackno);
|
||||
return -EILSEQ;
|
||||
}
|
||||
|
||||
static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
|
||||
struct dccp_ackvec_record *avr)
|
||||
{
|
||||
struct dccp_ackvec_record *next;
|
||||
|
||||
/* sort out vector length */
|
||||
if (av->av_buf_head <= avr->avr_ack_ptr)
|
||||
av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
|
||||
else
|
||||
av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
|
||||
av->av_buf_head + avr->avr_ack_ptr;
|
||||
|
||||
/* free records */
|
||||
list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
|
||||
list_del_init(&avr->avr_node);
|
||||
dccp_ackvec_record_delete(avr);
|
||||
}
|
||||
}
|
||||
|
||||
void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
|
||||
const u64 ackno)
|
||||
{
|
||||
struct dccp_ackvec_record *avr;
|
||||
|
||||
/*
|
||||
* If we traverse backwards, it should be faster when we have large
|
||||
* windows. We will be receiving ACKs for stuff we sent a while back
|
||||
* -sorbo.
|
||||
*/
|
||||
list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
|
||||
if (ackno == avr->avr_ack_seqno) {
|
||||
dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
|
||||
"ack_ackno=%llu, ACKED!\n",
|
||||
dccp_role(sk), 1,
|
||||
(unsigned long long)avr->avr_ack_seqno,
|
||||
(unsigned long long)avr->avr_ack_ackno);
|
||||
dccp_ackvec_throw_record(av, avr);
|
||||
break;
|
||||
} else if (avr->avr_ack_seqno > ackno)
|
||||
break; /* old news */
|
||||
}
|
||||
}
|
||||
|
||||
static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
|
||||
struct sock *sk, u64 *ackno,
|
||||
const unsigned char len,
|
||||
const unsigned char *vector)
|
||||
{
|
||||
unsigned char i;
|
||||
struct dccp_ackvec_record *avr;
|
||||
|
||||
/* Check if we actually sent an ACK vector */
|
||||
if (list_empty(&av->av_records))
|
||||
return;
|
||||
|
||||
i = len;
|
||||
/*
|
||||
* XXX
|
||||
* I think it might be more efficient to work backwards. See comment on
|
||||
* rcv_ackno. -sorbo.
|
||||
*/
|
||||
avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
|
||||
while (i--) {
|
||||
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
|
||||
u64 ackno_end_rl;
|
||||
|
||||
dccp_set_seqno(&ackno_end_rl, *ackno - rl);
|
||||
|
||||
/*
|
||||
* If our AVR sequence number is greater than the ack, go
|
||||
* forward in the AVR list until it is not so.
|
||||
*/
|
||||
list_for_each_entry_from(avr, &av->av_records, avr_node) {
|
||||
if (!after48(avr->avr_ack_seqno, *ackno))
|
||||
goto found;
|
||||
}
|
||||
/* End of the av_records list, not found, exit */
|
||||
break;
|
||||
found:
|
||||
if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
|
||||
const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
|
||||
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
|
||||
dccp_pr_debug("%s ACK vector 0, len=%d, "
|
||||
"ack_seqno=%llu, ack_ackno=%llu, "
|
||||
"ACKED!\n",
|
||||
dccp_role(sk), len,
|
||||
(unsigned long long)
|
||||
avr->avr_ack_seqno,
|
||||
(unsigned long long)
|
||||
avr->avr_ack_ackno);
|
||||
dccp_ackvec_throw_record(av, avr);
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If it wasn't received, continue scanning... we might
|
||||
* find another one.
|
||||
*/
|
||||
}
|
||||
|
||||
dccp_set_seqno(ackno, ackno_end_rl - 1);
|
||||
++vector;
|
||||
}
|
||||
}
|
||||
|
||||
int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
|
||||
u64 *ackno, const u8 opt, const u8 *value, const u8 len)
|
||||
{
|
||||
if (len > DCCP_MAX_ACKVEC_OPT_LEN)
|
||||
return -1;
|
||||
|
||||
/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
|
||||
dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
|
||||
ackno, len, value);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
|
||||
|
||||
void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
|
||||
{
|
||||
struct dccp_ackvec_parsed *cur, *next;
|
||||
|
||||
list_for_each_entry_safe(cur, next, parsed_chunks, node)
|
||||
kfree(cur);
|
||||
INIT_LIST_HEAD(parsed_chunks);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
|
||||
|
||||
int __init dccp_ackvec_init(void)
|
||||
{
|
||||
@ -379,9 +449,10 @@ int __init dccp_ackvec_init(void)
|
||||
if (dccp_ackvec_slab == NULL)
|
||||
goto out_err;
|
||||
|
||||
dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
|
||||
sizeof(struct dccp_ackvec_record),
|
||||
0, SLAB_HWCACHE_ALIGN, NULL);
|
||||
dccp_ackvec_record_slab =
|
||||
kmem_cache_create("dccp_ackvec_record",
|
||||
sizeof(struct dccp_ackvec_record),
|
||||
0, SLAB_HWCACHE_ALIGN, NULL);
|
||||
if (dccp_ackvec_record_slab == NULL)
|
||||
goto out_destroy_slab;
|
||||
|
||||
|
@ -3,134 +3,156 @@
|
||||
/*
|
||||
* net/dccp/ackvec.h
|
||||
*
|
||||
* An implementation of Ack Vectors for the DCCP protocol
|
||||
* Copyright (c) 2007 University of Aberdeen, Scotland, UK
|
||||
* An implementation of the DCCP protocol
|
||||
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/dccp.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
|
||||
* the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
|
||||
* will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
|
||||
* more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
|
||||
* The maximum value is bounded by the u16 types for indices and functions.
|
||||
*/
|
||||
#define DCCPAV_NUM_ACKVECS 2
|
||||
#define DCCPAV_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
|
||||
/* Read about the ECN nonce to see why it is 253 */
|
||||
#define DCCP_MAX_ACKVEC_OPT_LEN 253
|
||||
/* We can spread an ack vector across multiple options */
|
||||
#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
|
||||
|
||||
/* Estimated minimum average Ack Vector length - used for updating MPS */
|
||||
#define DCCPAV_MIN_OPTLEN 16
|
||||
#define DCCP_ACKVEC_STATE_RECEIVED 0
|
||||
#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
|
||||
#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6)
|
||||
|
||||
/* Threshold for coping with large bursts of losses */
|
||||
#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8)
|
||||
#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */
|
||||
#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */
|
||||
|
||||
enum dccp_ackvec_states {
|
||||
DCCPAV_RECEIVED = 0x00,
|
||||
DCCPAV_ECN_MARKED = 0x40,
|
||||
DCCPAV_RESERVED = 0x80,
|
||||
DCCPAV_NOT_RECEIVED = 0xC0
|
||||
};
|
||||
#define DCCPAV_MAX_RUNLEN 0x3F
|
||||
|
||||
static inline u8 dccp_ackvec_runlen(const u8 *cell)
|
||||
{
|
||||
return *cell & DCCPAV_MAX_RUNLEN;
|
||||
}
|
||||
|
||||
static inline u8 dccp_ackvec_state(const u8 *cell)
|
||||
{
|
||||
return *cell & ~DCCPAV_MAX_RUNLEN;
|
||||
}
|
||||
|
||||
/** struct dccp_ackvec - Ack Vector main data structure
|
||||
/** struct dccp_ackvec - ack vector
|
||||
*
|
||||
* This implements a fixed-size circular buffer within an array and is largely
|
||||
* based on Appendix A of RFC 4340.
|
||||
* This data structure is the one defined in RFC 4340, Appendix A.
|
||||
*
|
||||
* @av_buf: circular buffer storage area
|
||||
* @av_buf_head: head index; begin of live portion in @av_buf
|
||||
* @av_buf_tail: tail index; first index _after_ the live portion in @av_buf
|
||||
* @av_buf_ackno: highest seqno of acknowledgeable packet recorded in @av_buf
|
||||
* @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf
|
||||
* @av_buf_nonce: ECN nonce sums, each covering subsequent segments of up to
|
||||
* %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
|
||||
* @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound
|
||||
* @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously)
|
||||
* @av_buf_head - circular buffer head
|
||||
* @av_buf_tail - circular buffer tail
|
||||
* @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
|
||||
* buffer (i.e. %av_buf_head)
|
||||
* @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
|
||||
* by the buffer with State 0
|
||||
*
|
||||
* Additionally, the HC-Receiver must keep some information about the
|
||||
* Ack Vectors it has recently sent. For each packet sent carrying an
|
||||
* Ack Vector, it remembers four variables:
|
||||
*
|
||||
* @av_records - list of dccp_ackvec_record
|
||||
* @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
|
||||
*
|
||||
* @av_time - the time in usecs
|
||||
* @av_buf - circular buffer of acknowledgeable packets
|
||||
*/
|
||||
struct dccp_ackvec {
|
||||
u8 av_buf[DCCPAV_MAX_ACKVEC_LEN];
|
||||
u16 av_buf_head;
|
||||
u16 av_buf_tail;
|
||||
u64 av_buf_ackno:48;
|
||||
u64 av_tail_ackno:48;
|
||||
bool av_buf_nonce[DCCPAV_NUM_ACKVECS];
|
||||
u8 av_overflow:1;
|
||||
u64 av_buf_ackno;
|
||||
struct list_head av_records;
|
||||
ktime_t av_time;
|
||||
u16 av_buf_head;
|
||||
u16 av_vec_len;
|
||||
u8 av_buf_nonce;
|
||||
u8 av_ack_nonce;
|
||||
u8 av_buf[DCCP_MAX_ACKVEC_LEN];
|
||||
};
|
||||
|
||||
/** struct dccp_ackvec_record - Records information about sent Ack Vectors
|
||||
/** struct dccp_ackvec_record - ack vector record
|
||||
*
|
||||
* These list entries define the additional information which the HC-Receiver
|
||||
* keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
|
||||
* ACK vector record as defined in Appendix A of spec.
|
||||
*
|
||||
* @avr_node: the list node in @av_records
|
||||
* @avr_ack_seqno: sequence number of the packet the Ack Vector was sent on
|
||||
* @avr_ack_ackno: the Ack number that this record/Ack Vector refers to
|
||||
* @avr_ack_ptr: pointer into @av_buf where this record starts
|
||||
* @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
|
||||
* @avr_ack_nonce: the sum of @av_buf_nonce's at the time this record was sent
|
||||
* The list is sorted by avr_ack_seqno
|
||||
*
|
||||
* The list as a whole is sorted in descending order by @avr_ack_seqno.
|
||||
* @avr_node - node in av_records
|
||||
* @avr_ack_seqno - sequence number of the packet this record was sent on
|
||||
* @avr_ack_ackno - sequence number being acknowledged
|
||||
* @avr_ack_ptr - pointer into av_buf where this record starts
|
||||
* @avr_ack_nonce - av_ack_nonce at the time this record was sent
|
||||
* @avr_sent_len - length of the record in av_buf
|
||||
*/
|
||||
struct dccp_ackvec_record {
|
||||
struct list_head avr_node;
|
||||
u64 avr_ack_seqno:48;
|
||||
u64 avr_ack_ackno:48;
|
||||
u64 avr_ack_seqno;
|
||||
u64 avr_ack_ackno;
|
||||
u16 avr_ack_ptr;
|
||||
u8 avr_ack_runlen;
|
||||
u8 avr_ack_nonce:1;
|
||||
u16 avr_sent_len;
|
||||
u8 avr_ack_nonce;
|
||||
};
|
||||
|
||||
extern int dccp_ackvec_init(void);
|
||||
struct sock;
|
||||
struct sk_buff;
|
||||
|
||||
#ifdef CONFIG_IP_DCCP_ACKVEC
|
||||
extern int dccp_ackvec_init(void);
|
||||
extern void dccp_ackvec_exit(void);
|
||||
|
||||
extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
|
||||
extern void dccp_ackvec_free(struct dccp_ackvec *av);
|
||||
|
||||
extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
|
||||
extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
|
||||
extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
|
||||
extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
|
||||
extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
|
||||
const u64 ackno, const u8 state);
|
||||
|
||||
static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
|
||||
extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
|
||||
struct sock *sk, const u64 ackno);
|
||||
extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
|
||||
u64 *ackno, const u8 opt,
|
||||
const u8 *value, const u8 len);
|
||||
|
||||
extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
|
||||
|
||||
static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
|
||||
{
|
||||
return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
|
||||
return av->av_vec_len;
|
||||
}
|
||||
#else /* CONFIG_IP_DCCP_ACKVEC */
|
||||
static inline int dccp_ackvec_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb
|
||||
* @vec: start of vector (offset into skb)
|
||||
* @len: length of @vec
|
||||
* @nonce: whether @vec had an ECN nonce of 0 or 1
|
||||
* @node: FIFO - arranged in descending order of ack_ackno
|
||||
* This structure is used by CCIDs to access Ack Vectors in a received skb.
|
||||
*/
|
||||
struct dccp_ackvec_parsed {
|
||||
u8 *vec,
|
||||
len,
|
||||
nonce:1;
|
||||
struct list_head node;
|
||||
};
|
||||
static inline void dccp_ackvec_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
extern int dccp_ackvec_parsed_add(struct list_head *head,
|
||||
u8 *vec, u8 len, u8 nonce);
|
||||
extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
|
||||
static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void dccp_ackvec_free(struct dccp_ackvec *av)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
|
||||
const u64 ackno, const u8 state)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
|
||||
struct sock *sk, const u64 ackno)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
|
||||
const u64 *ackno, const u8 opt,
|
||||
const u8 *value, const u8 len)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int dccp_insert_option_ackvec(const struct sock *sk,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_IP_DCCP_ACKVEC */
|
||||
#endif /* _ACKVEC_H */
|
||||
|
101
net/dccp/ccid.c
101
net/dccp/ccid.c
@ -13,13 +13,6 @@
|
||||
|
||||
#include "ccid.h"
|
||||
|
||||
static u8 builtin_ccids[] = {
|
||||
DCCPC_CCID2, /* CCID2 is supported by default */
|
||||
#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE)
|
||||
DCCPC_CCID3,
|
||||
#endif
|
||||
};
|
||||
|
||||
static struct ccid_operations *ccids[CCID_MAX];
|
||||
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
|
||||
static atomic_t ccids_lockct = ATOMIC_INIT(0);
|
||||
@ -93,47 +86,6 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
|
||||
}
|
||||
}
|
||||
|
||||
/* check that up to @array_len members in @ccid_array are supported */
|
||||
bool ccid_support_check(u8 const *ccid_array, u8 array_len)
|
||||
{
|
||||
u8 i, j, found;
|
||||
|
||||
for (i = 0, found = 0; i < array_len; i++, found = 0) {
|
||||
for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++)
|
||||
found = (ccid_array[i] == builtin_ccids[j]);
|
||||
if (!found)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* ccid_get_builtin_ccids - Provide copy of `builtin' CCID array
|
||||
* @ccid_array: pointer to copy into
|
||||
* @array_len: value to return length into
|
||||
* This function allocates memory - caller must see that it is freed after use.
|
||||
*/
|
||||
int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
|
||||
{
|
||||
*ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any());
|
||||
if (*ccid_array == NULL)
|
||||
return -ENOBUFS;
|
||||
*array_len = ARRAY_SIZE(builtin_ccids);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
|
||||
char __user *optval, int __user *optlen)
|
||||
{
|
||||
if (len < sizeof(builtin_ccids))
|
||||
return -EINVAL;
|
||||
|
||||
if (put_user(sizeof(builtin_ccids), optlen) ||
|
||||
copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ccid_register(struct ccid_operations *ccid_ops)
|
||||
{
|
||||
int err = -ENOBUFS;
|
||||
@ -196,41 +148,22 @@ int ccid_unregister(struct ccid_operations *ccid_ops)
|
||||
|
||||
EXPORT_SYMBOL_GPL(ccid_unregister);
|
||||
|
||||
/**
|
||||
* ccid_request_module - Pre-load CCID module for later use
|
||||
* This should be called only from process context (e.g. during connection
|
||||
* setup) and is necessary for later calls to ccid_new (typically in software
|
||||
* interrupt), so that it has the modules available when they are needed.
|
||||
*/
|
||||
static int ccid_request_module(u8 id)
|
||||
{
|
||||
if (!in_atomic()) {
|
||||
ccids_read_lock();
|
||||
if (ccids[id] == NULL) {
|
||||
ccids_read_unlock();
|
||||
return request_module("net-dccp-ccid-%d", id);
|
||||
}
|
||||
ccids_read_unlock();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ccid_request_modules(u8 const *ccid_array, u8 array_len)
|
||||
{
|
||||
#ifdef CONFIG_KMOD
|
||||
while (array_len--)
|
||||
if (ccid_request_module(ccid_array[array_len]))
|
||||
return -1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
|
||||
{
|
||||
struct ccid_operations *ccid_ops;
|
||||
struct ccid *ccid = NULL;
|
||||
|
||||
ccids_read_lock();
|
||||
#ifdef CONFIG_KMOD
|
||||
if (ccids[id] == NULL) {
|
||||
/* We only try to load if in process context */
|
||||
ccids_read_unlock();
|
||||
if (gfp & GFP_ATOMIC)
|
||||
goto out;
|
||||
request_module("net-dccp-ccid-%d", id);
|
||||
ccids_read_lock();
|
||||
}
|
||||
#endif
|
||||
ccid_ops = ccids[id];
|
||||
if (ccid_ops == NULL)
|
||||
goto out_unlock;
|
||||
@ -272,6 +205,20 @@ out_module_put:
|
||||
|
||||
EXPORT_SYMBOL_GPL(ccid_new);
|
||||
|
||||
struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
|
||||
{
|
||||
return ccid_new(id, sk, 1, gfp);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
|
||||
|
||||
struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp)
|
||||
{
|
||||
return ccid_new(id, sk, 0, gfp);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
|
||||
|
||||
static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
|
||||
{
|
||||
struct ccid_operations *ccid_ops;
|
||||
|
113
net/dccp/ccid.h
113
net/dccp/ccid.h
@ -60,18 +60,22 @@ struct ccid_operations {
|
||||
void (*ccid_hc_tx_exit)(struct sock *sk);
|
||||
void (*ccid_hc_rx_packet_recv)(struct sock *sk,
|
||||
struct sk_buff *skb);
|
||||
int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
|
||||
u8 opt, u8 *val, u8 len);
|
||||
int (*ccid_hc_rx_parse_options)(struct sock *sk,
|
||||
unsigned char option,
|
||||
unsigned char len, u16 idx,
|
||||
unsigned char* value);
|
||||
int (*ccid_hc_rx_insert_options)(struct sock *sk,
|
||||
struct sk_buff *skb);
|
||||
void (*ccid_hc_tx_packet_recv)(struct sock *sk,
|
||||
struct sk_buff *skb);
|
||||
int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
|
||||
u8 opt, u8 *val, u8 len);
|
||||
int (*ccid_hc_tx_parse_options)(struct sock *sk,
|
||||
unsigned char option,
|
||||
unsigned char len, u16 idx,
|
||||
unsigned char* value);
|
||||
int (*ccid_hc_tx_send_packet)(struct sock *sk,
|
||||
struct sk_buff *skb);
|
||||
void (*ccid_hc_tx_packet_sent)(struct sock *sk,
|
||||
unsigned int len);
|
||||
int more, unsigned int len);
|
||||
void (*ccid_hc_rx_get_info)(struct sock *sk,
|
||||
struct tcp_info *info);
|
||||
void (*ccid_hc_tx_get_info)(struct sock *sk,
|
||||
@ -99,78 +103,31 @@ static inline void *ccid_priv(const struct ccid *ccid)
|
||||
return (void *)ccid->ccid_priv;
|
||||
}
|
||||
|
||||
extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
|
||||
extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
|
||||
extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
|
||||
char __user *, int __user *);
|
||||
|
||||
extern int ccid_request_modules(u8 const *ccid_array, u8 array_len);
|
||||
extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
|
||||
gfp_t gfp);
|
||||
|
||||
static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
|
||||
{
|
||||
struct ccid *ccid = dp->dccps_hc_rx_ccid;
|
||||
|
||||
if (ccid == NULL || ccid->ccid_ops == NULL)
|
||||
return -1;
|
||||
return ccid->ccid_ops->ccid_id;
|
||||
}
|
||||
|
||||
static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
|
||||
{
|
||||
struct ccid *ccid = dp->dccps_hc_tx_ccid;
|
||||
|
||||
if (ccid == NULL || ccid->ccid_ops == NULL)
|
||||
return -1;
|
||||
return ccid->ccid_ops->ccid_id;
|
||||
}
|
||||
extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
|
||||
gfp_t gfp);
|
||||
extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
|
||||
gfp_t gfp);
|
||||
|
||||
extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
|
||||
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
|
||||
|
||||
/*
|
||||
* Congestion control of queued data packets via CCID decision.
|
||||
*
|
||||
* The TX CCID performs its congestion-control by indicating whether and when a
|
||||
* queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
|
||||
* The following modes are supported via the symbolic constants below:
|
||||
* - timer-based pacing (CCID returns a delay value in milliseconds);
|
||||
* - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
|
||||
*/
|
||||
|
||||
enum ccid_dequeueing_decision {
|
||||
CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */
|
||||
CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */
|
||||
CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */
|
||||
CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */
|
||||
CCID_PACKET_ERR = 0xF0000, /* error condition */
|
||||
};
|
||||
|
||||
static inline int ccid_packet_dequeue_eval(const int return_code)
|
||||
{
|
||||
if (return_code < 0)
|
||||
return CCID_PACKET_ERR;
|
||||
if (return_code == 0)
|
||||
return CCID_PACKET_SEND_AT_ONCE;
|
||||
if (return_code <= CCID_PACKET_DELAY_MAX)
|
||||
return CCID_PACKET_DELAY;
|
||||
return return_code;
|
||||
}
|
||||
|
||||
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
int rc = 0;
|
||||
if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
|
||||
return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
|
||||
return CCID_PACKET_SEND_AT_ONCE;
|
||||
rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
|
||||
unsigned int len)
|
||||
int more, unsigned int len)
|
||||
{
|
||||
if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
|
||||
ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
|
||||
ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
|
||||
}
|
||||
|
||||
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
|
||||
@ -187,31 +144,27 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
|
||||
ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
|
||||
}
|
||||
|
||||
/**
|
||||
* ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver
|
||||
* @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
|
||||
* @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
|
||||
* @val: value of @opt
|
||||
* @len: length of @val in bytes
|
||||
*/
|
||||
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
|
||||
u8 pkt, u8 opt, u8 *val, u8 len)
|
||||
unsigned char option,
|
||||
unsigned char len, u16 idx,
|
||||
unsigned char* value)
|
||||
{
|
||||
if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
|
||||
return 0;
|
||||
return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
|
||||
int rc = 0;
|
||||
if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL)
|
||||
rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
|
||||
value);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender
|
||||
* Arguments are analogous to ccid_hc_tx_parse_options()
|
||||
*/
|
||||
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
|
||||
u8 pkt, u8 opt, u8 *val, u8 len)
|
||||
unsigned char option,
|
||||
unsigned char len, u16 idx,
|
||||
unsigned char* value)
|
||||
{
|
||||
if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
|
||||
return 0;
|
||||
return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
|
||||
int rc = 0;
|
||||
if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL)
|
||||
rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
|
||||
|
@ -1,8 +1,10 @@
|
||||
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
|
||||
depends on EXPERIMENTAL
|
||||
|
||||
config IP_DCCP_CCID2
|
||||
tristate "CCID2 (TCP-Like)"
|
||||
tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
|
||||
def_tristate IP_DCCP
|
||||
select IP_DCCP_ACKVEC
|
||||
---help---
|
||||
CCID 2, TCP-like Congestion Control, denotes Additive Increase,
|
||||
Multiplicative Decrease (AIMD) congestion control with behavior
|
||||
@ -34,7 +36,7 @@ config IP_DCCP_CCID2_DEBUG
|
||||
If in doubt, say N.
|
||||
|
||||
config IP_DCCP_CCID3
|
||||
tristate "CCID3 (TCP-Friendly)"
|
||||
tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
|
||||
def_tristate IP_DCCP
|
||||
select IP_DCCP_TFRC_LIB
|
||||
---help---
|
||||
@ -62,9 +64,9 @@ config IP_DCCP_CCID3
|
||||
|
||||
If in doubt, say M.
|
||||
|
||||
if IP_DCCP_CCID3
|
||||
config IP_DCCP_CCID3_DEBUG
|
||||
bool "CCID3 debugging messages"
|
||||
depends on IP_DCCP_CCID3
|
||||
---help---
|
||||
Enable CCID3-specific debugging messages.
|
||||
|
||||
@ -74,29 +76,10 @@ config IP_DCCP_CCID3_DEBUG
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
choice
|
||||
prompt "Select method for measuring the packet size s"
|
||||
default IP_DCCP_CCID3_MEASURE_S_AS_MPS
|
||||
|
||||
config IP_DCCP_CCID3_MEASURE_S_AS_MPS
|
||||
bool "Always use MPS in place of s"
|
||||
---help---
|
||||
This use is recommended as it is consistent with the initialisation
|
||||
of X and suggested when s varies (rfc3448bis, (1) in section 4.1).
|
||||
config IP_DCCP_CCID3_MEASURE_S_AS_AVG
|
||||
bool "Use moving average"
|
||||
---help---
|
||||
An alternative way of tracking s, also supported by rfc3448bis.
|
||||
This used to be the default for CCID-3 in previous kernels.
|
||||
config IP_DCCP_CCID3_MEASURE_S_AS_MAX
|
||||
bool "Track the maximum payload length"
|
||||
---help---
|
||||
An experimental method based on tracking the maximum packet size.
|
||||
endchoice
|
||||
|
||||
config IP_DCCP_CCID3_RTO
|
||||
int "Use higher bound for nofeedback timer"
|
||||
default 100
|
||||
depends on IP_DCCP_CCID3 && EXPERIMENTAL
|
||||
---help---
|
||||
Use a higher lower bound for the nofeedback timer expiration.
|
||||
|
||||
@ -123,7 +106,6 @@ config IP_DCCP_CCID3_RTO
|
||||
The purpose of the nofeedback timer is to slow DCCP down when there
|
||||
is serious network congestion: experimenting with larger values should
|
||||
therefore not be performed on WANs.
|
||||
endif # IP_DCCP_CCID3
|
||||
|
||||
config IP_DCCP_TFRC_LIB
|
||||
tristate
|
||||
|
@ -25,7 +25,7 @@
|
||||
/*
|
||||
* This implementation should follow RFC 4341
|
||||
*/
|
||||
#include "../feat.h"
|
||||
|
||||
#include "../ccid.h"
|
||||
#include "../dccp.h"
|
||||
#include "ccid2.h"
|
||||
@ -34,8 +34,51 @@
|
||||
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
|
||||
static int ccid2_debug;
|
||||
#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
|
||||
|
||||
/*
 * ccid2_hc_tx_check_sanity - debug-only consistency check of the TX history
 * @hctx: CCID2 sender state to verify
 *
 * Only built when CONFIG_IP_DCCP_CCID2_DEBUG is set; otherwise the name
 * expands to an empty macro.
 *
 * Pass 1 walks backwards (via ccid2s_prev) from the head ccid2hctx_seqh to
 * the tail ccid2hctx_seqt, counting the entries visited in @len and the
 * entries whose ccid2s_acked flag is clear in @pipe. For each adjacent pair it
 * BUG()s if dccp_delta_seqno() reports the sequence numbers out of order or
 * if an entry's send time is earlier than that of its predecessor. Afterwards
 * the un-acked count must equal ccid2hctx_pipe.
 *
 * Pass 2 keeps following ccid2s_prev until the walk arrives back at the head,
 * so the total in @len must equal ccid2hctx_seqbufc * CCID2_SEQBUF_LEN.
 */
static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
|
||||
{
|
||||
int len = 0;
|
||||
int pipe = 0;
|
||||
struct ccid2_seq *seqp = hctx->ccid2hctx_seqh;
|
||||

|
/* there is data in the chain */
|
||||
if (seqp != hctx->ccid2hctx_seqt) {
|
||||
seqp = seqp->ccid2s_prev;
|
||||
len++;
|
||||
if (!seqp->ccid2s_acked)
|
||||
pipe++;
|
||||

|
while (seqp != hctx->ccid2hctx_seqt) {
|
||||
struct ccid2_seq *prev = seqp->ccid2s_prev;
|
||||

|
len++;
|
||||
if (!prev->ccid2s_acked)
|
||||
pipe++;
|
||||

|
/* packets are sent sequentially */
|
||||
BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
|
||||
prev->ccid2s_seq ) >= 0);
|
||||
BUG_ON(time_before(seqp->ccid2s_sent,
|
||||
prev->ccid2s_sent));
|
||||

|
seqp = prev;
|
||||
}
|
||||
}
|
||||

|
BUG_ON(pipe != hctx->ccid2hctx_pipe);
|
||||
ccid2_pr_debug("len of chain=%d\n", len);
|
||||

|
/* continue from where pass 1 stopped until we are back at the head */
do {
|
||||
seqp = seqp->ccid2s_prev;
|
||||
len++;
|
||||
} while (seqp != hctx->ccid2hctx_seqh);
|
||||

|
ccid2_pr_debug("total len=%d\n", len);
|
||||
BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
|
||||
}
|
||||
#else
|
||||
#define ccid2_pr_debug(format, a...)
|
||||
#define ccid2_hc_tx_check_sanity(hctx)
|
||||
#endif
|
||||
|
||||
static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
|
||||
@ -44,7 +87,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
|
||||
int i;
|
||||
|
||||
/* check if we have space to preserve the pointer to the buffer */
|
||||
if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *))
|
||||
if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
|
||||
sizeof(struct ccid2_seq*)))
|
||||
return -ENOMEM;
|
||||
|
||||
/* allocate buffer and initialize linked list */
|
||||
@ -60,35 +104,38 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
|
||||
seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
|
||||
|
||||
/* This is the first allocation. Initiate the head and tail. */
|
||||
if (hctx->seqbufc == 0)
|
||||
hctx->seqh = hctx->seqt = seqp;
|
||||
if (hctx->ccid2hctx_seqbufc == 0)
|
||||
hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
|
||||
else {
|
||||
/* link the existing list with the one we just created */
|
||||
hctx->seqh->ccid2s_next = seqp;
|
||||
seqp->ccid2s_prev = hctx->seqh;
|
||||
hctx->ccid2hctx_seqh->ccid2s_next = seqp;
|
||||
seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
|
||||
|
||||
hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
|
||||
seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt;
|
||||
hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
|
||||
seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt;
|
||||
}
|
||||
|
||||
/* store the original pointer to the buffer so we can free it */
|
||||
hctx->seqbuf[hctx->seqbufc] = seqp;
|
||||
hctx->seqbufc++;
|
||||
hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
|
||||
hctx->ccid2hctx_seqbufc++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
|
||||
return CCID_PACKET_WILL_DEQUEUE_LATER;
|
||||
return CCID_PACKET_SEND_AT_ONCE;
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
|
||||
return 0;
|
||||
|
||||
return 1; /* XXX CCID should dequeue when ready instead of polling */
|
||||
}
|
||||
|
||||
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2);
|
||||
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
|
||||
|
||||
/*
|
||||
* Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
|
||||
@ -100,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
|
||||
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
|
||||
val = max_ratio;
|
||||
}
|
||||
if (val > DCCPF_ACK_RATIO_MAX)
|
||||
val = DCCPF_ACK_RATIO_MAX;
|
||||
if (val > 0xFFFF) /* RFC 4340, 11.3 */
|
||||
val = 0xFFFF;
|
||||
|
||||
if (val == dp->dccps_l_ack_ratio)
|
||||
return;
|
||||
@ -110,77 +157,99 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
|
||||
dp->dccps_l_ack_ratio = val;
|
||||
}
|
||||
|
||||
/*
 * ccid2_change_srtt - set the sender's SRTT value
 * @hctx: CCID2 sender state
 * @val:  new value to store in ccid2hctx_srtt
 *
 * Single place where ccid2hctx_srtt is written, so that every change is also
 * reported through ccid2_pr_debug().
 */
static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
|
||||
{
|
||||
ccid2_pr_debug("change SRTT to %ld\n", val);
|
||||
hctx->ccid2hctx_srtt = val;
|
||||
}
|
||||
|
||||
static void ccid2_start_rto_timer(struct sock *sk);
|
||||
|
||||
static void ccid2_hc_tx_rto_expire(unsigned long data)
|
||||
{
|
||||
struct sock *sk = (struct sock *)data;
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
|
||||
long s;
|
||||
|
||||
bh_lock_sock(sk);
|
||||
if (sock_owned_by_user(sk)) {
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5);
|
||||
sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
|
||||
jiffies + HZ / 5);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ccid2_pr_debug("RTO_EXPIRE\n");
|
||||
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
|
||||
/* back-off timer */
|
||||
hctx->rto <<= 1;
|
||||
if (hctx->rto > DCCP_RTO_MAX)
|
||||
hctx->rto = DCCP_RTO_MAX;
|
||||
hctx->ccid2hctx_rto <<= 1;
|
||||
|
||||
s = hctx->ccid2hctx_rto / HZ;
|
||||
if (s > 60)
|
||||
hctx->ccid2hctx_rto = 60 * HZ;
|
||||
|
||||
ccid2_start_rto_timer(sk);
|
||||
|
||||
/* adjust pipe, cwnd etc */
|
||||
hctx->ssthresh = hctx->cwnd / 2;
|
||||
if (hctx->ssthresh < 2)
|
||||
hctx->ssthresh = 2;
|
||||
hctx->cwnd = 1;
|
||||
hctx->pipe = 0;
|
||||
hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
|
||||
if (hctx->ccid2hctx_ssthresh < 2)
|
||||
hctx->ccid2hctx_ssthresh = 2;
|
||||
hctx->ccid2hctx_cwnd = 1;
|
||||
hctx->ccid2hctx_pipe = 0;
|
||||
|
||||
/* clear state about stuff we sent */
|
||||
hctx->seqt = hctx->seqh;
|
||||
hctx->packets_acked = 0;
|
||||
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
|
||||
hctx->ccid2hctx_packets_acked = 0;
|
||||
|
||||
/* clear ack ratio state. */
|
||||
hctx->rpseq = 0;
|
||||
hctx->rpdupack = -1;
|
||||
hctx->ccid2hctx_rpseq = 0;
|
||||
hctx->ccid2hctx_rpdupack = -1;
|
||||
ccid2_change_l_ack_ratio(sk, 1);
|
||||
|
||||
/* if we were blocked before, we may now send cwnd=1 packet */
|
||||
if (sender_was_blocked)
|
||||
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
|
||||
/* restart backed-off timer */
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
out:
|
||||
bh_unlock_sock(sk);
|
||||
sock_put(sk);
|
||||
}
|
||||
|
||||
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
/*
 * ccid2_start_rto_timer - arm the RTO timer
 * @sk: socket whose CCID2 sender state holds the timer
 *
 * Schedules ccid2hctx_rtotimer to expire at jiffies + ccid2hctx_rto.
 * The timer must not already be pending when this is called: that case
 * triggers BUG_ON(), so callers have to stop a running timer first.
 */
static void ccid2_start_rto_timer(struct sock *sk)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||

|
ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
|
||||

|
BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
|
||||
sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
|
||||
jiffies + hctx->ccid2hctx_rto);
|
||||
}
|
||||
|
||||
static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
struct ccid2_seq *next;
|
||||
|
||||
hctx->pipe++;
|
||||
hctx->ccid2hctx_pipe++;
|
||||
|
||||
hctx->seqh->ccid2s_seq = dp->dccps_gss;
|
||||
hctx->seqh->ccid2s_acked = 0;
|
||||
hctx->seqh->ccid2s_sent = jiffies;
|
||||
hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss;
|
||||
hctx->ccid2hctx_seqh->ccid2s_acked = 0;
|
||||
hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
|
||||
|
||||
next = hctx->seqh->ccid2s_next;
|
||||
next = hctx->ccid2hctx_seqh->ccid2s_next;
|
||||
/* check if we need to alloc more space */
|
||||
if (next == hctx->seqt) {
|
||||
if (next == hctx->ccid2hctx_seqt) {
|
||||
if (ccid2_hc_tx_alloc_seq(hctx)) {
|
||||
DCCP_CRIT("packet history - out of memory!");
|
||||
/* FIXME: find a more graceful way to bail out */
|
||||
return;
|
||||
}
|
||||
next = hctx->seqh->ccid2s_next;
|
||||
BUG_ON(next == hctx->seqt);
|
||||
next = hctx->ccid2hctx_seqh->ccid2s_next;
|
||||
BUG_ON(next == hctx->ccid2hctx_seqt);
|
||||
}
|
||||
hctx->seqh = next;
|
||||
hctx->ccid2hctx_seqh = next;
|
||||
|
||||
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe);
|
||||
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
|
||||
hctx->ccid2hctx_pipe);
|
||||
|
||||
/*
|
||||
* FIXME: The code below is broken and the variables have been removed
|
||||
@ -203,12 +272,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
*/
|
||||
#if 0
|
||||
/* Ack Ratio. Need to maintain a concept of how many windows we sent */
|
||||
hctx->arsent++;
|
||||
hctx->ccid2hctx_arsent++;
|
||||
/* We had an ack loss in this window... */
|
||||
if (hctx->ackloss) {
|
||||
if (hctx->arsent >= hctx->cwnd) {
|
||||
hctx->arsent = 0;
|
||||
hctx->ackloss = 0;
|
||||
if (hctx->ccid2hctx_ackloss) {
|
||||
if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
|
||||
hctx->ccid2hctx_arsent = 0;
|
||||
hctx->ccid2hctx_ackloss = 0;
|
||||
}
|
||||
} else {
|
||||
/* No acks lost up to now... */
|
||||
@ -218,28 +287,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
|
||||
dp->dccps_l_ack_ratio;
|
||||
|
||||
denom = hctx->cwnd * hctx->cwnd / denom;
|
||||
denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
|
||||
|
||||
if (hctx->arsent >= denom) {
|
||||
if (hctx->ccid2hctx_arsent >= denom) {
|
||||
ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
|
||||
hctx->arsent = 0;
|
||||
hctx->ccid2hctx_arsent = 0;
|
||||
}
|
||||
} else {
|
||||
/* we can't increase ack ratio further [1] */
|
||||
hctx->arsent = 0; /* or maybe set it to cwnd*/
|
||||
hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* setup RTO timer */
|
||||
if (!timer_pending(&hctx->rtotimer))
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
|
||||
if (!timer_pending(&hctx->ccid2hctx_rtotimer))
|
||||
ccid2_start_rto_timer(sk);
|
||||
|
||||
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
|
||||
do {
|
||||
struct ccid2_seq *seqp = hctx->seqt;
|
||||
struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
|
||||
|
||||
while (seqp != hctx->seqh) {
|
||||
while (seqp != hctx->ccid2hctx_seqh) {
|
||||
ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
|
||||
(unsigned long long)seqp->ccid2s_seq,
|
||||
seqp->ccid2s_acked, seqp->ccid2s_sent);
|
||||
@ -247,158 +316,205 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
}
|
||||
} while (0);
|
||||
ccid2_pr_debug("=========\n");
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
|
||||
* This code is almost identical with TCP's tcp_rtt_estimator(), since
|
||||
* - it has a higher sampling frequency (recommended by RFC 1323),
|
||||
* - the RTO does not collapse into RTT due to RTTVAR going towards zero,
|
||||
* - it is simple (cf. more complex proposals such as Eifel timer or research
|
||||
* which suggests that the gain should be set according to window size),
|
||||
* - in tests it was found to work well with CCID2 [gerrit].
|
||||
/* XXX Lame code duplication!
|
||||
* returns -1 if none was found.
|
||||
* else returns the next offset to use in the function call.
|
||||
*/
|
||||
static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
|
||||
static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
|
||||
unsigned char **vec, unsigned char *veclen)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
long m = mrtt ? : 1;
|
||||
const struct dccp_hdr *dh = dccp_hdr(skb);
|
||||
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
|
||||
unsigned char *opt_ptr;
|
||||
const unsigned char *opt_end = (unsigned char *)dh +
|
||||
(dh->dccph_doff * 4);
|
||||
unsigned char opt, len;
|
||||
unsigned char *value;
|
||||
|
||||
if (hctx->srtt == 0) {
|
||||
/* First measurement m */
|
||||
hctx->srtt = m << 3;
|
||||
hctx->mdev = m << 1;
|
||||
BUG_ON(offset < 0);
|
||||
options += offset;
|
||||
opt_ptr = options;
|
||||
if (opt_ptr >= opt_end)
|
||||
return -1;
|
||||
|
||||
hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev);
|
||||
hctx->rttvar = hctx->mdev_max;
|
||||
hctx->rtt_seq = dccp_sk(sk)->dccps_gss;
|
||||
} else {
|
||||
/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
|
||||
m -= (hctx->srtt >> 3);
|
||||
hctx->srtt += m;
|
||||
while (opt_ptr != opt_end) {
|
||||
opt = *opt_ptr++;
|
||||
len = 0;
|
||||
value = NULL;
|
||||
|
||||
/* Similarly, update scaled mdev with regard to |m| */
|
||||
if (m < 0) {
|
||||
m = -m;
|
||||
m -= (hctx->mdev >> 2);
|
||||
/* Check if this isn't a single byte option */
|
||||
if (opt > DCCPO_MAX_RESERVED) {
|
||||
if (opt_ptr == opt_end)
|
||||
goto out_invalid_option;
|
||||
|
||||
len = *opt_ptr++;
|
||||
if (len < 3)
|
||||
goto out_invalid_option;
|
||||
/*
|
||||
* This neutralises RTO increase when RTT < SRTT - mdev
|
||||
* (see P. Sarolahti, A. Kuznetsov,"Congestion Control
|
||||
* in Linux TCP", USENIX 2002, pp. 49-62).
|
||||
* Remove the type and len fields, leaving
|
||||
* just the value size
|
||||
*/
|
||||
if (m > 0)
|
||||
m >>= 3;
|
||||
} else {
|
||||
m -= (hctx->mdev >> 2);
|
||||
}
|
||||
hctx->mdev += m;
|
||||
len -= 2;
|
||||
value = opt_ptr;
|
||||
opt_ptr += len;
|
||||
|
||||
if (hctx->mdev > hctx->mdev_max) {
|
||||
hctx->mdev_max = hctx->mdev;
|
||||
if (hctx->mdev_max > hctx->rttvar)
|
||||
hctx->rttvar = hctx->mdev_max;
|
||||
if (opt_ptr > opt_end)
|
||||
goto out_invalid_option;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decay RTTVAR at most once per flight, exploiting that
|
||||
* 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
|
||||
* 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
|
||||
* GAR is a useful bound for FlightSize = pipe, AWL is probably
|
||||
* too low as it over-estimates pipe.
|
||||
*/
|
||||
if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) {
|
||||
if (hctx->mdev_max < hctx->rttvar)
|
||||
hctx->rttvar -= (hctx->rttvar -
|
||||
hctx->mdev_max) >> 2;
|
||||
hctx->rtt_seq = dccp_sk(sk)->dccps_gss;
|
||||
hctx->mdev_max = TCP_RTO_MIN;
|
||||
switch (opt) {
|
||||
case DCCPO_ACK_VECTOR_0:
|
||||
case DCCPO_ACK_VECTOR_1:
|
||||
*vec = value;
|
||||
*veclen = len;
|
||||
return offset + (opt_ptr - options);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Set RTO from SRTT and RTTVAR
|
||||
* Clock granularity is ignored since the minimum error for RTTVAR is
|
||||
* clamped to 50msec (corresponding to HZ=20). This leads to a minimum
|
||||
* RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP
|
||||
* does not retransmit data, DCCP does not require TCP's recommended
|
||||
* minimum timeout of one second".
|
||||
*/
|
||||
hctx->rto = (hctx->srtt >> 3) + hctx->rttvar;
|
||||
return -1;
|
||||
|
||||
if (hctx->rto > DCCP_RTO_MAX)
|
||||
hctx->rto = DCCP_RTO_MAX;
|
||||
out_invalid_option:
|
||||
DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
|
||||
unsigned int *maxincr)
|
||||
static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (hctx->cwnd < hctx->ssthresh) {
|
||||
if (*maxincr > 0 && ++hctx->packets_acked == 2) {
|
||||
hctx->cwnd += 1;
|
||||
*maxincr -= 1;
|
||||
hctx->packets_acked = 0;
|
||||
sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer);
|
||||
ccid2_pr_debug("deleted RTO timer\n");
|
||||
}
|
||||
|
||||
static inline void ccid2_new_ack(struct sock *sk,
|
||||
struct ccid2_seq *seqp,
|
||||
unsigned int *maxincr)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
|
||||
if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
|
||||
hctx->ccid2hctx_cwnd += 1;
|
||||
*maxincr -= 1;
|
||||
hctx->ccid2hctx_packets_acked = 0;
|
||||
}
|
||||
} else if (++hctx->packets_acked >= hctx->cwnd) {
|
||||
hctx->cwnd += 1;
|
||||
hctx->packets_acked = 0;
|
||||
} else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
|
||||
hctx->ccid2hctx_cwnd += 1;
|
||||
hctx->ccid2hctx_packets_acked = 0;
|
||||
}
|
||||
/*
|
||||
* FIXME: RTT is sampled several times per acknowledgment (for each
|
||||
* entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
|
||||
* This causes the RTT to be over-estimated, since the older entries
|
||||
* in the Ack Vector have earlier sending times.
|
||||
* The cleanest solution is to not use the ccid2s_sent field at all
|
||||
* and instead use DCCP timestamps - need to be resolved at some time.
|
||||
*/
|
||||
ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
|
||||
|
||||
/* update RTO */
|
||||
if (hctx->ccid2hctx_srtt == -1 ||
|
||||
time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
|
||||
unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
|
||||
int s;
|
||||
|
||||
/* first measurement */
|
||||
if (hctx->ccid2hctx_srtt == -1) {
|
||||
ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
|
||||
r, jiffies,
|
||||
(unsigned long long)seqp->ccid2s_seq);
|
||||
ccid2_change_srtt(hctx, r);
|
||||
hctx->ccid2hctx_rttvar = r >> 1;
|
||||
} else {
|
||||
/* RTTVAR */
|
||||
long tmp = hctx->ccid2hctx_srtt - r;
|
||||
long srtt;
|
||||
|
||||
if (tmp < 0)
|
||||
tmp *= -1;
|
||||
|
||||
tmp >>= 2;
|
||||
hctx->ccid2hctx_rttvar *= 3;
|
||||
hctx->ccid2hctx_rttvar >>= 2;
|
||||
hctx->ccid2hctx_rttvar += tmp;
|
||||
|
||||
/* SRTT */
|
||||
srtt = hctx->ccid2hctx_srtt;
|
||||
srtt *= 7;
|
||||
srtt >>= 3;
|
||||
tmp = r >> 3;
|
||||
srtt += tmp;
|
||||
ccid2_change_srtt(hctx, srtt);
|
||||
}
|
||||
s = hctx->ccid2hctx_rttvar << 2;
|
||||
/* clock granularity is 1 when based on jiffies */
|
||||
if (!s)
|
||||
s = 1;
|
||||
hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
|
||||
|
||||
/* must be at least a second */
|
||||
s = hctx->ccid2hctx_rto / HZ;
|
||||
/* DCCP doesn't require this [but I like it cuz my code sux] */
|
||||
#if 1
|
||||
if (s < 1)
|
||||
hctx->ccid2hctx_rto = HZ;
|
||||
#endif
|
||||
/* max 60 seconds */
|
||||
if (s > 60)
|
||||
hctx->ccid2hctx_rto = HZ * 60;
|
||||
|
||||
hctx->ccid2hctx_lastrtt = jiffies;
|
||||
|
||||
ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
|
||||
hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
|
||||
hctx->ccid2hctx_rto, HZ, r);
|
||||
}
|
||||
|
||||
/* we got a new ack, so re-start RTO timer */
|
||||
ccid2_hc_tx_kill_rto_timer(sk);
|
||||
ccid2_start_rto_timer(sk);
|
||||
}
|
||||
|
||||
/*
 * ccid2_hc_tx_dec_pipe - decrement the pipe counter, guarding against underflow
 * @sk: socket whose CCID2 sender state is updated
 *
 * If ccid2hctx_pipe is already zero, the problem is reported with DCCP_BUG()
 * and the counter is left untouched; otherwise it is decremented by one.
 * Whenever the counter is zero afterwards (including the error case), the RTO
 * timer is stopped via ccid2_hc_tx_kill_rto_timer().
 */
static void ccid2_hc_tx_dec_pipe(struct sock *sk)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||

|
if (hctx->ccid2hctx_pipe == 0)
|
||||
DCCP_BUG("pipe == 0");
|
||||
else
|
||||
hctx->ccid2hctx_pipe--;
|
||||

|
if (hctx->ccid2hctx_pipe == 0)
|
||||
ccid2_hc_tx_kill_rto_timer(sk);
|
||||
}
|
||||
|
||||
static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (time_before(seqp->ccid2s_sent, hctx->last_cong)) {
|
||||
if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
|
||||
ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
|
||||
return;
|
||||
}
|
||||
|
||||
hctx->last_cong = jiffies;
|
||||
hctx->ccid2hctx_last_cong = jiffies;
|
||||
|
||||
hctx->cwnd = hctx->cwnd / 2 ? : 1U;
|
||||
hctx->ssthresh = max(hctx->cwnd, 2U);
|
||||
hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U;
|
||||
hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
|
||||
|
||||
/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
|
||||
if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd)
|
||||
ccid2_change_l_ack_ratio(sk, hctx->cwnd);
|
||||
}
|
||||
|
||||
static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
|
||||
u8 option, u8 *optval, u8 optlen)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
switch (option) {
|
||||
case DCCPO_ACK_VECTOR_0:
|
||||
case DCCPO_ACK_VECTOR_1:
|
||||
return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen,
|
||||
option - DCCPO_ACK_VECTOR_0);
|
||||
}
|
||||
return 0;
|
||||
if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
|
||||
ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
|
||||
}
|
||||
|
||||
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
|
||||
struct dccp_ackvec_parsed *avp;
|
||||
u64 ackno, seqno;
|
||||
struct ccid2_seq *seqp;
|
||||
unsigned char *vector;
|
||||
unsigned char veclen;
|
||||
int offset = 0;
|
||||
int done = 0;
|
||||
unsigned int maxincr = 0;
|
||||
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
/* check reverse path congestion */
|
||||
seqno = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
|
||||
@ -407,21 +523,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* -sorbo.
|
||||
*/
|
||||
/* need to bootstrap */
|
||||
if (hctx->rpdupack == -1) {
|
||||
hctx->rpdupack = 0;
|
||||
hctx->rpseq = seqno;
|
||||
if (hctx->ccid2hctx_rpdupack == -1) {
|
||||
hctx->ccid2hctx_rpdupack = 0;
|
||||
hctx->ccid2hctx_rpseq = seqno;
|
||||
} else {
|
||||
/* check if packet is consecutive */
|
||||
if (dccp_delta_seqno(hctx->rpseq, seqno) == 1)
|
||||
hctx->rpseq = seqno;
|
||||
if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1)
|
||||
hctx->ccid2hctx_rpseq = seqno;
|
||||
/* it's a later packet */
|
||||
else if (after48(seqno, hctx->rpseq)) {
|
||||
hctx->rpdupack++;
|
||||
else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
|
||||
hctx->ccid2hctx_rpdupack++;
|
||||
|
||||
/* check if we got enough dupacks */
|
||||
if (hctx->rpdupack >= NUMDUPACK) {
|
||||
hctx->rpdupack = -1; /* XXX lame */
|
||||
hctx->rpseq = 0;
|
||||
if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
|
||||
hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
|
||||
hctx->ccid2hctx_rpseq = 0;
|
||||
|
||||
ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
|
||||
}
|
||||
@ -429,22 +545,27 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
}
|
||||
|
||||
/* check forward path congestion */
|
||||
if (dccp_packet_without_ack(skb))
|
||||
/* still didn't send out new data packets */
|
||||
if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
|
||||
return;
|
||||
|
||||
/* still didn't send out new data packets */
|
||||
if (hctx->seqh == hctx->seqt)
|
||||
goto done;
|
||||
switch (DCCP_SKB_CB(skb)->dccpd_type) {
|
||||
case DCCP_PKT_ACK:
|
||||
case DCCP_PKT_DATAACK:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
|
||||
if (after48(ackno, hctx->high_ack))
|
||||
hctx->high_ack = ackno;
|
||||
if (after48(ackno, hctx->ccid2hctx_high_ack))
|
||||
hctx->ccid2hctx_high_ack = ackno;
|
||||
|
||||
seqp = hctx->seqt;
|
||||
seqp = hctx->ccid2hctx_seqt;
|
||||
while (before48(seqp->ccid2s_seq, ackno)) {
|
||||
seqp = seqp->ccid2s_next;
|
||||
if (seqp == hctx->seqh) {
|
||||
seqp = hctx->seqh->ccid2s_prev;
|
||||
if (seqp == hctx->ccid2hctx_seqh) {
|
||||
seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -454,26 +575,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* packets per acknowledgement. Rounding up avoids that cwnd is not
|
||||
* advanced when Ack Ratio is 1 and gives a slight edge otherwise.
|
||||
*/
|
||||
if (hctx->cwnd < hctx->ssthresh)
|
||||
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
|
||||
maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
|
||||
|
||||
/* go through all ack vectors */
|
||||
list_for_each_entry(avp, &hctx->av_chunks, node) {
|
||||
while ((offset = ccid2_ackvector(sk, skb, offset,
|
||||
&vector, &veclen)) != -1) {
|
||||
/* go through this ack vector */
|
||||
for (; avp->len--; avp->vec++) {
|
||||
u64 ackno_end_rl = SUB48(ackno,
|
||||
dccp_ackvec_runlen(avp->vec));
|
||||
while (veclen--) {
|
||||
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
|
||||
u64 ackno_end_rl = SUB48(ackno, rl);
|
||||
|
||||
ccid2_pr_debug("ackvec %llu |%u,%u|\n",
|
||||
ccid2_pr_debug("ackvec start:%llu end:%llu\n",
|
||||
(unsigned long long)ackno,
|
||||
dccp_ackvec_state(avp->vec) >> 6,
|
||||
dccp_ackvec_runlen(avp->vec));
|
||||
(unsigned long long)ackno_end_rl);
|
||||
/* if the seqno we are analyzing is larger than the
|
||||
* current ackno, then move towards the tail of our
|
||||
* seqnos.
|
||||
*/
|
||||
while (after48(seqp->ccid2s_seq, ackno)) {
|
||||
if (seqp == hctx->seqt) {
|
||||
if (seqp == hctx->ccid2hctx_seqt) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
@ -486,24 +607,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* run length
|
||||
*/
|
||||
while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
|
||||
const u8 state = dccp_ackvec_state(avp->vec);
|
||||
const u8 state = *vector &
|
||||
DCCP_ACKVEC_STATE_MASK;
|
||||
|
||||
/* new packet received or marked */
|
||||
if (state != DCCPAV_NOT_RECEIVED &&
|
||||
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
|
||||
!seqp->ccid2s_acked) {
|
||||
if (state == DCCPAV_ECN_MARKED)
|
||||
if (state ==
|
||||
DCCP_ACKVEC_STATE_ECN_MARKED) {
|
||||
ccid2_congestion_event(sk,
|
||||
seqp);
|
||||
else
|
||||
} else
|
||||
ccid2_new_ack(sk, seqp,
|
||||
&maxincr);
|
||||
|
||||
seqp->ccid2s_acked = 1;
|
||||
ccid2_pr_debug("Got ack for %llu\n",
|
||||
(unsigned long long)seqp->ccid2s_seq);
|
||||
hctx->pipe--;
|
||||
ccid2_hc_tx_dec_pipe(sk);
|
||||
}
|
||||
if (seqp == hctx->seqt) {
|
||||
if (seqp == hctx->ccid2hctx_seqt) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
@ -513,6 +636,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
break;
|
||||
|
||||
ackno = SUB48(ackno_end_rl, 1);
|
||||
vector++;
|
||||
}
|
||||
if (done)
|
||||
break;
|
||||
@ -521,11 +645,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
/* The state about what is acked should be correct now
|
||||
* Check for NUMDUPACK
|
||||
*/
|
||||
seqp = hctx->seqt;
|
||||
while (before48(seqp->ccid2s_seq, hctx->high_ack)) {
|
||||
seqp = hctx->ccid2hctx_seqt;
|
||||
while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
|
||||
seqp = seqp->ccid2s_next;
|
||||
if (seqp == hctx->seqh) {
|
||||
seqp = hctx->seqh->ccid2s_prev;
|
||||
if (seqp == hctx->ccid2hctx_seqh) {
|
||||
seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -536,7 +660,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
if (done == NUMDUPACK)
|
||||
break;
|
||||
}
|
||||
if (seqp == hctx->seqt)
|
||||
if (seqp == hctx->ccid2hctx_seqt)
|
||||
break;
|
||||
seqp = seqp->ccid2s_prev;
|
||||
}
|
||||
@ -557,34 +681,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* one ack vector.
|
||||
*/
|
||||
ccid2_congestion_event(sk, seqp);
|
||||
hctx->pipe--;
|
||||
ccid2_hc_tx_dec_pipe(sk);
|
||||
}
|
||||
if (seqp == hctx->seqt)
|
||||
if (seqp == hctx->ccid2hctx_seqt)
|
||||
break;
|
||||
seqp = seqp->ccid2s_prev;
|
||||
}
|
||||
|
||||
hctx->seqt = last_acked;
|
||||
hctx->ccid2hctx_seqt = last_acked;
|
||||
}
|
||||
|
||||
/* trim acked packets in tail */
|
||||
while (hctx->seqt != hctx->seqh) {
|
||||
if (!hctx->seqt->ccid2s_acked)
|
||||
while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
|
||||
if (!hctx->ccid2hctx_seqt->ccid2s_acked)
|
||||
break;
|
||||
|
||||
hctx->seqt = hctx->seqt->ccid2s_next;
|
||||
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
|
||||
}
|
||||
|
||||
/* restart RTO timer if not all outstanding data has been acked */
|
||||
if (hctx->pipe == 0)
|
||||
sk_stop_timer(sk, &hctx->rtotimer);
|
||||
else
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
|
||||
done:
|
||||
/* check if incoming Acks allow pending packets to be sent */
|
||||
if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
|
||||
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
|
||||
dccp_ackvec_parsed_cleanup(&hctx->av_chunks);
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
}
|
||||
|
||||
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
||||
@ -594,13 +709,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
||||
u32 max_ratio;
|
||||
|
||||
/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
|
||||
hctx->ssthresh = ~0U;
|
||||
hctx->ccid2hctx_ssthresh = ~0U;
|
||||
|
||||
/* Use larger initial windows (RFC 3390, rfc2581bis) */
|
||||
hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
|
||||
/*
|
||||
* RFC 4341, 5: "The cwnd parameter is initialized to at most four
|
||||
* packets for new connections, following the rules from [RFC3390]".
|
||||
* We need to convert the bytes of RFC3390 into the packets of RFC 4341.
|
||||
*/
|
||||
hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
|
||||
|
||||
/* Make sure that Ack Ratio is enabled and within bounds. */
|
||||
max_ratio = DIV_ROUND_UP(hctx->cwnd, 2);
|
||||
max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
|
||||
if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
|
||||
dp->dccps_l_ack_ratio = max_ratio;
|
||||
|
||||
@ -608,11 +727,15 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
||||
if (ccid2_hc_tx_alloc_seq(hctx))
|
||||
return -ENOMEM;
|
||||
|
||||
hctx->rto = DCCP_TIMEOUT_INIT;
|
||||
hctx->rpdupack = -1;
|
||||
hctx->last_cong = jiffies;
|
||||
setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
|
||||
INIT_LIST_HEAD(&hctx->av_chunks);
|
||||
hctx->ccid2hctx_rto = 3 * HZ;
|
||||
ccid2_change_srtt(hctx, -1);
|
||||
hctx->ccid2hctx_rttvar = -1;
|
||||
hctx->ccid2hctx_rpdupack = -1;
|
||||
hctx->ccid2hctx_last_cong = jiffies;
|
||||
setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
|
||||
(unsigned long)sk);
|
||||
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -621,11 +744,11 @@ static void ccid2_hc_tx_exit(struct sock *sk)
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
int i;
|
||||
|
||||
sk_stop_timer(sk, &hctx->rtotimer);
|
||||
ccid2_hc_tx_kill_rto_timer(sk);
|
||||
|
||||
for (i = 0; i < hctx->seqbufc; i++)
|
||||
kfree(hctx->seqbuf[i]);
|
||||
hctx->seqbufc = 0;
|
||||
for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
|
||||
kfree(hctx->ccid2hctx_seqbuf[i]);
|
||||
hctx->ccid2hctx_seqbufc = 0;
|
||||
}
|
||||
|
||||
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
@ -636,28 +759,27 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
switch (DCCP_SKB_CB(skb)->dccpd_type) {
|
||||
case DCCP_PKT_DATA:
|
||||
case DCCP_PKT_DATAACK:
|
||||
hcrx->data++;
|
||||
if (hcrx->data >= dp->dccps_r_ack_ratio) {
|
||||
hcrx->ccid2hcrx_data++;
|
||||
if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) {
|
||||
dccp_send_ack(sk);
|
||||
hcrx->data = 0;
|
||||
hcrx->ccid2hcrx_data = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ccid_operations ccid2 = {
|
||||
.ccid_id = DCCPC_CCID2,
|
||||
.ccid_name = "TCP-like",
|
||||
.ccid_owner = THIS_MODULE,
|
||||
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
|
||||
.ccid_hc_tx_init = ccid2_hc_tx_init,
|
||||
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
|
||||
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
|
||||
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
|
||||
.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
|
||||
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
|
||||
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
|
||||
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
|
||||
.ccid_id = DCCPC_CCID2,
|
||||
.ccid_name = "TCP-like",
|
||||
.ccid_owner = THIS_MODULE,
|
||||
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
|
||||
.ccid_hc_tx_init = ccid2_hc_tx_init,
|
||||
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
|
||||
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
|
||||
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
|
||||
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
|
||||
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
|
||||
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
|
||||
|
@ -42,49 +42,34 @@ struct ccid2_seq {
|
||||
|
||||
/** struct ccid2_hc_tx_sock - CCID2 TX half connection
|
||||
*
|
||||
* @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
|
||||
* @packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
|
||||
* @srtt: smoothed RTT estimate, scaled by 2^3
|
||||
* @mdev: smoothed RTT variation, scaled by 2^2
|
||||
* @mdev_max: maximum of @mdev during one flight
|
||||
* @rttvar: moving average/maximum of @mdev_max
|
||||
* @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
|
||||
* @rtt_seq: to decay RTTVAR at most once per flight
|
||||
* @rpseq: last consecutive seqno
|
||||
* @rpdupack: dupacks since rpseq
|
||||
* @av_chunks: list of Ack Vectors received on current skb
|
||||
*/
|
||||
* @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
|
||||
* @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
|
||||
* @ccid2hctx_lastrtt -time RTT was last measured
|
||||
* @ccid2hctx_rpseq - last consecutive seqno
|
||||
* @ccid2hctx_rpdupack - dupacks since rpseq
|
||||
*/
|
||||
struct ccid2_hc_tx_sock {
|
||||
u32 cwnd;
|
||||
u32 ssthresh;
|
||||
u32 pipe;
|
||||
u32 packets_acked;
|
||||
struct ccid2_seq *seqbuf[CCID2_SEQBUF_MAX];
|
||||
int seqbufc;
|
||||
struct ccid2_seq *seqh;
|
||||
struct ccid2_seq *seqt;
|
||||
/* RTT measurement: variables/principles are the same as in TCP */
|
||||
u32 srtt,
|
||||
mdev,
|
||||
mdev_max,
|
||||
rttvar,
|
||||
rto;
|
||||
u64 rtt_seq:48;
|
||||
struct timer_list rtotimer;
|
||||
u64 rpseq;
|
||||
int rpdupack;
|
||||
unsigned long last_cong;
|
||||
u64 high_ack;
|
||||
struct list_head av_chunks;
|
||||
u32 ccid2hctx_cwnd;
|
||||
u32 ccid2hctx_ssthresh;
|
||||
u32 ccid2hctx_pipe;
|
||||
u32 ccid2hctx_packets_acked;
|
||||
struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
|
||||
int ccid2hctx_seqbufc;
|
||||
struct ccid2_seq *ccid2hctx_seqh;
|
||||
struct ccid2_seq *ccid2hctx_seqt;
|
||||
long ccid2hctx_rto;
|
||||
long ccid2hctx_srtt;
|
||||
long ccid2hctx_rttvar;
|
||||
unsigned long ccid2hctx_lastrtt;
|
||||
struct timer_list ccid2hctx_rtotimer;
|
||||
u64 ccid2hctx_rpseq;
|
||||
int ccid2hctx_rpdupack;
|
||||
unsigned long ccid2hctx_last_cong;
|
||||
u64 ccid2hctx_high_ack;
|
||||
};
|
||||
|
||||
static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hctx)
|
||||
{
|
||||
return (hctx->pipe >= hctx->cwnd);
|
||||
}
|
||||
|
||||
struct ccid2_hc_rx_sock {
|
||||
int data;
|
||||
int ccid2hcrx_data;
|
||||
};
|
||||
|
||||
static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -47,22 +47,11 @@
|
||||
/* Two seconds as per RFC 3448 4.2 */
|
||||
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
|
||||
|
||||
/* Maximum backoff interval t_mbi (RFC 3448, 4.3) */
|
||||
#define TFRC_T_MBI (64 * USEC_PER_SEC)
|
||||
/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
|
||||
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
|
||||
|
||||
/*
|
||||
* The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
|
||||
* rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
|
||||
* Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
|
||||
* resolution of HZ < 500 means that the error is below one timer tick (t_gran)
|
||||
* when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
|
||||
*/
|
||||
#if (HZ >= 500)
|
||||
# define TFRC_T_DELTA USEC_PER_MSEC
|
||||
#else
|
||||
# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
|
||||
#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC.
|
||||
#endif
|
||||
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
|
||||
#define TFRC_T_MBI 64
|
||||
|
||||
enum ccid3_options {
|
||||
TFRC_OPT_LOSS_EVENT_RATE = 192,
|
||||
@ -70,43 +59,62 @@ enum ccid3_options {
|
||||
TFRC_OPT_RECEIVE_RATE = 194,
|
||||
};
|
||||
|
||||
struct ccid3_options_received {
|
||||
u64 ccid3or_seqno:48,
|
||||
ccid3or_loss_intervals_idx:16;
|
||||
u16 ccid3or_loss_intervals_len;
|
||||
u32 ccid3or_loss_event_rate;
|
||||
u32 ccid3or_receive_rate;
|
||||
};
|
||||
|
||||
/* TFRC sender states */
|
||||
enum ccid3_hc_tx_states {
|
||||
TFRC_SSTATE_NO_SENT = 1,
|
||||
TFRC_SSTATE_NO_FBACK,
|
||||
TFRC_SSTATE_FBACK,
|
||||
TFRC_SSTATE_TERM,
|
||||
};
|
||||
|
||||
/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
|
||||
*
|
||||
* @x - Current sending rate in 64 * bytes per second
|
||||
* @x_recv - Receive rate in 64 * bytes per second
|
||||
* @x_calc - Calculated rate in bytes per second
|
||||
* @rtt - Estimate of current round trip time in usecs
|
||||
* @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5)
|
||||
* @p - Current loss event rate (0-1) scaled by 1000000
|
||||
* @s - Packet size in bytes
|
||||
* @t_rto - Nofeedback Timer setting in usecs
|
||||
* @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
|
||||
* @feedback - Whether feedback has been received or not
|
||||
* @last_win_count - Last window counter sent
|
||||
* @t_last_win_count - Timestamp of earliest packet with
|
||||
* last_win_count value sent
|
||||
* @no_feedback_timer - Handle to no feedback timer
|
||||
* @t_ld - Time last doubled during slow start
|
||||
* @t_nom - Nominal send time of next packet
|
||||
* @hist - Packet history
|
||||
* @ccid3hctx_x - Current sending rate in 64 * bytes per second
|
||||
* @ccid3hctx_x_recv - Receive rate in 64 * bytes per second
|
||||
* @ccid3hctx_x_calc - Calculated rate in bytes per second
|
||||
* @ccid3hctx_rtt - Estimate of current round trip time in usecs
|
||||
* @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
|
||||
* @ccid3hctx_s - Packet size in bytes
|
||||
* @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
|
||||
* @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
|
||||
* @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
|
||||
* @ccid3hctx_last_win_count - Last window counter sent
|
||||
* @ccid3hctx_t_last_win_count - Timestamp of earliest packet
|
||||
* with last_win_count value sent
|
||||
* @ccid3hctx_no_feedback_timer - Handle to no feedback timer
|
||||
* @ccid3hctx_t_ld - Time last doubled during slow start
|
||||
* @ccid3hctx_t_nom - Nominal send time of next packet
|
||||
* @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
|
||||
* @ccid3hctx_hist - Packet history
|
||||
* @ccid3hctx_options_received - Parsed set of retrieved options
|
||||
*/
|
||||
struct ccid3_hc_tx_sock {
|
||||
u64 x;
|
||||
u64 x_recv;
|
||||
u32 x_calc;
|
||||
u32 rtt;
|
||||
u16 r_sqmean;
|
||||
u32 p;
|
||||
u32 t_rto;
|
||||
u32 t_ipi;
|
||||
u16 s;
|
||||
bool feedback:1;
|
||||
u8 last_win_count;
|
||||
ktime_t t_last_win_count;
|
||||
struct timer_list no_feedback_timer;
|
||||
ktime_t t_ld;
|
||||
ktime_t t_nom;
|
||||
struct tfrc_tx_hist_entry *hist;
|
||||
struct tfrc_tx_info ccid3hctx_tfrc;
|
||||
#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x
|
||||
#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv
|
||||
#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc
|
||||
#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt
|
||||
#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p
|
||||
#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto
|
||||
#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi
|
||||
u16 ccid3hctx_s;
|
||||
enum ccid3_hc_tx_states ccid3hctx_state:8;
|
||||
u8 ccid3hctx_last_win_count;
|
||||
ktime_t ccid3hctx_t_last_win_count;
|
||||
struct timer_list ccid3hctx_no_feedback_timer;
|
||||
ktime_t ccid3hctx_t_ld;
|
||||
ktime_t ccid3hctx_t_nom;
|
||||
u32 ccid3hctx_delta;
|
||||
struct tfrc_tx_hist_entry *ccid3hctx_hist;
|
||||
struct ccid3_options_received ccid3hctx_options_received;
|
||||
};
|
||||
|
||||
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
|
||||
@ -116,32 +124,41 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
|
||||
return hctx;
|
||||
}
|
||||
|
||||
|
||||
enum ccid3_fback_type {
|
||||
CCID3_FBACK_NONE = 0,
|
||||
CCID3_FBACK_INITIAL,
|
||||
CCID3_FBACK_PERIODIC,
|
||||
CCID3_FBACK_PARAM_CHANGE
|
||||
/* TFRC receiver states */
|
||||
enum ccid3_hc_rx_states {
|
||||
TFRC_RSTATE_NO_DATA = 1,
|
||||
TFRC_RSTATE_DATA,
|
||||
TFRC_RSTATE_TERM = 127,
|
||||
};
|
||||
|
||||
/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
|
||||
*
|
||||
* @last_counter - Tracks window counter (RFC 4342, 8.1)
|
||||
* @feedback - The type of the feedback last sent
|
||||
* @x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
|
||||
* @tstamp_last_feedback - Time at which last feedback was sent
|
||||
* @hist - Packet history (loss detection + RTT sampling)
|
||||
* @li_hist - Loss Interval database
|
||||
* @p_inverse - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
|
||||
* @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
|
||||
* @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
|
||||
* @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4)
|
||||
* @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1)
|
||||
* @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states
|
||||
* @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
|
||||
* @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
|
||||
* @ccid3hcrx_rtt - Receiver estimate of RTT
|
||||
* @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
|
||||
* @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
|
||||
* @ccid3hcrx_hist - Packet history (loss detection + RTT sampling)
|
||||
* @ccid3hcrx_li_hist - Loss Interval database
|
||||
* @ccid3hcrx_s - Received packet size in bytes
|
||||
* @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
|
||||
*/
|
||||
struct ccid3_hc_rx_sock {
|
||||
u8 last_counter:4;
|
||||
enum ccid3_fback_type feedback:4;
|
||||
u32 x_recv;
|
||||
ktime_t tstamp_last_feedback;
|
||||
struct tfrc_rx_hist hist;
|
||||
struct tfrc_loss_hist li_hist;
|
||||
#define p_inverse li_hist.i_mean
|
||||
u8 ccid3hcrx_last_counter:4;
|
||||
enum ccid3_hc_rx_states ccid3hcrx_state:8;
|
||||
u32 ccid3hcrx_bytes_recv;
|
||||
u32 ccid3hcrx_x_recv;
|
||||
u32 ccid3hcrx_rtt;
|
||||
ktime_t ccid3hcrx_tstamp_last_feedback;
|
||||
struct tfrc_rx_hist ccid3hcrx_hist;
|
||||
struct tfrc_loss_hist ccid3hcrx_li_hist;
|
||||
u16 ccid3hcrx_s;
|
||||
#define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean
|
||||
};
|
||||
|
||||
static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
|
||||
|
@ -86,26 +86,21 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
|
||||
|
||||
/**
|
||||
* tfrc_lh_update_i_mean - Update the `open' loss interval I_0
|
||||
* This updates I_mean as the sequence numbers increase. As a consequence, the
|
||||
* open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
|
||||
* decreases, and thus there is no need to send renewed feedback.
|
||||
* For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev
|
||||
*/
|
||||
void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
|
||||
u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
|
||||
{
|
||||
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
|
||||
u32 old_i_mean = lh->i_mean;
|
||||
s64 len;
|
||||
|
||||
if (cur == NULL) /* not initialised */
|
||||
return;
|
||||
|
||||
/* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
|
||||
if (!dccp_data_packet(skb))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
|
||||
|
||||
if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */
|
||||
return;
|
||||
return 0;
|
||||
|
||||
if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
|
||||
/*
|
||||
@ -119,11 +114,14 @@ void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
|
||||
cur->li_is_closed = 1;
|
||||
|
||||
if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */
|
||||
return;
|
||||
return 0;
|
||||
|
||||
cur->li_length = len;
|
||||
tfrc_lh_calc_i_mean(lh);
|
||||
|
||||
return (lh->i_mean < old_i_mean);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
|
||||
|
||||
/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
|
||||
static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
|
||||
@ -140,18 +138,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
|
||||
* @sk: Used by @calc_first_li in caller-specific way (subtyping)
|
||||
* Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
|
||||
*/
|
||||
bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
|
||||
u32 (*calc_first_li)(struct sock *), struct sock *sk)
|
||||
int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
|
||||
u32 (*calc_first_li)(struct sock *), struct sock *sk)
|
||||
{
|
||||
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
|
||||
|
||||
if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
new = tfrc_lh_demand_next(lh);
|
||||
if (unlikely(new == NULL)) {
|
||||
DCCP_CRIT("Cannot allocate/add loss record.");
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
|
||||
@ -169,7 +167,7 @@ bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
|
||||
|
||||
tfrc_lh_calc_i_mean(lh);
|
||||
}
|
||||
return true;
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
|
||||
|
||||
|
@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
|
||||
|
||||
struct tfrc_rx_hist;
|
||||
|
||||
extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
|
||||
extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
|
||||
u32 (*first_li)(struct sock *), struct sock *);
|
||||
extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
|
||||
extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
|
||||
extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
|
||||
|
||||
#endif /* _DCCP_LI_HIST_ */
|
||||
|
@ -40,6 +40,18 @@
|
||||
#include "packet_history.h"
|
||||
#include "../../dccp.h"
|
||||
|
||||
/**
|
||||
* tfrc_tx_hist_entry - Simple singly-linked TX history list
|
||||
* @next: next oldest entry (LIFO order)
|
||||
* @seqno: sequence number of this entry
|
||||
* @stamp: send time of packet with sequence number @seqno
|
||||
*/
|
||||
struct tfrc_tx_hist_entry {
|
||||
struct tfrc_tx_hist_entry *next;
|
||||
u64 seqno;
|
||||
ktime_t stamp;
|
||||
};
|
||||
|
||||
/*
|
||||
* Transmitter History Routines
|
||||
*/
|
||||
@ -61,6 +73,15 @@ void tfrc_tx_packet_history_exit(void)
|
||||
}
|
||||
}
|
||||
|
||||
static struct tfrc_tx_hist_entry *
|
||||
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
|
||||
{
|
||||
while (head != NULL && head->seqno != seqno)
|
||||
head = head->next;
|
||||
|
||||
return head;
|
||||
}
|
||||
|
||||
int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
|
||||
{
|
||||
struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
|
||||
@ -90,6 +111,25 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
|
||||
|
||||
u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
|
||||
const ktime_t now)
|
||||
{
|
||||
u32 rtt = 0;
|
||||
struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
|
||||
|
||||
if (packet != NULL) {
|
||||
rtt = ktime_us_delta(now, packet->stamp);
|
||||
/*
|
||||
* Garbage-collect older (irrelevant) entries:
|
||||
*/
|
||||
tfrc_tx_hist_purge(&packet->next);
|
||||
}
|
||||
|
||||
return rtt;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
|
||||
|
||||
|
||||
/*
|
||||
* Receiver History Routines
|
||||
*/
|
||||
@ -151,31 +191,14 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
|
||||
|
||||
|
||||
static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
|
||||
{
|
||||
struct tfrc_rx_hist_entry *tmp = h->ring[a];
|
||||
|
||||
h->ring[a] = h->ring[b];
|
||||
h->ring[b] = tmp;
|
||||
}
|
||||
|
||||
static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
|
||||
{
|
||||
__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
|
||||
tfrc_rx_hist_index(h, b));
|
||||
}
|
||||
const u8 idx_a = tfrc_rx_hist_index(h, a),
|
||||
idx_b = tfrc_rx_hist_index(h, b);
|
||||
struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
|
||||
|
||||
/**
|
||||
* tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling
|
||||
* This is called after loss detection has finished, when the history entry
|
||||
* with the index of `loss_count' holds the highest-received sequence number.
|
||||
* RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
|
||||
*/
|
||||
static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
|
||||
{
|
||||
__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
|
||||
h->loss_count = h->loss_start = 0;
|
||||
h->ring[idx_a] = h->ring[idx_b];
|
||||
h->ring[idx_b] = tmp;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -192,8 +215,10 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
|
||||
u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
|
||||
s1 = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
|
||||
if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */
|
||||
if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */
|
||||
h->loss_count = 1;
|
||||
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
|
||||
}
|
||||
}
|
||||
|
||||
static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
|
||||
@ -215,7 +240,8 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
|
||||
|
||||
if (dccp_loss_free(s2, s1, n1)) {
|
||||
/* hole is filled: S0, S2, and S1 are consecutive */
|
||||
tfrc_rx_hist_resume_rtt_sampling(h);
|
||||
h->loss_count = 0;
|
||||
h->loss_start = tfrc_rx_hist_index(h, 1);
|
||||
} else
|
||||
/* gap between S2 and S1: just update loss_prev */
|
||||
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
|
||||
@ -268,7 +294,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
|
||||
|
||||
if (dccp_loss_free(s1, s2, n2)) {
|
||||
/* entire hole filled by S0, S3, S1, S2 */
|
||||
tfrc_rx_hist_resume_rtt_sampling(h);
|
||||
h->loss_start = tfrc_rx_hist_index(h, 2);
|
||||
h->loss_count = 0;
|
||||
} else {
|
||||
/* gap remains between S1 and S2 */
|
||||
h->loss_start = tfrc_rx_hist_index(h, 1);
|
||||
@ -312,7 +339,8 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
|
||||
|
||||
if (dccp_loss_free(s2, s3, n3)) {
|
||||
/* no gap between S2 and S3: entire hole is filled */
|
||||
tfrc_rx_hist_resume_rtt_sampling(h);
|
||||
h->loss_start = tfrc_rx_hist_index(h, 3);
|
||||
h->loss_count = 0;
|
||||
} else {
|
||||
/* gap between S2 and S3 */
|
||||
h->loss_start = tfrc_rx_hist_index(h, 2);
|
||||
@ -326,13 +354,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
|
||||
}
|
||||
|
||||
/**
|
||||
* tfrc_rx_congestion_event - Loss detection and further processing
|
||||
* @h: The non-empty RX history object
|
||||
* @lh: Loss Intervals database to update
|
||||
* @skb: Currently received packet
|
||||
* @ndp: The NDP count belonging to @skb
|
||||
* @first_li: Caller-dependent computation of first loss interval in @lh
|
||||
* @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
|
||||
* tfrc_rx_handle_loss - Loss detection and further processing
|
||||
* @h: The non-empty RX history object
|
||||
* @lh: Loss Intervals database to update
|
||||
* @skb: Currently received packet
|
||||
* @ndp: The NDP count belonging to @skb
|
||||
* @calc_first_li: Caller-dependent computation of first loss interval in @lh
|
||||
* @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
|
||||
* Chooses action according to pending loss, updates LI database when a new
|
||||
* loss was detected, and does required post-processing. Returns 1 when caller
|
||||
* should send feedback, 0 otherwise.
|
||||
@ -340,20 +368,15 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
|
||||
* records accordingly, the caller should not perform any more RX history
|
||||
* operations when loss_count is greater than 0 after calling this function.
|
||||
*/
|
||||
bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*first_li)(struct sock *), struct sock *sk)
|
||||
int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*calc_first_li)(struct sock *), struct sock *sk)
|
||||
{
|
||||
bool new_event = false;
|
||||
|
||||
if (tfrc_rx_hist_duplicate(h, skb))
|
||||
return 0;
|
||||
int is_new_loss = 0;
|
||||
|
||||
if (h->loss_count == 0) {
|
||||
__do_track_loss(h, skb, ndp);
|
||||
tfrc_rx_hist_sample_rtt(h, skb);
|
||||
tfrc_rx_hist_add_packet(h, skb, ndp);
|
||||
} else if (h->loss_count == 1) {
|
||||
__one_after_loss(h, skb, ndp);
|
||||
} else if (h->loss_count != 2) {
|
||||
@ -362,57 +385,34 @@ bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
|
||||
/*
|
||||
* Update Loss Interval database and recycle RX records
|
||||
*/
|
||||
new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
|
||||
is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
|
||||
__three_after_loss(h);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update moving-average of `s' and the sum of received payload bytes.
|
||||
*/
|
||||
if (dccp_data_packet(skb)) {
|
||||
const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
|
||||
|
||||
h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
|
||||
h->bytes_recvd += payload;
|
||||
}
|
||||
|
||||
/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
|
||||
if (!new_event)
|
||||
tfrc_lh_update_i_mean(lh, skb);
|
||||
|
||||
return new_event;
|
||||
return is_new_loss;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
|
||||
|
||||
/* Compute the sending rate X_recv measured between feedback intervals */
|
||||
u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
|
||||
int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
|
||||
{
|
||||
u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
|
||||
s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
|
||||
int i;
|
||||
|
||||
WARN_ON(delta <= 0);
|
||||
/*
|
||||
* Ensure that the sampling interval for X_recv is at least one RTT,
|
||||
* by extending the sampling interval backwards in time, over the last
|
||||
* R_(m-1) seconds, as per rfc3448bis-06, 6.2.
|
||||
* To reduce noise (e.g. when the RTT changes often), this is only
|
||||
* done when delta is smaller than RTT/2.
|
||||
*/
|
||||
if (last_x_recv > 0 && delta < last_rtt/2) {
|
||||
tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
|
||||
(long)delta, (unsigned)last_rtt);
|
||||
|
||||
delta = (bytes ? delta : 0) + last_rtt;
|
||||
bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
|
||||
for (i = 0; i <= TFRC_NDUPACK; i++) {
|
||||
h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
|
||||
if (h->ring[i] == NULL)
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (unlikely(bytes == 0)) {
|
||||
DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
|
||||
return last_x_recv;
|
||||
h->loss_count = h->loss_start = 0;
|
||||
return 0;
|
||||
|
||||
out_free:
|
||||
while (i-- != 0) {
|
||||
kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
|
||||
h->ring[i] = NULL;
|
||||
}
|
||||
return scaled_div32(bytes, delta);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
|
||||
|
||||
void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
|
||||
{
|
||||
@ -426,81 +426,73 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
|
||||
|
||||
static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
|
||||
/**
|
||||
* tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
|
||||
*/
|
||||
static inline struct tfrc_rx_hist_entry *
|
||||
tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
int i;
|
||||
|
||||
memset(h, 0, sizeof(*h));
|
||||
|
||||
for (i = 0; i <= TFRC_NDUPACK; i++) {
|
||||
h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
|
||||
if (h->ring[i] == NULL) {
|
||||
tfrc_rx_hist_purge(h);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return h->ring[0];
|
||||
}
|
||||
|
||||
int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
|
||||
/**
|
||||
* tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
|
||||
*/
|
||||
static inline struct tfrc_rx_hist_entry *
|
||||
tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
if (tfrc_rx_hist_alloc(h))
|
||||
return -ENOBUFS;
|
||||
/*
|
||||
* Initialise first entry with GSR to start loss detection as early as
|
||||
* possible. Code using this must not use any other fields. The entry
|
||||
* will be overwritten once the CCID updates its received packets.
|
||||
*/
|
||||
tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
|
||||
return 0;
|
||||
return h->ring[h->rtt_sample_prev];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
|
||||
|
||||
/**
|
||||
* tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
|
||||
* Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
|
||||
* is pending and uses the following history entries (via rtt_sample_prev):
|
||||
* - h->ring[0] contains the most recent history entry prior to @skb;
|
||||
* - h->ring[1] is an unused `dummy' entry when the current difference is 0;
|
||||
* Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
|
||||
* to compute a sample with given data - calling function should check this.
|
||||
*/
|
||||
void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
|
||||
u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
|
||||
{
|
||||
struct tfrc_rx_hist_entry *last = h->ring[0];
|
||||
u32 sample, delta_v;
|
||||
u32 sample = 0,
|
||||
delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
|
||||
|
||||
/*
|
||||
* When not to sample:
|
||||
* - on non-data packets
|
||||
* (RFC 4342, 8.1: CCVal only fully defined for data packets);
|
||||
* - when no data packets have been received yet
|
||||
* (FIXME: using sampled packet size as indicator here);
|
||||
* - as long as there are gaps in the sequence space (pending loss).
|
||||
*/
|
||||
if (!dccp_data_packet(skb) || h->packet_size == 0 ||
|
||||
tfrc_rx_hist_loss_pending(h))
|
||||
return;
|
||||
if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
|
||||
if (h->rtt_sample_prev == 2) { /* previous candidate stored */
|
||||
sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
|
||||
if (sample)
|
||||
sample = 4 / sample *
|
||||
ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
|
||||
else /*
|
||||
* FIXME: This condition is in principle not
|
||||
* possible but occurs when CCID is used for
|
||||
* two-way data traffic. I have tried to trace
|
||||
* it, but the cause does not seem to be here.
|
||||
*/
|
||||
DCCP_BUG("please report to dccp@vger.kernel.org"
|
||||
" => prev = %u, last = %u",
|
||||
tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
|
||||
} else if (delta_v < 1) {
|
||||
h->rtt_sample_prev = 1;
|
||||
goto keep_ref_for_next_time;
|
||||
}
|
||||
|
||||
h->rtt_sample_prev = 0; /* reset previous candidate */
|
||||
|
||||
delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
|
||||
if (delta_v == 0) { /* less than RTT/4 difference */
|
||||
h->rtt_sample_prev = 1;
|
||||
return;
|
||||
} else if (delta_v == 4) /* optimal match */
|
||||
sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
|
||||
else { /* suboptimal match */
|
||||
h->rtt_sample_prev = 2;
|
||||
goto keep_ref_for_next_time;
|
||||
}
|
||||
sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
|
||||
|
||||
if (delta_v <= 4) /* between RTT/4 and RTT */
|
||||
sample *= 4 / delta_v;
|
||||
else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
|
||||
/*
|
||||
* Optimisation: CCVal difference is greater than 1 RTT, yet the
|
||||
* sample is less than the local RTT estimate; which means that
|
||||
* the RTT estimate is too high.
|
||||
* To avoid noise, it is not done if the sample is below RTT/2.
|
||||
*/
|
||||
return;
|
||||
if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
|
||||
DCCP_WARN("RTT sample %u too large, using max\n", sample);
|
||||
sample = DCCP_SANE_RTT_MAX;
|
||||
}
|
||||
|
||||
/* Use a lower weight than usual to increase responsiveness */
|
||||
h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
|
||||
h->rtt_sample_prev = 0; /* use current entry as next reference */
|
||||
keep_ref_for_next_time:
|
||||
|
||||
return sample;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
|
||||
|
@ -40,28 +40,12 @@
|
||||
#include <linux/slab.h>
|
||||
#include "tfrc.h"
|
||||
|
||||
/**
|
||||
* tfrc_tx_hist_entry - Simple singly-linked TX history list
|
||||
* @next: next oldest entry (LIFO order)
|
||||
* @seqno: sequence number of this entry
|
||||
* @stamp: send time of packet with sequence number @seqno
|
||||
*/
|
||||
struct tfrc_tx_hist_entry {
|
||||
struct tfrc_tx_hist_entry *next;
|
||||
u64 seqno;
|
||||
ktime_t stamp;
|
||||
};
|
||||
|
||||
static inline struct tfrc_tx_hist_entry *
|
||||
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
|
||||
{
|
||||
while (head != NULL && head->seqno != seqno)
|
||||
head = head->next;
|
||||
return head;
|
||||
}
|
||||
struct tfrc_tx_hist_entry;
|
||||
|
||||
extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
|
||||
extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
|
||||
extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
|
||||
const u64 seqno, const ktime_t now);
|
||||
|
||||
/* Subtraction a-b modulo-16, respects circular wrap-around */
|
||||
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
|
||||
@ -91,22 +75,12 @@ struct tfrc_rx_hist_entry {
|
||||
* @loss_count: Number of entries in circular history
|
||||
* @loss_start: Movable index (for loss detection)
|
||||
* @rtt_sample_prev: Used during RTT sampling, points to candidate entry
|
||||
* @rtt_estimate: Receiver RTT estimate
|
||||
* @packet_size: Packet size in bytes (as per RFC 3448, 3.1)
|
||||
* @bytes_recvd: Number of bytes received since @bytes_start
|
||||
* @bytes_start: Start time for counting @bytes_recvd
|
||||
*/
|
||||
struct tfrc_rx_hist {
|
||||
struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
|
||||
u8 loss_count:2,
|
||||
loss_start:2;
|
||||
/* Receiver RTT sampling */
|
||||
#define rtt_sample_prev loss_start
|
||||
u32 rtt_estimate;
|
||||
/* Receiver sampling of application payload lengths */
|
||||
u32 packet_size,
|
||||
bytes_recvd;
|
||||
ktime_t bytes_start;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -150,50 +124,20 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
|
||||
return h->loss_count > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Accessor functions to retrieve parameters sampled by the RX history
|
||||
*/
|
||||
static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
if (h->packet_size == 0) {
|
||||
DCCP_WARN("No sample for s, using fallback\n");
|
||||
return TCP_MIN_RCVMSS;
|
||||
}
|
||||
return h->packet_size;
|
||||
|
||||
}
|
||||
static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
if (h->rtt_estimate == 0) {
|
||||
DCCP_WARN("No RTT estimate available, using fallback RTT\n");
|
||||
return DCCP_FALLBACK_RTT;
|
||||
}
|
||||
return h->rtt_estimate;
|
||||
}
|
||||
|
||||
static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
|
||||
{
|
||||
h->bytes_recvd = 0;
|
||||
h->bytes_start = ktime_get_real();
|
||||
}
|
||||
|
||||
extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
|
||||
|
||||
|
||||
extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
|
||||
const struct sk_buff *skb, const u64 ndp);
|
||||
|
||||
extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
|
||||
|
||||
struct tfrc_loss_hist;
|
||||
extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*first_li)(struct sock *sk),
|
||||
struct sock *sk);
|
||||
extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
|
||||
const struct sk_buff *skb);
|
||||
extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
|
||||
extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*first_li)(struct sock *sk),
|
||||
struct sock *sk);
|
||||
extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
|
||||
const struct sk_buff *skb);
|
||||
extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
|
||||
extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
|
||||
|
||||
#endif /* _DCCP_PKT_HIST_ */
|
||||
|
@ -47,21 +47,6 @@ static inline u32 scaled_div32(u64 a, u64 b)
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1
|
||||
* Uses scaling to improve accuracy of the integer approximation of sqrt(). The
|
||||
* scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
|
||||
* clamped RTT samples (dccp_sample_rtt).
|
||||
* Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
|
||||
* scaling factor is neutralised. For this purpose, it avoids returning zero.
|
||||
*/
|
||||
static inline u16 tfrc_scaled_sqrt(const u32 sample)
|
||||
{
|
||||
const unsigned long non_zero_sample = sample ? : 1;
|
||||
|
||||
return int_sqrt(non_zero_sample << 10);
|
||||
}
|
||||
|
||||
/**
|
||||
* tfrc_ewma - Exponentially weighted moving average
|
||||
* @weight: Weight to be used as damping factor, in units of 1/10
|
||||
@ -73,7 +58,6 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
|
||||
|
||||
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
|
||||
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
|
||||
extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
|
||||
|
||||
extern int tfrc_tx_packet_history_init(void);
|
||||
extern void tfrc_tx_packet_history_exit(void);
|
||||
|
@ -632,16 +632,8 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
|
||||
|
||||
if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
|
||||
if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
|
||||
/*
|
||||
* In the congestion-avoidance phase p decays towards 0
|
||||
* when there are no further losses, so this case is
|
||||
* natural. Truncating to p_min = 0.01% means that the
|
||||
* maximum achievable throughput is limited to about
|
||||
* X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
|
||||
* with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
|
||||
*/
|
||||
tfrc_pr_debug("Value of p (%d) below resolution. "
|
||||
"Substituting %d\n", p, TFRC_SMALLEST_P);
|
||||
DCCP_WARN("Value of p (%d) below resolution. "
|
||||
"Substituting %d\n", p, TFRC_SMALLEST_P);
|
||||
index = 0;
|
||||
} else /* 0.0001 <= p <= 0.05 */
|
||||
index = p/TFRC_SMALLEST_P - 1;
|
||||
@ -666,6 +658,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
|
||||
result = scaled_div(s, R);
|
||||
return scaled_div32(result, f);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x);
|
||||
|
||||
/**
|
||||
@ -700,19 +693,5 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
|
||||
index = tfrc_binsearch(fvalue, 0);
|
||||
return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
|
||||
|
||||
/**
|
||||
* tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
|
||||
* When @loss_event_rate is large, there is a chance that p is truncated to 0.
|
||||
* To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
|
||||
*/
|
||||
u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
|
||||
{
|
||||
if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
|
||||
return 0;
|
||||
if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
|
||||
return 1000000;
|
||||
return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
|
||||
|
104
net/dccp/dccp.h
104
net/dccp/dccp.h
@ -42,11 +42,9 @@
|
||||
extern int dccp_debug;
|
||||
#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
|
||||
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
|
||||
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
|
||||
#else
|
||||
#define dccp_pr_debug(format, a...)
|
||||
#define dccp_pr_debug_cat(format, a...)
|
||||
#define dccp_debug(format, a...)
|
||||
#endif
|
||||
|
||||
extern struct inet_hashinfo dccp_hashinfo;
|
||||
@ -63,14 +61,11 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
|
||||
* - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
|
||||
* Hence a safe upper bound for the maximum option length is 1020-28 = 992
|
||||
*/
|
||||
#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t))
|
||||
#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int))
|
||||
#define DCCP_MAX_PACKET_HDR 28
|
||||
#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
|
||||
#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
|
||||
|
||||
/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */
|
||||
#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t))
|
||||
|
||||
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
|
||||
* state, about 60 seconds */
|
||||
|
||||
@ -86,13 +81,10 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
|
||||
*/
|
||||
#define DCCP_RTO_MAX ((unsigned)(64 * HZ))
|
||||
|
||||
/* DCCP base time resolution - 10 microseconds (RFC 4340, 13.1 ... 13.3) */
|
||||
#define DCCP_TIME_RESOLUTION 10
|
||||
|
||||
/*
|
||||
* RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
|
||||
*/
|
||||
#define DCCP_SANE_RTT_MIN (10 * DCCP_TIME_RESOLUTION)
|
||||
#define DCCP_SANE_RTT_MIN 100
|
||||
#define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5)
|
||||
#define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC)
|
||||
|
||||
@ -103,6 +95,12 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
|
||||
extern int sysctl_dccp_request_retries;
|
||||
extern int sysctl_dccp_retries1;
|
||||
extern int sysctl_dccp_retries2;
|
||||
extern int sysctl_dccp_feat_sequence_window;
|
||||
extern int sysctl_dccp_feat_rx_ccid;
|
||||
extern int sysctl_dccp_feat_tx_ccid;
|
||||
extern int sysctl_dccp_feat_ack_ratio;
|
||||
extern int sysctl_dccp_feat_send_ack_vector;
|
||||
extern int sysctl_dccp_feat_send_ndp_count;
|
||||
extern int sysctl_dccp_tx_qlen;
|
||||
extern int sysctl_dccp_sync_ratelimit;
|
||||
|
||||
@ -237,22 +235,8 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
|
||||
extern void dccp_send_sync(struct sock *sk, const u64 seq,
|
||||
const enum dccp_pkt_type pkt_type);
|
||||
|
||||
/*
|
||||
* TX Packet Dequeueing Interface
|
||||
*/
|
||||
extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
|
||||
extern bool dccp_qpolicy_full(struct sock *sk);
|
||||
extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
|
||||
extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
|
||||
extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
|
||||
extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
|
||||
|
||||
/*
|
||||
* TX Packet Output and TX Timers
|
||||
*/
|
||||
extern void dccp_write_xmit(struct sock *sk);
|
||||
extern void dccp_write_xmit(struct sock *sk, int block);
|
||||
extern void dccp_write_space(struct sock *sk);
|
||||
extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
|
||||
|
||||
extern void dccp_init_xmit_timers(struct sock *sk);
|
||||
static inline void dccp_clear_xmit_timers(struct sock *sk)
|
||||
@ -268,8 +252,7 @@ extern const char *dccp_state_name(const int state);
|
||||
extern void dccp_set_state(struct sock *sk, const int state);
|
||||
extern void dccp_done(struct sock *sk);
|
||||
|
||||
extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
|
||||
struct sk_buff const *skb);
|
||||
extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
|
||||
|
||||
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
|
||||
|
||||
@ -334,14 +317,7 @@ extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk,
|
||||
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
|
||||
extern void dccp_send_close(struct sock *sk, const int active);
|
||||
extern int dccp_invalid_packet(struct sk_buff *skb);
|
||||
|
||||
static inline u32 dccp_sane_rtt(long usec_sample)
|
||||
{
|
||||
if (unlikely(usec_sample <= 0 || usec_sample > DCCP_SANE_RTT_MAX))
|
||||
DCCP_WARN("RTT sample %ld out of bounds!\n", usec_sample);
|
||||
return clamp_val(usec_sample, DCCP_SANE_RTT_MIN, DCCP_SANE_RTT_MAX);
|
||||
}
|
||||
extern u32 dccp_sample_rtt(struct sock *sk, long delta);
|
||||
extern u32 dccp_sample_rtt(struct sock *sk, long delta);
|
||||
|
||||
static inline int dccp_bad_service_code(const struct sock *sk,
|
||||
const __be32 service)
|
||||
@ -435,62 +411,36 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
|
||||
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
const struct dccp_minisock *dmsk = dccp_msk(sk);
|
||||
|
||||
dp->dccps_gsr = seq;
|
||||
/* Sequence validity window depends on remote Sequence Window (7.5.1) */
|
||||
dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
|
||||
/*
|
||||
* Adjust SWL so that it is not below ISR. In contrast to RFC 4340,
|
||||
* 7.5.1 we perform this check beyond the initial handshake: W/W' are
|
||||
* always > 32, so for the first W/W' packets in the lifetime of a
|
||||
* connection we always have to adjust SWL.
|
||||
* A second reason why we are doing this is that the window depends on
|
||||
* the feature-remote value of Sequence Window: nothing stops the peer
|
||||
* from updating this value while we are busy adjusting SWL for the
|
||||
* first W packets (we would have to count from scratch again then).
|
||||
* Therefore it is safer to always make sure that the Sequence Window
|
||||
* is not artificially extended by a peer who grows SWL downwards by
|
||||
* continually updating the feature-remote Sequence-Window.
|
||||
* If sequence numbers wrap it is bad luck. But that will take a while
|
||||
* (48 bit), and this measure prevents Sequence-number attacks.
|
||||
*/
|
||||
if (before48(dp->dccps_swl, dp->dccps_isr))
|
||||
dp->dccps_swl = dp->dccps_isr;
|
||||
dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
|
||||
dccp_set_seqno(&dp->dccps_swl,
|
||||
dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4));
|
||||
dccp_set_seqno(&dp->dccps_swh,
|
||||
dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
|
||||
}
|
||||
|
||||
static inline void dccp_update_gss(struct sock *sk, u64 seq)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
|
||||
dp->dccps_gss = seq;
|
||||
/* Ack validity window depends on local Sequence Window value (7.5.1) */
|
||||
dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
|
||||
/* Adjust AWL so that it is not below ISS - see comment above for SWL */
|
||||
if (before48(dp->dccps_awl, dp->dccps_iss))
|
||||
dp->dccps_awl = dp->dccps_iss;
|
||||
dp->dccps_awh = dp->dccps_gss;
|
||||
}
|
||||
|
||||
static inline int dccp_ackvec_pending(const struct sock *sk)
|
||||
{
|
||||
return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
|
||||
!dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
|
||||
dp->dccps_awh = dp->dccps_gss = seq;
|
||||
dccp_set_seqno(&dp->dccps_awl,
|
||||
(dp->dccps_gss -
|
||||
dccp_msk(sk)->dccpms_sequence_window + 1));
|
||||
}
|
||||
|
||||
static inline int dccp_ack_pending(const struct sock *sk)
|
||||
{
|
||||
return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
|
||||
const struct dccp_sock *dp = dccp_sk(sk);
|
||||
return dp->dccps_timestamp_echo != 0 ||
|
||||
#ifdef CONFIG_IP_DCCP_ACKVEC
|
||||
(dccp_msk(sk)->dccpms_send_ack_vector &&
|
||||
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
|
||||
#endif
|
||||
inet_csk_ack_scheduled(sk);
|
||||
}
|
||||
|
||||
extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
|
||||
extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
|
||||
extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
|
||||
extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
|
||||
struct sk_buff *skb);
|
||||
extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
|
||||
extern void dccp_feat_list_purge(struct list_head *fn_list);
|
||||
|
||||
extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
|
||||
extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
|
||||
extern int dccp_insert_option_elapsed_time(struct sock *sk,
|
||||
|
@ -29,7 +29,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
|
||||
info->tcpi_backoff = icsk->icsk_backoff;
|
||||
info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
|
||||
|
||||
if (dp->dccps_hc_rx_ackvec != NULL)
|
||||
if (dccp_msk(sk)->dccpms_send_ack_vector)
|
||||
info->tcpi_options |= TCPI_OPT_SACK;
|
||||
|
||||
ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
|
||||
|
1993
net/dccp/feat.c
1993
net/dccp/feat.c
File diff suppressed because it is too large
Load Diff
142
net/dccp/feat.h
142
net/dccp/feat.h
@ -3,134 +3,38 @@
|
||||
/*
|
||||
* net/dccp/feat.h
|
||||
*
|
||||
* Feature negotiation for the DCCP protocol (RFC 4340, section 6)
|
||||
* Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
|
||||
* An implementation of the DCCP protocol
|
||||
* Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include "dccp.h"
|
||||
|
||||
/*
|
||||
* Known limit values
|
||||
*/
|
||||
/* Ack Ratio takes 2-byte integer values (11.3) */
|
||||
#define DCCPF_ACK_RATIO_MAX 0xFFFF
|
||||
/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
|
||||
#define DCCPF_SEQ_WMIN 32
|
||||
#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull
|
||||
/* Maximum number of SP values that fit in a single (Confirm) option */
|
||||
#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2)
|
||||
#ifdef CONFIG_IP_DCCP_DEBUG
|
||||
extern const char *dccp_feat_typename(const u8 type);
|
||||
extern const char *dccp_feat_name(const u8 feat);
|
||||
|
||||
enum dccp_feat_type {
|
||||
FEAT_AT_RX = 1, /* located at RX side of half-connection */
|
||||
FEAT_AT_TX = 2, /* located at TX side of half-connection */
|
||||
FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */
|
||||
FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */
|
||||
FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */
|
||||
};
|
||||
|
||||
enum dccp_feat_state {
|
||||
FEAT_DEFAULT = 0, /* using default values from 6.4 */
|
||||
FEAT_INITIALISING, /* feature is being initialised */
|
||||
FEAT_CHANGING, /* Change sent but not confirmed yet */
|
||||
FEAT_UNSTABLE, /* local modification in state CHANGING */
|
||||
FEAT_STABLE /* both ends (think they) agree */
|
||||
};
|
||||
|
||||
/**
|
||||
* dccp_feat_val - Container for SP or NN feature values
|
||||
* @nn: single NN value
|
||||
* @sp.vec: single SP value plus optional preference list
|
||||
* @sp.len: length of @sp.vec in bytes
|
||||
*/
|
||||
typedef union {
|
||||
u64 nn;
|
||||
struct {
|
||||
u8 *vec;
|
||||
u8 len;
|
||||
} sp;
|
||||
} dccp_feat_val;
|
||||
|
||||
/**
|
||||
* struct dccp_feat_entry - Data structure to perform feature negotiation
|
||||
* @feat_num: one of %dccp_feature_numbers
|
||||
* @val: feature's current value (SP features may have preference list)
|
||||
* @state: feature's current state
|
||||
* @needs_mandatory: whether Mandatory options should be sent
|
||||
* @needs_confirm: whether to send a Confirm instead of a Change
|
||||
* @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm)
|
||||
* @is_local: feature location (1) or feature-remote (0)
|
||||
* @node: list pointers, entries arranged in FIFO order
|
||||
*/
|
||||
struct dccp_feat_entry {
|
||||
u8 feat_num;
|
||||
dccp_feat_val val;
|
||||
enum dccp_feat_state state:8;
|
||||
bool needs_mandatory:1,
|
||||
needs_confirm:1,
|
||||
empty_confirm:1,
|
||||
is_local:1;
|
||||
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry)
|
||||
static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
|
||||
{
|
||||
if (entry->needs_confirm)
|
||||
return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R;
|
||||
return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R;
|
||||
dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
|
||||
dccp_feat_name(feat), feat, val);
|
||||
}
|
||||
#else
|
||||
#define dccp_feat_debug(type, feat, val)
|
||||
#endif /* CONFIG_IP_DCCP_DEBUG */
|
||||
|
||||
/**
|
||||
* struct ccid_dependency - Track changes resulting from choosing a CCID
|
||||
* @dependent_feat: one of %dccp_feature_numbers
|
||||
* @is_local: local (1) or remote (0) @dependent_feat
|
||||
* @is_mandatory: whether presence of @dependent_feat is mission-critical or not
|
||||
* @val: corresponding default value for @dependent_feat (u8 is sufficient here)
|
||||
*/
|
||||
struct ccid_dependency {
|
||||
u8 dependent_feat;
|
||||
bool is_local:1,
|
||||
is_mandatory:1;
|
||||
u8 val;
|
||||
};
|
||||
extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
|
||||
u8 *val, u8 len, gfp_t gfp);
|
||||
extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
|
||||
u8 *val, u8 len);
|
||||
extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
|
||||
u8 *val, u8 len);
|
||||
extern void dccp_feat_clean(struct dccp_minisock *dmsk);
|
||||
extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
|
||||
extern int dccp_feat_init(struct dccp_minisock *dmsk);
|
||||
|
||||
/*
|
||||
* Sysctls to seed defaults for feature negotiation
|
||||
*/
|
||||
extern unsigned long sysctl_dccp_sequence_window;
|
||||
extern int sysctl_dccp_rx_ccid;
|
||||
extern int sysctl_dccp_tx_ccid;
|
||||
|
||||
extern int dccp_feat_init(struct sock *sk);
|
||||
extern void dccp_feat_initialise_sysctls(void);
|
||||
extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
|
||||
u8 const *list, u8 len);
|
||||
extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
|
||||
extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
|
||||
u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
|
||||
extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
|
||||
|
||||
/*
|
||||
* Encoding variable-length options and their maximum length.
|
||||
*
|
||||
* This affects NN options (SP options are all u8) and other variable-length
|
||||
* options (see table 3 in RFC 4340). The limit is currently given by the Sequence
|
||||
* Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option, all other
|
||||
* options consume less than 6 bytes (timestamps are 4 bytes).
|
||||
* When updating this constant (e.g. due to new internet drafts / RFCs), make
|
||||
* sure that you also update all code which refers to it.
|
||||
*/
|
||||
#define DCCP_OPTVAL_MAXLEN 6
|
||||
|
||||
extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
|
||||
extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
|
||||
|
||||
extern int dccp_insert_option_mandatory(struct sk_buff *skb);
|
||||
extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
|
||||
u8 *val, u8 len, bool repeat_first);
|
||||
#endif /* _DCCP_FEAT_H */
|
||||
|
164
net/dccp/input.c
164
net/dccp/input.c
@ -159,15 +159,13 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
|
||||
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
|
||||
}
|
||||
|
||||
static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
|
||||
static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
|
||||
if (av == NULL)
|
||||
return;
|
||||
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
|
||||
dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
|
||||
dccp_ackvec_input(av, skb);
|
||||
if (dccp_msk(sk)->dccpms_send_ack_vector)
|
||||
dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
|
||||
DCCP_SKB_CB(skb)->dccpd_ack_seq);
|
||||
}
|
||||
|
||||
static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
|
||||
@ -366,13 +364,22 @@ discard:
|
||||
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
|
||||
const struct dccp_hdr *dh, const unsigned len)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
|
||||
if (dccp_check_seqno(sk, skb))
|
||||
goto discard;
|
||||
|
||||
if (dccp_parse_options(sk, NULL, skb))
|
||||
return 1;
|
||||
|
||||
dccp_handle_ackvec_processing(sk, skb);
|
||||
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
|
||||
dccp_event_ack_recv(sk, skb);
|
||||
|
||||
if (dccp_msk(sk)->dccpms_send_ack_vector &&
|
||||
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
|
||||
DCCP_SKB_CB(skb)->dccpd_seq,
|
||||
DCCP_ACKVEC_STATE_RECEIVED))
|
||||
goto discard;
|
||||
dccp_deliver_input_to_ccids(sk, skb);
|
||||
|
||||
return __dccp_rcv_established(sk, skb, dh, len);
|
||||
@ -414,33 +421,40 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
|
||||
goto out_invalid_packet;
|
||||
}
|
||||
|
||||
/*
|
||||
* If option processing (Step 8) failed, return 1 here so that
|
||||
* dccp_v4_do_rcv() sends a Reset. The Reset code depends on
|
||||
* the option type and is set in dccp_parse_options().
|
||||
*/
|
||||
if (dccp_parse_options(sk, NULL, skb))
|
||||
return 1;
|
||||
goto out_invalid_packet;
|
||||
|
||||
/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
|
||||
if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
|
||||
dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
|
||||
dp->dccps_options_received.dccpor_timestamp_echo));
|
||||
|
||||
if (dccp_msk(sk)->dccpms_send_ack_vector &&
|
||||
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
|
||||
DCCP_SKB_CB(skb)->dccpd_seq,
|
||||
DCCP_ACKVEC_STATE_RECEIVED))
|
||||
goto out_invalid_packet; /* FIXME: change error code */
|
||||
|
||||
/* Stop the REQUEST timer */
|
||||
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
|
||||
WARN_ON(sk->sk_send_head == NULL);
|
||||
kfree_skb(sk->sk_send_head);
|
||||
sk->sk_send_head = NULL;
|
||||
|
||||
dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
dccp_update_gsr(sk, dp->dccps_isr);
|
||||
/*
|
||||
* Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect
|
||||
* and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH
|
||||
* is done as part of activating the feature values below, since
|
||||
* these settings depend on the local/remote Sequence Window
|
||||
* features, which were undefined or not confirmed until now.
|
||||
* SWL and AWL are initially adjusted so that they are not less than
|
||||
* the initial Sequence Numbers received and sent, respectively:
|
||||
* SWL := max(GSR + 1 - floor(W/4), ISR),
|
||||
* AWL := max(GSS - W' + 1, ISS).
|
||||
* These adjustments MUST be applied only at the beginning of the
|
||||
* connection.
|
||||
*
|
||||
* AWL was adjusted in dccp_v4_connect -acme
|
||||
*/
|
||||
dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
dccp_set_seqno(&dp->dccps_swl,
|
||||
max48(dp->dccps_swl, dp->dccps_isr));
|
||||
|
||||
dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
|
||||
|
||||
@ -461,15 +475,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
|
||||
*/
|
||||
dccp_set_state(sk, DCCP_PARTOPEN);
|
||||
|
||||
/*
|
||||
* If feature negotiation was successful, activate features now;
|
||||
* an activation failure means that this host could not activate
|
||||
* one or more features (e.g. insufficient memory), which would
|
||||
* leave at least one feature in an undefined state.
|
||||
*/
|
||||
if (dccp_feat_activate_values(sk, &dp->dccps_featneg))
|
||||
goto unable_to_proceed;
|
||||
|
||||
/* Make sure socket is routed, for correct metrics. */
|
||||
icsk->icsk_af_ops->rebuild_header(sk);
|
||||
|
||||
@ -504,16 +509,6 @@ out_invalid_packet:
|
||||
/* dccp_v4_do_rcv will send a reset */
|
||||
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
|
||||
return 1;
|
||||
|
||||
unable_to_proceed:
|
||||
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED;
|
||||
/*
|
||||
* We mark this socket as no longer usable, so that the loop in
|
||||
* dccp_sendmsg() terminates and the application gets notified.
|
||||
*/
|
||||
dccp_set_state(sk, DCCP_CLOSED);
|
||||
sk->sk_err = ECOMM;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
|
||||
@ -595,6 +590,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
|
||||
skb) < 0)
|
||||
return 1;
|
||||
|
||||
/* FIXME: do congestion control initialization */
|
||||
goto discard;
|
||||
}
|
||||
if (dh->dccph_type == DCCP_PKT_RESET)
|
||||
@ -603,36 +600,30 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
/* Caller (dccp_v4_do_rcv) will send Reset */
|
||||
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
|
||||
return 1;
|
||||
} else if (sk->sk_state == DCCP_CLOSED) {
|
||||
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */
|
||||
if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb))
|
||||
goto discard;
|
||||
if (sk->sk_state != DCCP_REQUESTING) {
|
||||
if (dccp_check_seqno(sk, skb))
|
||||
goto discard;
|
||||
|
||||
/*
|
||||
* Step 7: Check for unexpected packet types
|
||||
* If (S.is_server and P.type == Response)
|
||||
* or (S.is_client and P.type == Request)
|
||||
* or (S.state == RESPOND and P.type == Data),
|
||||
* Send Sync packet acknowledging P.seqno
|
||||
* Drop packet and return
|
||||
*/
|
||||
if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
|
||||
dh->dccph_type == DCCP_PKT_RESPONSE) ||
|
||||
(dp->dccps_role == DCCP_ROLE_CLIENT &&
|
||||
dh->dccph_type == DCCP_PKT_REQUEST) ||
|
||||
(sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
|
||||
dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
|
||||
goto discard;
|
||||
/*
|
||||
* Step 8: Process options and mark acknowledgeable
|
||||
*/
|
||||
if (dccp_parse_options(sk, NULL, skb))
|
||||
return 1;
|
||||
|
||||
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
|
||||
dccp_event_ack_recv(sk, skb);
|
||||
|
||||
if (dccp_msk(sk)->dccpms_send_ack_vector &&
|
||||
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
|
||||
DCCP_SKB_CB(skb)->dccpd_seq,
|
||||
DCCP_ACKVEC_STATE_RECEIVED))
|
||||
goto discard;
|
||||
|
||||
dccp_deliver_input_to_ccids(sk, skb);
|
||||
}
|
||||
|
||||
/* Step 8: Process options */
|
||||
if (dccp_parse_options(sk, NULL, skb))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Step 9: Process Reset
|
||||
* If P.type == Reset,
|
||||
@ -640,22 +631,44 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
* S.state := TIMEWAIT
|
||||
* Set TIMEWAIT timer
|
||||
* Drop packet and return
|
||||
*/
|
||||
*/
|
||||
if (dh->dccph_type == DCCP_PKT_RESET) {
|
||||
dccp_rcv_reset(sk, skb);
|
||||
return 0;
|
||||
} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */
|
||||
/*
|
||||
* Step 7: Check for unexpected packet types
|
||||
* If (S.is_server and P.type == Response)
|
||||
* or (S.is_client and P.type == Request)
|
||||
* or (S.state == RESPOND and P.type == Data),
|
||||
* Send Sync packet acknowledging P.seqno
|
||||
* Drop packet and return
|
||||
*/
|
||||
} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
|
||||
dh->dccph_type == DCCP_PKT_RESPONSE) ||
|
||||
(dp->dccps_role == DCCP_ROLE_CLIENT &&
|
||||
dh->dccph_type == DCCP_PKT_REQUEST) ||
|
||||
(sk->sk_state == DCCP_RESPOND &&
|
||||
dh->dccph_type == DCCP_PKT_DATA)) {
|
||||
dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
|
||||
goto discard;
|
||||
} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
|
||||
if (dccp_rcv_closereq(sk, skb))
|
||||
return 0;
|
||||
goto discard;
|
||||
} else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */
|
||||
} else if (dh->dccph_type == DCCP_PKT_CLOSE) {
|
||||
if (dccp_rcv_close(sk, skb))
|
||||
return 0;
|
||||
goto discard;
|
||||
}
|
||||
|
||||
switch (sk->sk_state) {
|
||||
case DCCP_CLOSED:
|
||||
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
|
||||
return 1;
|
||||
|
||||
case DCCP_REQUESTING:
|
||||
/* FIXME: do congestion control initialization */
|
||||
|
||||
queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
|
||||
if (queued >= 0)
|
||||
return queued;
|
||||
@ -663,12 +676,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
__kfree_skb(skb);
|
||||
return 0;
|
||||
|
||||
case DCCP_PARTOPEN:
|
||||
/* Step 8: if using Ack Vectors, mark packet acknowledgeable */
|
||||
dccp_handle_ackvec_processing(sk, skb);
|
||||
dccp_deliver_input_to_ccids(sk, skb);
|
||||
/* fall through */
|
||||
case DCCP_RESPOND:
|
||||
case DCCP_PARTOPEN:
|
||||
queued = dccp_rcv_respond_partopen_state_process(sk, skb,
|
||||
dh, len);
|
||||
break;
|
||||
@ -707,7 +716,16 @@ u32 dccp_sample_rtt(struct sock *sk, long delta)
|
||||
/* dccpor_elapsed_time is either zeroed out or set and > 0 */
|
||||
delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10;
|
||||
|
||||
return dccp_sane_rtt(delta);
|
||||
if (unlikely(delta <= 0)) {
|
||||
DCCP_WARN("unusable RTT sample %ld, using min\n", delta);
|
||||
return DCCP_SANE_RTT_MIN;
|
||||
}
|
||||
if (unlikely(delta > DCCP_SANE_RTT_MAX)) {
|
||||
DCCP_WARN("RTT sample %ld too large, using max\n", delta);
|
||||
return DCCP_SANE_RTT_MAX;
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(dccp_sample_rtt);
|
||||
|
@ -545,7 +545,6 @@ out:
|
||||
|
||||
static void dccp_v4_reqsk_destructor(struct request_sock *req)
|
||||
{
|
||||
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
|
||||
kfree(inet_rsk(req)->opt);
|
||||
}
|
||||
|
||||
@ -596,8 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||
if (req == NULL)
|
||||
goto drop;
|
||||
|
||||
if (dccp_reqsk_init(req, dccp_sk(sk), skb))
|
||||
goto drop_and_free;
|
||||
dccp_reqsk_init(req, skb);
|
||||
|
||||
dreq = dccp_rsk(req);
|
||||
if (dccp_parse_options(sk, dreq, skb))
|
||||
|
@ -302,7 +302,6 @@ done:
|
||||
|
||||
static void dccp_v6_reqsk_destructor(struct request_sock *req)
|
||||
{
|
||||
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
|
||||
if (inet6_rsk(req)->pktopts != NULL)
|
||||
kfree_skb(inet6_rsk(req)->pktopts);
|
||||
}
|
||||
@ -425,8 +424,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||
if (req == NULL)
|
||||
goto drop;
|
||||
|
||||
if (dccp_reqsk_init(req, dccp_sk(sk), skb))
|
||||
goto drop_and_free;
|
||||
dccp_reqsk_init(req, skb);
|
||||
|
||||
dreq = dccp_rsk(req);
|
||||
if (dccp_parse_options(sk, dreq, skb))
|
||||
|
@ -42,6 +42,16 @@ struct inet_timewait_death_row dccp_death_row = {
|
||||
|
||||
EXPORT_SYMBOL_GPL(dccp_death_row);
|
||||
|
||||
void dccp_minisock_init(struct dccp_minisock *dmsk)
|
||||
{
|
||||
dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
|
||||
dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid;
|
||||
dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid;
|
||||
dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio;
|
||||
dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
|
||||
dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count;
|
||||
}
|
||||
|
||||
void dccp_time_wait(struct sock *sk, int state, int timeo)
|
||||
{
|
||||
struct inet_timewait_sock *tw = NULL;
|
||||
@ -102,9 +112,10 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
|
||||
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
|
||||
|
||||
if (newsk != NULL) {
|
||||
struct dccp_request_sock *dreq = dccp_rsk(req);
|
||||
const struct dccp_request_sock *dreq = dccp_rsk(req);
|
||||
struct inet_connection_sock *newicsk = inet_csk(newsk);
|
||||
struct dccp_sock *newdp = dccp_sk(newsk);
|
||||
struct dccp_minisock *newdmsk = dccp_msk(newsk);
|
||||
|
||||
newdp->dccps_role = DCCP_ROLE_SERVER;
|
||||
newdp->dccps_hc_rx_ackvec = NULL;
|
||||
@ -114,32 +125,65 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
|
||||
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
|
||||
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
|
||||
|
||||
INIT_LIST_HEAD(&newdp->dccps_featneg);
|
||||
/*
|
||||
* Step 3: Process LISTEN state
|
||||
*
|
||||
* Choose S.ISS (initial seqno) or set from Init Cookies
|
||||
* Initialize S.GAR := S.ISS
|
||||
* Set S.ISR, S.GSR from packet (or Init Cookies)
|
||||
*
|
||||
* Setting AWL/AWH and SWL/SWH happens as part of the feature
|
||||
* activation below, as these windows all depend on the local
|
||||
* and remote Sequence Window feature values (7.5.2).
|
||||
*/
|
||||
newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss;
|
||||
newdp->dccps_gar = newdp->dccps_iss;
|
||||
newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr;
|
||||
if (dccp_feat_clone(sk, newsk))
|
||||
goto out_free;
|
||||
|
||||
/*
|
||||
* Activate features: initialise CCIDs, sequence windows etc.
|
||||
*/
|
||||
if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
|
||||
if (newdmsk->dccpms_send_ack_vector) {
|
||||
newdp->dccps_hc_rx_ackvec =
|
||||
dccp_ackvec_alloc(GFP_ATOMIC);
|
||||
if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
newdp->dccps_hc_rx_ccid =
|
||||
ccid_hc_rx_new(newdmsk->dccpms_rx_ccid,
|
||||
newsk, GFP_ATOMIC);
|
||||
newdp->dccps_hc_tx_ccid =
|
||||
ccid_hc_tx_new(newdmsk->dccpms_tx_ccid,
|
||||
newsk, GFP_ATOMIC);
|
||||
if (unlikely(newdp->dccps_hc_rx_ccid == NULL ||
|
||||
newdp->dccps_hc_tx_ccid == NULL)) {
|
||||
dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
|
||||
ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk);
|
||||
ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk);
|
||||
out_free:
|
||||
/* It is still raw copy of parent, so invalidate
|
||||
* destructor and make plain sk_free() */
|
||||
newsk->sk_destruct = NULL;
|
||||
sk_free(newsk);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 3: Process LISTEN state
|
||||
*
|
||||
* Choose S.ISS (initial seqno) or set from Init Cookies
|
||||
* Initialize S.GAR := S.ISS
|
||||
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
|
||||
*/
|
||||
|
||||
/* See dccp_v4_conn_request */
|
||||
newdmsk->dccpms_sequence_window = req->rcv_wnd;
|
||||
|
||||
newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
|
||||
dccp_update_gss(newsk, dreq->dreq_iss);
|
||||
|
||||
newdp->dccps_isr = dreq->dreq_isr;
|
||||
dccp_update_gsr(newsk, dreq->dreq_isr);
|
||||
|
||||
/*
|
||||
* SWL and AWL are initially adjusted so that they are not less than
|
||||
* the initial Sequence Numbers received and sent, respectively:
|
||||
* SWL := max(GSR + 1 - floor(W/4), ISR),
|
||||
* AWL := max(GSS - W' + 1, ISS).
|
||||
* These adjustments MUST be applied only at the beginning of the
|
||||
* connection.
|
||||
*/
|
||||
dccp_set_seqno(&newdp->dccps_swl,
|
||||
max48(newdp->dccps_swl, newdp->dccps_isr));
|
||||
dccp_set_seqno(&newdp->dccps_awl,
|
||||
max48(newdp->dccps_awl, newdp->dccps_iss));
|
||||
|
||||
dccp_init_xmit_timers(newsk);
|
||||
|
||||
DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
|
||||
@ -260,17 +304,14 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
|
||||
|
||||
EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
|
||||
|
||||
int dccp_reqsk_init(struct request_sock *req,
|
||||
struct dccp_sock const *dp, struct sk_buff const *skb)
|
||||
void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
|
||||
{
|
||||
struct dccp_request_sock *dreq = dccp_rsk(req);
|
||||
|
||||
inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
|
||||
inet_rsk(req)->acked = 0;
|
||||
req->rcv_wnd = sysctl_dccp_feat_sequence_window;
|
||||
dreq->dreq_timestamp_echo = 0;
|
||||
|
||||
/* inherit feature negotiation options from listening socket */
|
||||
return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(dccp_reqsk_init);
|
||||
|
@ -23,20 +23,23 @@
|
||||
#include "dccp.h"
|
||||
#include "feat.h"
|
||||
|
||||
u64 dccp_decode_value_var(const u8 *bf, const u8 len)
|
||||
{
|
||||
u64 value = 0;
|
||||
int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
|
||||
int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
|
||||
int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
|
||||
int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
|
||||
int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
|
||||
int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
|
||||
|
||||
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
|
||||
{
|
||||
u32 value = 0;
|
||||
|
||||
if (len >= DCCP_OPTVAL_MAXLEN)
|
||||
value += ((u64)*bf++) << 40;
|
||||
if (len > 4)
|
||||
value += ((u64)*bf++) << 32;
|
||||
if (len > 3)
|
||||
value += ((u64)*bf++) << 24;
|
||||
value += *bf++ << 24;
|
||||
if (len > 2)
|
||||
value += ((u64)*bf++) << 16;
|
||||
value += *bf++ << 16;
|
||||
if (len > 1)
|
||||
value += ((u64)*bf++) << 8;
|
||||
value += *bf++ << 8;
|
||||
if (len > 0)
|
||||
value += *bf;
|
||||
|
||||
@ -54,6 +57,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
const struct dccp_hdr *dh = dccp_hdr(skb);
|
||||
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
|
||||
u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
|
||||
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
|
||||
unsigned char *opt_ptr = options;
|
||||
const unsigned char *opt_end = (unsigned char *)dh +
|
||||
@ -95,11 +99,18 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
|
||||
}
|
||||
|
||||
/*
|
||||
* CCID-Specific Options (from RFC 4340, sec. 10.3):
|
||||
*
|
||||
* Option numbers 128 through 191 are for options sent from the
|
||||
* HC-Sender to the HC-Receiver; option numbers 192 through 255
|
||||
* are for options sent from the HC-Receiver to the HC-Sender.
|
||||
*
|
||||
* CCID-specific options are ignored during connection setup, as
|
||||
* negotiation may still be in progress (see RFC 4340, 10.3).
|
||||
* The same applies to Ack Vectors, as these depend on the CCID.
|
||||
*
|
||||
*/
|
||||
if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
|
||||
if (dreq != NULL && (opt >= 128 ||
|
||||
opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
|
||||
goto ignore_option;
|
||||
|
||||
@ -120,13 +131,43 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
|
||||
dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
|
||||
(unsigned long long)opt_recv->dccpor_ndp);
|
||||
break;
|
||||
case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
|
||||
if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */
|
||||
case DCCPO_CHANGE_L:
|
||||
/* fall through */
|
||||
case DCCPO_CHANGE_R:
|
||||
if (pkt_type == DCCP_PKT_DATA)
|
||||
break;
|
||||
rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
|
||||
*value, value + 1, len - 1);
|
||||
if (rc)
|
||||
goto out_featneg_failed;
|
||||
if (len < 2)
|
||||
goto out_invalid_option;
|
||||
rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
|
||||
len - 1);
|
||||
/*
|
||||
* When there is a change error, change_recv is
|
||||
* responsible for dealing with it. i.e. reply with an
|
||||
* empty confirm.
|
||||
* If the change was mandatory, then we need to die.
|
||||
*/
|
||||
if (rc && mandatory)
|
||||
goto out_invalid_option;
|
||||
break;
|
||||
case DCCPO_CONFIRM_L:
|
||||
/* fall through */
|
||||
case DCCPO_CONFIRM_R:
|
||||
if (pkt_type == DCCP_PKT_DATA)
|
||||
break;
|
||||
if (len < 2) /* FIXME this disallows empty confirm */
|
||||
goto out_invalid_option;
|
||||
if (dccp_feat_confirm_recv(sk, opt, *value,
|
||||
value + 1, len - 1))
|
||||
goto out_invalid_option;
|
||||
break;
|
||||
case DCCPO_ACK_VECTOR_0:
|
||||
case DCCPO_ACK_VECTOR_1:
|
||||
if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
|
||||
break;
|
||||
|
||||
if (dccp_msk(sk)->dccpms_send_ack_vector &&
|
||||
dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
|
||||
goto out_invalid_option;
|
||||
break;
|
||||
case DCCPO_TIMESTAMP:
|
||||
if (len != 4)
|
||||
@ -154,8 +195,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
|
||||
dccp_role(sk), ntohl(opt_val),
|
||||
(unsigned long long)
|
||||
DCCP_SKB_CB(skb)->dccpd_ack_seq);
|
||||
/* schedule an Ack in case this sender is quiescent */
|
||||
inet_csk_schedule_ack(sk);
|
||||
break;
|
||||
case DCCPO_TIMESTAMP_ECHO:
|
||||
if (len != 4 && len != 6 && len != 8)
|
||||
@ -212,25 +251,23 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
|
||||
dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
|
||||
dccp_role(sk), elapsed_time);
|
||||
break;
|
||||
case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
|
||||
case 128 ... 191: {
|
||||
const u16 idx = value - options;
|
||||
|
||||
if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
|
||||
pkt_type, opt, value, len))
|
||||
opt, len, idx,
|
||||
value) != 0)
|
||||
goto out_invalid_option;
|
||||
}
|
||||
break;
|
||||
case DCCPO_ACK_VECTOR_0:
|
||||
case DCCPO_ACK_VECTOR_1:
|
||||
if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
|
||||
break;
|
||||
/*
|
||||
* Ack vectors are processed by the TX CCID if it is
|
||||
* interested. The RX CCID need not parse Ack Vectors,
|
||||
* since it is only interested in clearing old state.
|
||||
* Fall through.
|
||||
*/
|
||||
case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
|
||||
case 192 ... 255: {
|
||||
const u16 idx = value - options;
|
||||
|
||||
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
|
||||
pkt_type, opt, value, len))
|
||||
opt, len, idx,
|
||||
value) != 0)
|
||||
goto out_invalid_option;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
|
||||
@ -252,10 +289,8 @@ out_nonsensical_length:
|
||||
|
||||
out_invalid_option:
|
||||
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
|
||||
rc = DCCP_RESET_CODE_OPTION_ERROR;
|
||||
out_featneg_failed:
|
||||
DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
|
||||
DCCP_SKB_CB(skb)->dccpd_reset_code = rc;
|
||||
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
|
||||
DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
|
||||
DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
|
||||
DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
|
||||
DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
|
||||
@ -264,12 +299,9 @@ out_featneg_failed:
|
||||
|
||||
EXPORT_SYMBOL_GPL(dccp_parse_options);
|
||||
|
||||
void dccp_encode_value_var(const u64 value, u8 *to, const u8 len)
|
||||
static void dccp_encode_value_var(const u32 value, unsigned char *to,
|
||||
const unsigned int len)
|
||||
{
|
||||
if (len >= DCCP_OPTVAL_MAXLEN)
|
||||
*to++ = (value & 0xFF0000000000ull) >> 40;
|
||||
if (len > 4)
|
||||
*to++ = (value & 0xFF00000000ull) >> 32;
|
||||
if (len > 3)
|
||||
*to++ = (value & 0xFF000000) >> 24;
|
||||
if (len > 2)
|
||||
@ -429,140 +461,92 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
|
||||
static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
|
||||
u8 *val, u8 len)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
|
||||
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
|
||||
const u16 buflen = dccp_ackvec_buflen(av);
|
||||
/* Figure out how many options do we need to represent the ackvec */
|
||||
const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
|
||||
u16 len = buflen + 2 * nr_opts;
|
||||
u8 i, nonce = 0;
|
||||
const unsigned char *tail, *from;
|
||||
unsigned char *to;
|
||||
u8 *to;
|
||||
|
||||
if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
|
||||
DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
|
||||
dccp_packet_name(dcb->dccpd_type));
|
||||
return -1;
|
||||
}
|
||||
/*
|
||||
* Since Ack Vectors are variable-length, we can not always predict
|
||||
* their size. To catch exception cases where the space is running out
|
||||
* on the skb, a separate Sync is scheduled to carry the Ack Vector.
|
||||
*/
|
||||
if (len > DCCPAV_MIN_OPTLEN &&
|
||||
len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
|
||||
DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
|
||||
"MPS=%u ==> reduce payload size?\n", len, skb->len,
|
||||
dcb->dccpd_opt_len, dp->dccps_mss_cache);
|
||||
dp->dccps_sync_scheduled = 1;
|
||||
return 0;
|
||||
}
|
||||
dcb->dccpd_opt_len += len;
|
||||
|
||||
to = skb_push(skb, len);
|
||||
len = buflen;
|
||||
from = av->av_buf + av->av_buf_head;
|
||||
tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
|
||||
|
||||
for (i = 0; i < nr_opts; ++i) {
|
||||
int copylen = len;
|
||||
|
||||
if (len > DCCP_SINGLE_OPT_MAXLEN)
|
||||
copylen = DCCP_SINGLE_OPT_MAXLEN;
|
||||
|
||||
/*
|
||||
* RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
|
||||
* its type; ack_nonce is the sum of all individual buf_nonce's.
|
||||
*/
|
||||
nonce ^= av->av_buf_nonce[i];
|
||||
|
||||
*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
|
||||
*to++ = copylen + 2;
|
||||
|
||||
/* Check if buf_head wraps */
|
||||
if (from + copylen > tail) {
|
||||
const u16 tailsize = tail - from;
|
||||
|
||||
memcpy(to, from, tailsize);
|
||||
to += tailsize;
|
||||
len -= tailsize;
|
||||
copylen -= tailsize;
|
||||
from = av->av_buf;
|
||||
}
|
||||
|
||||
memcpy(to, from, copylen);
|
||||
from += copylen;
|
||||
to += copylen;
|
||||
len -= copylen;
|
||||
}
|
||||
/*
|
||||
* Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
|
||||
*/
|
||||
if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
|
||||
return -ENOBUFS;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_insert_option_mandatory - Mandatory option (5.8.2)
|
||||
* Note that since we are using skb_push, this function needs to be called
|
||||
* _after_ inserting the option it is supposed to influence (stack order).
|
||||
*/
|
||||
int dccp_insert_option_mandatory(struct sk_buff *skb)
|
||||
{
|
||||
if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN)
|
||||
return -1;
|
||||
|
||||
DCCP_SKB_CB(skb)->dccpd_opt_len++;
|
||||
*skb_push(skb, 1) = DCCPO_MANDATORY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb
|
||||
* @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R
|
||||
* @feat: one out of %dccp_feature_numbers
|
||||
* @val: NN value or SP array (preferred element first) to copy
|
||||
* @len: true length of @val in bytes (excluding first element repetition)
|
||||
* @repeat_first: whether to copy the first element of @val twice
|
||||
* The last argument is used to construct Confirm options, where the preferred
|
||||
* value and the preference list appear separately (RFC 4340, 6.3.1). Preference
|
||||
* lists are kept such that the preferred entry is always first, so we only need
|
||||
* to copy twice, and avoid the overhead of cloning into a bigger array.
|
||||
*/
|
||||
int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
|
||||
u8 *val, u8 len, bool repeat_first)
|
||||
{
|
||||
u8 tot_len, *to;
|
||||
|
||||
/* take the `Feature' field and possible repetition into account */
|
||||
if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) {
|
||||
DCCP_WARN("length %u for feature %u too large\n", len, feat);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (unlikely(val == NULL || len == 0))
|
||||
len = repeat_first = 0;
|
||||
tot_len = 3 + repeat_first + len;
|
||||
|
||||
if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
|
||||
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
|
||||
DCCP_WARN("packet too small for feature %d option!\n", feat);
|
||||
return -1;
|
||||
}
|
||||
DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len;
|
||||
|
||||
to = skb_push(skb, tot_len);
|
||||
DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3;
|
||||
|
||||
to = skb_push(skb, len + 3);
|
||||
*to++ = type;
|
||||
*to++ = tot_len;
|
||||
*to++ = len + 3;
|
||||
*to++ = feat;
|
||||
|
||||
if (repeat_first)
|
||||
*to++ = *val;
|
||||
if (len)
|
||||
memcpy(to, val, len);
|
||||
|
||||
dccp_pr_debug("%s(%s (%d), ...), length %d\n",
|
||||
dccp_feat_typename(type),
|
||||
dccp_feat_name(feat), feat, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct dccp_minisock *dmsk = dccp_msk(sk);
|
||||
struct dccp_opt_pend *opt, *next;
|
||||
int change = 0;
|
||||
|
||||
/* confirm any options [NN opts] */
|
||||
list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
|
||||
dccp_insert_feat_opt(skb, opt->dccpop_type,
|
||||
opt->dccpop_feat, opt->dccpop_val,
|
||||
opt->dccpop_len);
|
||||
/* fear empty confirms */
|
||||
if (opt->dccpop_val)
|
||||
kfree(opt->dccpop_val);
|
||||
kfree(opt);
|
||||
}
|
||||
INIT_LIST_HEAD(&dmsk->dccpms_conf);
|
||||
|
||||
/* see which features we need to send */
|
||||
list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
|
||||
/* see if we need to send any confirm */
|
||||
if (opt->dccpop_sc) {
|
||||
dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
|
||||
opt->dccpop_feat,
|
||||
opt->dccpop_sc->dccpoc_val,
|
||||
opt->dccpop_sc->dccpoc_len);
|
||||
|
||||
BUG_ON(!opt->dccpop_sc->dccpoc_val);
|
||||
kfree(opt->dccpop_sc->dccpoc_val);
|
||||
kfree(opt->dccpop_sc);
|
||||
opt->dccpop_sc = NULL;
|
||||
}
|
||||
|
||||
/* any option not confirmed, re-send it */
|
||||
if (!opt->dccpop_conf) {
|
||||
dccp_insert_feat_opt(skb, opt->dccpop_type,
|
||||
opt->dccpop_feat, opt->dccpop_val,
|
||||
opt->dccpop_len);
|
||||
change++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Retransmit timer.
|
||||
* If this is the master listening sock, we don't set a timer on it. It
|
||||
* should be fine because if the dude doesn't receive our RESPONSE
|
||||
* [which will contain the CHANGE] he will send another REQUEST which
|
||||
* will "retrnasmit" the change.
|
||||
*/
|
||||
if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
|
||||
dccp_pr_debug("reset feat negotiation timer %p\n", sk);
|
||||
|
||||
/* XXX don't reset the timer on re-transmissions. I.e. reset it
|
||||
* only when sending new stuff i guess. Currently the timer
|
||||
* never backs off because on re-transmission it just resets it!
|
||||
*/
|
||||
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
|
||||
inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -581,30 +565,19 @@ static void dccp_insert_option_padding(struct sk_buff *skb)
|
||||
int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct dccp_minisock *dmsk = dccp_msk(sk);
|
||||
|
||||
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
|
||||
|
||||
if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb))
|
||||
if (dmsk->dccpms_send_ndp_count &&
|
||||
dccp_insert_option_ndp(sk, skb))
|
||||
return -1;
|
||||
|
||||
if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
|
||||
|
||||
/* Feature Negotiation */
|
||||
if (dccp_feat_insert_opts(dp, NULL, skb))
|
||||
if (!dccp_packet_without_ack(skb)) {
|
||||
if (dmsk->dccpms_send_ack_vector &&
|
||||
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
|
||||
dccp_insert_option_ackvec(sk, skb))
|
||||
return -1;
|
||||
|
||||
if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
|
||||
/*
|
||||
* Obtain RTT sample from Request/Response exchange.
|
||||
* This is currently used in CCID 3 initialisation.
|
||||
*/
|
||||
if (dccp_insert_option_timestamp(sk, skb))
|
||||
return -1;
|
||||
|
||||
} else if (dccp_ackvec_pending(sk) &&
|
||||
dccp_insert_option_ackvec(sk, skb)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (dp->dccps_hc_rx_insert_options) {
|
||||
@ -613,6 +586,21 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
|
||||
dp->dccps_hc_rx_insert_options = 0;
|
||||
}
|
||||
|
||||
/* Feature negotiation */
|
||||
/* Data packets can't do feat negotiation */
|
||||
if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
|
||||
DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
|
||||
dccp_insert_options_feat(sk, skb))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Obtain RTT sample from Request/Response exchange.
|
||||
* This is currently used in CCID 3 initialisation.
|
||||
*/
|
||||
if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
|
||||
dccp_insert_option_timestamp(sk, skb))
|
||||
return -1;
|
||||
|
||||
if (dp->dccps_timestamp_echo != 0 &&
|
||||
dccp_insert_option_timestamp_echo(dp, NULL, skb))
|
||||
return -1;
|
||||
@ -625,9 +613,6 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
|
||||
{
|
||||
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
|
||||
|
||||
if (dccp_feat_insert_opts(NULL, dreq, skb))
|
||||
return -1;
|
||||
|
||||
if (dreq->dreq_timestamp_echo != 0 &&
|
||||
dccp_insert_option_timestamp_echo(NULL, dreq, skb))
|
||||
return -1;
|
||||
|
@ -26,13 +26,11 @@ static inline void dccp_event_ack_sent(struct sock *sk)
|
||||
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
|
||||
}
|
||||
|
||||
/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
|
||||
static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
|
||||
static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
skb_set_owner_w(skb, sk);
|
||||
WARN_ON(sk->sk_send_head);
|
||||
sk->sk_send_head = skb;
|
||||
return skb_clone(sk->sk_send_head, gfp_any());
|
||||
}
|
||||
|
||||
/*
|
||||
@ -163,27 +161,21 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
u32 ccmps = dccp_determine_ccmps(dp);
|
||||
u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
|
||||
int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
|
||||
|
||||
/* Account for header lengths and IPv4/v6 option overhead */
|
||||
cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
|
||||
sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
|
||||
|
||||
/*
|
||||
* Leave enough headroom for common DCCP header options.
|
||||
* This only considers options which may appear on DCCP-Data packets, as
|
||||
* per table 3 in RFC 4340, 5.8. When running out of space for other
|
||||
* options (eg. Ack Vector which can take up to 255 bytes), it is better
|
||||
* to schedule a separate Ack. Thus we leave headroom for the following:
|
||||
* - 1 byte for Slow Receiver (11.6)
|
||||
* - 6 bytes for Timestamp (13.1)
|
||||
* - 10 bytes for Timestamp Echo (13.3)
|
||||
* - 8 bytes for NDP count (7.7, when activated)
|
||||
* - 6 bytes for Data Checksum (9.3)
|
||||
* - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
|
||||
* FIXME: this should come from the CCID infrastructure, where, say,
|
||||
* TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
|
||||
* put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
|
||||
* TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
|
||||
* make it a multiple of 4
|
||||
*/
|
||||
cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
|
||||
(dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
|
||||
|
||||
cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
|
||||
|
||||
/* And store cached results */
|
||||
icsk->icsk_pmtu_cookie = pmtu;
|
||||
@ -208,158 +200,95 @@ void dccp_write_space(struct sock *sk)
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_wait_for_ccid - Await CCID send permission
|
||||
* dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
|
||||
* @sk: socket to wait for
|
||||
* @delay: timeout in jiffies
|
||||
* This is used by CCIDs which need to delay the send time in process context.
|
||||
* @skb: current skb to pass on for waiting
|
||||
* @delay: sleep timeout in milliseconds (> 0)
|
||||
* This function is called by default when the socket is closed, and
|
||||
* when a non-zero linger time is set on the socket. For consistency
|
||||
*/
|
||||
static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
|
||||
static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
DEFINE_WAIT(wait);
|
||||
long remaining;
|
||||
unsigned long jiffdelay;
|
||||
int rc;
|
||||
|
||||
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
|
||||
sk->sk_write_pending++;
|
||||
release_sock(sk);
|
||||
do {
|
||||
dccp_pr_debug("delayed send by %d msec\n", delay);
|
||||
jiffdelay = msecs_to_jiffies(delay);
|
||||
|
||||
remaining = schedule_timeout(delay);
|
||||
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
lock_sock(sk);
|
||||
sk->sk_write_pending--;
|
||||
finish_wait(sk->sk_sleep, &wait);
|
||||
sk->sk_write_pending++;
|
||||
release_sock(sk);
|
||||
schedule_timeout(jiffdelay);
|
||||
lock_sock(sk);
|
||||
sk->sk_write_pending--;
|
||||
|
||||
if (signal_pending(current) || sk->sk_err)
|
||||
return -1;
|
||||
return remaining;
|
||||
}
|
||||
if (sk->sk_err)
|
||||
goto do_error;
|
||||
if (signal_pending(current))
|
||||
goto do_interrupted;
|
||||
|
||||
/**
|
||||
* dccp_xmit_packet - Send data packet under control of CCID
|
||||
* Transmits next-queued payload and informs CCID to account for the packet.
|
||||
*/
|
||||
static void dccp_xmit_packet(struct sock *sk)
|
||||
{
|
||||
int err, len;
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct sk_buff *skb = dccp_qpolicy_pop(sk);
|
||||
|
||||
if (unlikely(skb == NULL))
|
||||
return;
|
||||
len = skb->len;
|
||||
|
||||
if (sk->sk_state == DCCP_PARTOPEN) {
|
||||
const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
|
||||
/*
|
||||
* See 8.1.5 - Handshake Completion.
|
||||
*
|
||||
* For robustness we resend Confirm options until the client has
|
||||
* entered OPEN. During the initial feature negotiation, the MPS
|
||||
* is smaller than usual, reduced by the Change/Confirm options.
|
||||
*/
|
||||
if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
|
||||
DCCP_WARN("Payload too large (%d) for featneg.\n", len);
|
||||
dccp_send_ack(sk);
|
||||
dccp_feat_list_purge(&dp->dccps_featneg);
|
||||
}
|
||||
|
||||
inet_csk_schedule_ack(sk);
|
||||
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
|
||||
inet_csk(sk)->icsk_rto,
|
||||
DCCP_RTO_MAX);
|
||||
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
|
||||
} else if (dccp_ack_pending(sk)) {
|
||||
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
|
||||
} else {
|
||||
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
|
||||
}
|
||||
|
||||
err = dccp_transmit_skb(sk, skb);
|
||||
if (err)
|
||||
dccp_pr_debug("transmit_skb() returned err=%d\n", err);
|
||||
/*
|
||||
* Register this one as sent even if an error occurred. To the remote
|
||||
* end a local packet drop is indistinguishable from network loss, i.e.
|
||||
* any local drop will eventually be reported via receiver feedback.
|
||||
*/
|
||||
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
|
||||
|
||||
/*
|
||||
* If the CCID needs to transfer additional header options out-of-band
|
||||
* (e.g. Ack Vectors or feature-negotiation options), it activates this
|
||||
* flag to schedule a Sync. The Sync will automatically incorporate all
|
||||
* currently pending header options, thus clearing the backlog.
|
||||
*/
|
||||
if (dp->dccps_sync_scheduled)
|
||||
dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_flush_write_queue - Drain queue at end of connection
|
||||
* Since dccp_sendmsg queues packets without waiting for them to be sent, it may
|
||||
* happen that the TX queue is not empty at the end of a connection. We give the
|
||||
* HC-sender CCID a grace period of up to @time_budget jiffies. If this function
|
||||
* returns with a non-empty write queue, it will be purged later.
|
||||
*/
|
||||
void dccp_flush_write_queue(struct sock *sk, long *time_budget)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct sk_buff *skb;
|
||||
long delay, rc;
|
||||
|
||||
while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
|
||||
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
|
||||
} while ((delay = rc) > 0);
|
||||
out:
|
||||
finish_wait(sk->sk_sleep, &wait);
|
||||
return rc;
|
||||
|
||||
switch (ccid_packet_dequeue_eval(rc)) {
|
||||
case CCID_PACKET_WILL_DEQUEUE_LATER:
|
||||
/*
|
||||
* If the CCID determines when to send, the next sending
|
||||
* time is unknown or the CCID may not even send again
|
||||
* (e.g. remote host crashes or lost Ack packets).
|
||||
*/
|
||||
DCCP_WARN("CCID did not manage to send all packets\n");
|
||||
return;
|
||||
case CCID_PACKET_DELAY:
|
||||
delay = msecs_to_jiffies(rc);
|
||||
if (delay > *time_budget)
|
||||
return;
|
||||
rc = dccp_wait_for_ccid(sk, delay);
|
||||
if (rc < 0)
|
||||
return;
|
||||
*time_budget -= (delay - rc);
|
||||
/* check again if we can send now */
|
||||
break;
|
||||
case CCID_PACKET_SEND_AT_ONCE:
|
||||
dccp_xmit_packet(sk);
|
||||
break;
|
||||
case CCID_PACKET_ERR:
|
||||
skb_dequeue(&sk->sk_write_queue);
|
||||
kfree_skb(skb);
|
||||
dccp_pr_debug("packet discarded due to err=%ld\n", rc);
|
||||
}
|
||||
}
|
||||
do_error:
|
||||
rc = -EPIPE;
|
||||
goto out;
|
||||
do_interrupted:
|
||||
rc = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
void dccp_write_xmit(struct sock *sk)
|
||||
void dccp_write_xmit(struct sock *sk, int block)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct sk_buff *skb;
|
||||
|
||||
while ((skb = dccp_qpolicy_top(sk))) {
|
||||
int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
|
||||
while ((skb = skb_peek(&sk->sk_write_queue))) {
|
||||
int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
|
||||
|
||||
switch (ccid_packet_dequeue_eval(rc)) {
|
||||
case CCID_PACKET_WILL_DEQUEUE_LATER:
|
||||
return;
|
||||
case CCID_PACKET_DELAY:
|
||||
sk_reset_timer(sk, &dp->dccps_xmit_timer,
|
||||
jiffies + msecs_to_jiffies(rc));
|
||||
return;
|
||||
case CCID_PACKET_SEND_AT_ONCE:
|
||||
dccp_xmit_packet(sk);
|
||||
break;
|
||||
case CCID_PACKET_ERR:
|
||||
dccp_qpolicy_drop(sk, skb);
|
||||
dccp_pr_debug("packet discarded due to err=%d\n", rc);
|
||||
if (err > 0) {
|
||||
if (!block) {
|
||||
sk_reset_timer(sk, &dp->dccps_xmit_timer,
|
||||
msecs_to_jiffies(err)+jiffies);
|
||||
break;
|
||||
} else
|
||||
err = dccp_wait_for_ccid(sk, skb, err);
|
||||
if (err && err != -EINTR)
|
||||
DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
|
||||
}
|
||||
|
||||
skb_dequeue(&sk->sk_write_queue);
|
||||
if (err == 0) {
|
||||
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
|
||||
const int len = skb->len;
|
||||
|
||||
if (sk->sk_state == DCCP_PARTOPEN) {
|
||||
/* See 8.1.5. Handshake Completion */
|
||||
inet_csk_schedule_ack(sk);
|
||||
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
|
||||
inet_csk(sk)->icsk_rto,
|
||||
DCCP_RTO_MAX);
|
||||
dcb->dccpd_type = DCCP_PKT_DATAACK;
|
||||
} else if (dccp_ack_pending(sk))
|
||||
dcb->dccpd_type = DCCP_PKT_DATAACK;
|
||||
else
|
||||
dcb->dccpd_type = DCCP_PKT_DATA;
|
||||
|
||||
err = dccp_transmit_skb(sk, skb);
|
||||
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
|
||||
if (err)
|
||||
DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
|
||||
err);
|
||||
} else {
|
||||
dccp_pr_debug("packet discarded due to err=%d\n", err);
|
||||
kfree_skb(skb);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -410,12 +339,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
|
||||
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
|
||||
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
|
||||
|
||||
/* Resolve feature dependencies resulting from choice of CCID */
|
||||
if (dccp_feat_server_ccid_dependencies(dreq))
|
||||
goto response_failed;
|
||||
|
||||
if (dccp_insert_options_rsk(dreq, skb))
|
||||
goto response_failed;
|
||||
if (dccp_insert_options_rsk(dreq, skb)) {
|
||||
kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Build and checksum header */
|
||||
dh = dccp_zeroed_hdr(skb, dccp_header_size);
|
||||
@ -436,9 +363,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
|
||||
inet_rsk(req)->acked = 1;
|
||||
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
|
||||
return skb;
|
||||
response_failed:
|
||||
kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(dccp_make_response);
|
||||
@ -523,9 +447,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
|
||||
/*
|
||||
* Do all connect socket setups that can be done AF independent.
|
||||
*/
|
||||
int dccp_connect(struct sock *sk)
|
||||
static inline void dccp_connect_init(struct sock *sk)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct dst_entry *dst = __sk_dst_get(sk);
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
@ -535,13 +458,19 @@ int dccp_connect(struct sock *sk)
|
||||
|
||||
dccp_sync_mss(sk, dst_mtu(dst));
|
||||
|
||||
/* do not connect if feature negotiation setup fails */
|
||||
if (dccp_feat_finalise_settings(dccp_sk(sk)))
|
||||
return -EPROTO;
|
||||
|
||||
/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
|
||||
dp->dccps_gar = dp->dccps_iss;
|
||||
|
||||
icsk->icsk_retransmits = 0;
|
||||
}
|
||||
|
||||
int dccp_connect(struct sock *sk)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
dccp_connect_init(sk);
|
||||
|
||||
skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
|
||||
if (unlikely(skb == NULL))
|
||||
return -ENOBUFS;
|
||||
@ -551,11 +480,11 @@ int dccp_connect(struct sock *sk)
|
||||
|
||||
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
|
||||
|
||||
dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
|
||||
dccp_skb_entail(sk, skb);
|
||||
dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
|
||||
DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
|
||||
|
||||
/* Timer for repeating the REQUEST until an answer. */
|
||||
icsk->icsk_retransmits = 0;
|
||||
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
|
||||
icsk->icsk_rto, DCCP_RTO_MAX);
|
||||
return 0;
|
||||
@ -642,12 +571,6 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
|
||||
DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
|
||||
DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
|
||||
|
||||
/*
|
||||
* Clear the flag in case the Sync was scheduled for out-of-band data,
|
||||
* such as carrying a long Ack Vector.
|
||||
*/
|
||||
dccp_sk(sk)->dccps_sync_scheduled = 0;
|
||||
|
||||
dccp_transmit_skb(sk, skb);
|
||||
}
|
||||
|
||||
@ -676,7 +599,9 @@ void dccp_send_close(struct sock *sk, const int active)
|
||||
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
|
||||
|
||||
if (active) {
|
||||
skb = dccp_skb_entail(sk, skb);
|
||||
dccp_write_xmit(sk, 1);
|
||||
dccp_skb_entail(sk, skb);
|
||||
dccp_transmit_skb(sk, skb_clone(skb, prio));
|
||||
/*
|
||||
* Retransmission timer for active-close: RFC 4340, 8.3 requires
|
||||
* to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
|
||||
@ -689,6 +614,6 @@ void dccp_send_close(struct sock *sk, const int active)
|
||||
*/
|
||||
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
|
||||
DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
|
||||
}
|
||||
dccp_transmit_skb(sk, skb);
|
||||
} else
|
||||
dccp_transmit_skb(sk, skb);
|
||||
}
|
||||
|
@ -46,54 +46,75 @@ static struct {
|
||||
struct kfifo *fifo;
|
||||
spinlock_t lock;
|
||||
wait_queue_head_t wait;
|
||||
ktime_t start;
|
||||
struct timespec tstart;
|
||||
} dccpw;
|
||||
|
||||
static void jdccp_write_xmit(struct sock *sk)
|
||||
static void printl(const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
int len;
|
||||
struct timespec now;
|
||||
char tbuf[256];
|
||||
|
||||
va_start(args, fmt);
|
||||
getnstimeofday(&now);
|
||||
|
||||
now = timespec_sub(now, dccpw.tstart);
|
||||
|
||||
len = sprintf(tbuf, "%lu.%06lu ",
|
||||
(unsigned long) now.tv_sec,
|
||||
(unsigned long) now.tv_nsec / NSEC_PER_USEC);
|
||||
len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
kfifo_put(dccpw.fifo, tbuf, len);
|
||||
wake_up(&dccpw.wait);
|
||||
}
|
||||
|
||||
static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
|
||||
struct msghdr *msg, size_t size)
|
||||
{
|
||||
const struct dccp_minisock *dmsk = dccp_msk(sk);
|
||||
const struct inet_sock *inet = inet_sk(sk);
|
||||
struct ccid3_hc_tx_sock *hctx = NULL;
|
||||
struct timespec tv;
|
||||
char buf[256];
|
||||
int len, ccid = ccid_get_current_tx_ccid(dccp_sk(sk));
|
||||
const struct ccid3_hc_tx_sock *hctx;
|
||||
|
||||
if (ccid == DCCPC_CCID3)
|
||||
if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
|
||||
hctx = ccid3_hc_tx_sk(sk);
|
||||
else
|
||||
hctx = NULL;
|
||||
|
||||
if (!port || ntohs(inet->dport) == port || ntohs(inet->sport) == port) {
|
||||
|
||||
tv = ktime_to_timespec(ktime_sub(ktime_get(), dccpw.start));
|
||||
len = sprintf(buf, "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u %d",
|
||||
(unsigned long)tv.tv_sec,
|
||||
(unsigned long)tv.tv_nsec,
|
||||
NIPQUAD(inet->saddr), ntohs(inet->sport),
|
||||
NIPQUAD(inet->daddr), ntohs(inet->dport), ccid);
|
||||
|
||||
if (port == 0 || ntohs(inet->dport) == port ||
|
||||
ntohs(inet->sport) == port) {
|
||||
if (hctx)
|
||||
len += sprintf(buf + len, " %d %d %d %u %u %u %d",
|
||||
hctx->s, hctx->rtt, hctx->p, hctx->x_calc,
|
||||
(unsigned)(hctx->x_recv >> 6),
|
||||
(unsigned)(hctx->x >> 6), hctx->t_ipi);
|
||||
|
||||
len += sprintf(buf + len, "\n");
|
||||
kfifo_put(dccpw.fifo, buf, len);
|
||||
wake_up(&dccpw.wait);
|
||||
printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
|
||||
"%llu %llu %d\n",
|
||||
NIPQUAD(inet->saddr), ntohs(inet->sport),
|
||||
NIPQUAD(inet->daddr), ntohs(inet->dport), size,
|
||||
hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
|
||||
hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
|
||||
hctx->ccid3hctx_x_recv >> 6,
|
||||
hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
|
||||
else
|
||||
printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
|
||||
NIPQUAD(inet->saddr), ntohs(inet->sport),
|
||||
NIPQUAD(inet->daddr), ntohs(inet->dport), size);
|
||||
}
|
||||
|
||||
jprobe_return();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct jprobe dccp_send_probe = {
|
||||
.kp = {
|
||||
.symbol_name = "dccp_write_xmit",
|
||||
.symbol_name = "dccp_sendmsg",
|
||||
},
|
||||
.entry = jdccp_write_xmit,
|
||||
.entry = jdccp_sendmsg,
|
||||
};
|
||||
|
||||
static int dccpprobe_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
kfifo_reset(dccpw.fifo);
|
||||
dccpw.start = ktime_get();
|
||||
getnstimeofday(&dccpw.tstart);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
285
net/dccp/proto.c
285
net/dccp/proto.c
@ -67,9 +67,6 @@ void dccp_set_state(struct sock *sk, const int state)
|
||||
case DCCP_OPEN:
|
||||
if (oldstate != DCCP_OPEN)
|
||||
DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
|
||||
/* Client retransmits all Confirm options until entering OPEN */
|
||||
if (oldstate == DCCP_PARTOPEN)
|
||||
dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
|
||||
break;
|
||||
|
||||
case DCCP_CLOSED:
|
||||
@ -178,25 +175,63 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
|
||||
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct dccp_minisock *dmsk = dccp_msk(sk);
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
dccp_minisock_init(&dp->dccps_minisock);
|
||||
|
||||
icsk->icsk_rto = DCCP_TIMEOUT_INIT;
|
||||
icsk->icsk_syn_retries = sysctl_dccp_request_retries;
|
||||
sk->sk_state = DCCP_CLOSED;
|
||||
sk->sk_write_space = dccp_write_space;
|
||||
icsk->icsk_sync_mss = dccp_sync_mss;
|
||||
dp->dccps_mss_cache = TCP_MIN_RCVMSS;
|
||||
dp->dccps_mss_cache = 536;
|
||||
dp->dccps_rate_last = jiffies;
|
||||
dp->dccps_role = DCCP_ROLE_UNDEFINED;
|
||||
dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
|
||||
dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
|
||||
dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
|
||||
|
||||
dccp_init_xmit_timers(sk);
|
||||
|
||||
INIT_LIST_HEAD(&dp->dccps_featneg);
|
||||
/* control socket doesn't need feat nego */
|
||||
if (likely(ctl_sock_initialized))
|
||||
return dccp_feat_init(sk);
|
||||
/*
|
||||
* FIXME: We're hardcoding the CCID, and doing this at this point makes
|
||||
* the listening (master) sock get CCID control blocks, which is not
|
||||
* necessary, but for now, to not mess with the test userspace apps,
|
||||
* lets leave it here, later the real solution is to do this in a
|
||||
* setsockopt(CCIDs-I-want/accept). -acme
|
||||
*/
|
||||
if (likely(ctl_sock_initialized)) {
|
||||
int rc = dccp_feat_init(dmsk);
|
||||
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (dmsk->dccpms_send_ack_vector) {
|
||||
dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
|
||||
if (dp->dccps_hc_rx_ackvec == NULL)
|
||||
return -ENOMEM;
|
||||
}
|
||||
dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
|
||||
sk, GFP_KERNEL);
|
||||
dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
|
||||
sk, GFP_KERNEL);
|
||||
if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
|
||||
dp->dccps_hc_tx_ccid == NULL)) {
|
||||
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
|
||||
ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
|
||||
if (dmsk->dccpms_send_ack_vector) {
|
||||
dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
|
||||
dp->dccps_hc_rx_ackvec = NULL;
|
||||
}
|
||||
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
} else {
|
||||
/* control socket doesn't need feat nego */
|
||||
INIT_LIST_HEAD(&dmsk->dccpms_pending);
|
||||
INIT_LIST_HEAD(&dmsk->dccpms_conf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -205,6 +240,7 @@ EXPORT_SYMBOL_GPL(dccp_init_sock);
|
||||
void dccp_destroy_sock(struct sock *sk)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct dccp_minisock *dmsk = dccp_msk(sk);
|
||||
|
||||
/*
|
||||
* DCCP doesn't use sk_write_queue, just sk_send_head
|
||||
@ -222,7 +258,7 @@ void dccp_destroy_sock(struct sock *sk)
|
||||
kfree(dp->dccps_service_list);
|
||||
dp->dccps_service_list = NULL;
|
||||
|
||||
if (dp->dccps_hc_rx_ackvec != NULL) {
|
||||
if (dmsk->dccpms_send_ack_vector) {
|
||||
dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
|
||||
dp->dccps_hc_rx_ackvec = NULL;
|
||||
}
|
||||
@ -231,7 +267,7 @@ void dccp_destroy_sock(struct sock *sk)
|
||||
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
|
||||
|
||||
/* clean up feature negotiation state */
|
||||
dccp_feat_list_purge(&dp->dccps_featneg);
|
||||
dccp_feat_clean(dmsk);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(dccp_destroy_sock);
|
||||
@ -241,9 +277,6 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
|
||||
dp->dccps_role = DCCP_ROLE_LISTEN;
|
||||
/* do not start to listen if feature negotiation setup fails */
|
||||
if (dccp_feat_finalise_settings(dp))
|
||||
return -EPROTO;
|
||||
return inet_csk_listen_start(sk, backlog);
|
||||
}
|
||||
|
||||
@ -433,70 +466,42 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * dccp_setsockopt_cscov  -  Register a Minimum Checksum Coverage preference
 * @sk:    socket to configure
 * @cscov: requested minimum checksum coverage, valid range 0..15
 * @rx:    true for the receiver-side value (dccps_pcrlen), false for the
 *         sender-side value (dccps_pcslen)
 *
 * Returns 0 on success (a @cscov of 0 registers nothing and succeeds),
 * -EINVAL if @cscov is out of range, -ENOBUFS if the preference list cannot
 * be allocated, otherwise the result of dccp_feat_register_sp().
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	/*
	 * Do not modify @cscov while filling the list: the requested value is
	 * still needed below. Incrementing it here would leave it at 16 and
	 * that out-of-range number would be recorded in the socket.
	 */
	for (i = 0; i < len; i++)
		list[i] = cscov + i;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		/* remember the value the application asked for */
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
|
||||
|
||||
static int dccp_setsockopt_ccid(struct sock *sk, int type,
|
||||
char __user *optval, int optlen)
|
||||
/* byte 1 is feature. the rest is the preference list */
|
||||
static int dccp_setsockopt_change(struct sock *sk, int type,
|
||||
struct dccp_so_feat __user *optval)
|
||||
{
|
||||
struct dccp_so_feat opt;
|
||||
u8 *val;
|
||||
int rc = 0;
|
||||
int rc;
|
||||
|
||||
if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
|
||||
if (copy_from_user(&opt, optval, sizeof(opt)))
|
||||
return -EFAULT;
|
||||
/*
|
||||
* rfc4340: 6.1. Change Options
|
||||
*/
|
||||
if (opt.dccpsf_len < 1)
|
||||
return -EINVAL;
|
||||
|
||||
val = kmalloc(optlen, GFP_KERNEL);
|
||||
if (val == NULL)
|
||||
val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
|
||||
if (!val)
|
||||
return -ENOMEM;
|
||||
|
||||
if (copy_from_user(val, optval, optlen)) {
|
||||
kfree(val);
|
||||
return -EFAULT;
|
||||
if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
|
||||
rc = -EFAULT;
|
||||
goto out_free_val;
|
||||
}
|
||||
|
||||
lock_sock(sk);
|
||||
if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
|
||||
rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
|
||||
rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
|
||||
val, opt.dccpsf_len, GFP_KERNEL);
|
||||
if (rc)
|
||||
goto out_free_val;
|
||||
|
||||
if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
|
||||
rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
|
||||
release_sock(sk);
|
||||
|
||||
kfree(val);
|
||||
out:
|
||||
return rc;
|
||||
|
||||
out_free_val:
|
||||
kfree(val);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
|
||||
@ -505,21 +510,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
int val, err = 0;
|
||||
|
||||
switch (optname) {
|
||||
case DCCP_SOCKOPT_PACKET_SIZE:
|
||||
DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
|
||||
return 0;
|
||||
case DCCP_SOCKOPT_CHANGE_L:
|
||||
case DCCP_SOCKOPT_CHANGE_R:
|
||||
DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
|
||||
return 0;
|
||||
case DCCP_SOCKOPT_CCID:
|
||||
case DCCP_SOCKOPT_RX_CCID:
|
||||
case DCCP_SOCKOPT_TX_CCID:
|
||||
return dccp_setsockopt_ccid(sk, optname, optval, optlen);
|
||||
}
|
||||
|
||||
if (optlen < (int)sizeof(int))
|
||||
if (optlen < sizeof(int))
|
||||
return -EINVAL;
|
||||
|
||||
if (get_user(val, (int __user *)optval))
|
||||
@ -530,38 +521,53 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
|
||||
|
||||
lock_sock(sk);
|
||||
switch (optname) {
|
||||
case DCCP_SOCKOPT_PACKET_SIZE:
|
||||
DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
|
||||
err = 0;
|
||||
break;
|
||||
case DCCP_SOCKOPT_CHANGE_L:
|
||||
if (optlen != sizeof(struct dccp_so_feat))
|
||||
err = -EINVAL;
|
||||
else
|
||||
err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
|
||||
(struct dccp_so_feat __user *)
|
||||
optval);
|
||||
break;
|
||||
case DCCP_SOCKOPT_CHANGE_R:
|
||||
if (optlen != sizeof(struct dccp_so_feat))
|
||||
err = -EINVAL;
|
||||
else
|
||||
err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
|
||||
(struct dccp_so_feat __user *)
|
||||
optval);
|
||||
break;
|
||||
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
|
||||
if (dp->dccps_role != DCCP_ROLE_SERVER)
|
||||
err = -EOPNOTSUPP;
|
||||
else
|
||||
dp->dccps_server_timewait = (val != 0);
|
||||
break;
|
||||
case DCCP_SOCKOPT_SEND_CSCOV:
|
||||
err = dccp_setsockopt_cscov(sk, val, false);
|
||||
break;
|
||||
case DCCP_SOCKOPT_RECV_CSCOV:
|
||||
err = dccp_setsockopt_cscov(sk, val, true);
|
||||
break;
|
||||
case DCCP_SOCKOPT_QPOLICY_ID:
|
||||
if (sk->sk_state != DCCP_CLOSED)
|
||||
err = -EISCONN;
|
||||
else if (val < 0 || val >= DCCPQ_POLICY_MAX)
|
||||
case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
|
||||
if (val < 0 || val > 15)
|
||||
err = -EINVAL;
|
||||
else
|
||||
dp->dccps_qpolicy = val;
|
||||
dp->dccps_pcslen = val;
|
||||
break;
|
||||
case DCCP_SOCKOPT_QPOLICY_TXQLEN:
|
||||
if (val < 0)
|
||||
case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
|
||||
if (val < 0 || val > 15)
|
||||
err = -EINVAL;
|
||||
else
|
||||
dp->dccps_tx_qlen = val;
|
||||
else {
|
||||
dp->dccps_pcrlen = val;
|
||||
/* FIXME: add feature negotiation,
|
||||
* ChangeL(MinimumChecksumCoverage, val) */
|
||||
}
|
||||
break;
|
||||
default:
|
||||
err = -ENOPROTOOPT;
|
||||
break;
|
||||
}
|
||||
release_sock(sk);
|
||||
|
||||
release_sock(sk);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -642,18 +648,6 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
|
||||
case DCCP_SOCKOPT_GET_CUR_MPS:
|
||||
val = dp->dccps_mss_cache;
|
||||
break;
|
||||
case DCCP_SOCKOPT_AVAILABLE_CCIDS:
|
||||
return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
|
||||
case DCCP_SOCKOPT_TX_CCID:
|
||||
val = ccid_get_current_tx_ccid(dp);
|
||||
if (val < 0)
|
||||
return -ENOPROTOOPT;
|
||||
break;
|
||||
case DCCP_SOCKOPT_RX_CCID:
|
||||
val = ccid_get_current_rx_ccid(dp);
|
||||
if (val < 0)
|
||||
return -ENOPROTOOPT;
|
||||
break;
|
||||
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
|
||||
val = dp->dccps_server_timewait;
|
||||
break;
|
||||
@ -663,12 +657,6 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
|
||||
case DCCP_SOCKOPT_RECV_CSCOV:
|
||||
val = dp->dccps_pcrlen;
|
||||
break;
|
||||
case DCCP_SOCKOPT_QPOLICY_ID:
|
||||
val = dp->dccps_qpolicy;
|
||||
break;
|
||||
case DCCP_SOCKOPT_QPOLICY_TXQLEN:
|
||||
val = dp->dccps_tx_qlen;
|
||||
break;
|
||||
case 128 ... 191:
|
||||
return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
|
||||
len, (u32 __user *)optval, optlen);
|
||||
@ -711,47 +699,6 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
|
||||
EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
|
||||
#endif
|
||||
|
||||
static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
|
||||
{
|
||||
struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
|
||||
|
||||
/*
|
||||
* Assign an (opaque) qpolicy priority value to skb->priority.
|
||||
*
|
||||
* We are overloading this skb field for use with the qpolicy subystem.
|
||||
* The skb->priority is normally used for the SO_PRIORITY option, which
|
||||
* is initialised from sk_priority. Since the assignment of sk_priority
|
||||
* to skb->priority happens later (on layer 3), we overload this field
|
||||
* for use with queueing priorities as long as the skb is on layer 4.
|
||||
* The default priority value (if nothing is set) is 0.
|
||||
*/
|
||||
skb->priority = 0;
|
||||
|
||||
for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
|
||||
|
||||
if (!CMSG_OK(msg, cmsg))
|
||||
return -EINVAL;
|
||||
|
||||
if (cmsg->cmsg_level != SOL_DCCP)
|
||||
continue;
|
||||
|
||||
if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
|
||||
!dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
|
||||
return -EINVAL;
|
||||
|
||||
switch (cmsg->cmsg_type) {
|
||||
case DCCP_SCM_PRIORITY:
|
||||
if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
|
||||
return -EINVAL;
|
||||
skb->priority = *(__u32 *)CMSG_DATA(cmsg);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
||||
size_t len)
|
||||
{
|
||||
@ -767,7 +714,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
||||
|
||||
lock_sock(sk);
|
||||
|
||||
if (dccp_qpolicy_full(sk)) {
|
||||
if (sysctl_dccp_tx_qlen &&
|
||||
(sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
|
||||
rc = -EAGAIN;
|
||||
goto out_release;
|
||||
}
|
||||
@ -795,12 +743,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
|
||||
if (rc != 0)
|
||||
goto out_discard;
|
||||
|
||||
rc = dccp_msghdr_parse(msg, skb);
|
||||
if (rc != 0)
|
||||
goto out_discard;
|
||||
|
||||
dccp_qpolicy_push(sk, skb);
|
||||
dccp_write_xmit(sk);
|
||||
skb_queue_tail(&sk->sk_write_queue, skb);
|
||||
dccp_write_xmit(sk,0);
|
||||
out_release:
|
||||
release_sock(sk);
|
||||
return rc ? : len;
|
||||
@ -1023,22 +967,9 @@ void dccp_close(struct sock *sk, long timeout)
|
||||
/* Check zero linger _after_ checking for unread data. */
|
||||
sk->sk_prot->disconnect(sk, 0);
|
||||
} else if (sk->sk_state != DCCP_CLOSED) {
|
||||
/*
|
||||
* Normal connection termination. May need to wait if there are
|
||||
* still packets in the TX queue that are delayed by the CCID.
|
||||
*/
|
||||
dccp_flush_write_queue(sk, &timeout);
|
||||
dccp_terminate_connection(sk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush write queue. This may be necessary in several cases:
|
||||
* - we have been closed by the peer but still have application data;
|
||||
* - abortive termination (unread data or zero linger time),
|
||||
* - normal termination but queue could not be flushed within time limit
|
||||
*/
|
||||
__skb_queue_purge(&sk->sk_write_queue);
|
||||
|
||||
sk_stream_wait_close(sk, timeout);
|
||||
|
||||
adjudge_to_death:
|
||||
|
@ -1,137 +0,0 @@
|
||||
/*
|
||||
* net/dccp/qpolicy.c
|
||||
*
|
||||
* Policy-based packet dequeueing interface for DCCP.
|
||||
*
|
||||
* Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License v2
|
||||
* as published by the Free Software Foundation.
|
||||
*/
|
||||
#include "dccp.h"
|
||||
|
||||
/*
|
||||
* Simple Dequeueing Policy:
|
||||
* If tx_qlen is different from 0, enqueue up to tx_qlen elements.
|
||||
*/
|
||||
static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
skb_queue_tail(&sk->sk_write_queue, skb);
|
||||
}
|
||||
|
||||
static bool qpolicy_simple_full(struct sock *sk)
|
||||
{
|
||||
return dccp_sk(sk)->dccps_tx_qlen &&
|
||||
sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
|
||||
}
|
||||
|
||||
static struct sk_buff *qpolicy_simple_top(struct sock *sk)
|
||||
{
|
||||
return skb_peek(&sk->sk_write_queue);
|
||||
}
|
||||
|
||||
/*
|
||||
* Priority-based Dequeueing Policy:
|
||||
* If tx_qlen is different from 0 and the queue has reached its upper bound
|
||||
* of tx_qlen elements, replace older packets lowest-priority-first.
|
||||
*/
|
||||
static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
|
||||
{
|
||||
struct sk_buff *skb, *best = NULL;
|
||||
|
||||
skb_queue_walk(&sk->sk_write_queue, skb)
|
||||
if (best == NULL || skb->priority > best->priority)
|
||||
best = skb;
|
||||
return best;
|
||||
}
|
||||
|
||||
static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
|
||||
{
|
||||
struct sk_buff *skb, *worst = NULL;
|
||||
|
||||
skb_queue_walk(&sk->sk_write_queue, skb)
|
||||
if (worst == NULL || skb->priority < worst->priority)
|
||||
worst = skb;
|
||||
return worst;
|
||||
}
|
||||
|
||||
static bool qpolicy_prio_full(struct sock *sk)
|
||||
{
|
||||
if (qpolicy_simple_full(sk))
|
||||
dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
|
||||
* @push: add a new @skb to the write queue
|
||||
* @full: indicates that no more packets will be admitted
|
||||
* @top: peeks at whatever the queueing policy defines as its `top'
|
||||
*/
|
||||
static struct dccp_qpolicy_operations {
|
||||
void (*push) (struct sock *sk, struct sk_buff *skb);
|
||||
bool (*full) (struct sock *sk);
|
||||
struct sk_buff* (*top) (struct sock *sk);
|
||||
__be32 params;
|
||||
|
||||
} qpol_table[DCCPQ_POLICY_MAX] = {
|
||||
[DCCPQ_POLICY_SIMPLE] = {
|
||||
.push = qpolicy_simple_push,
|
||||
.full = qpolicy_simple_full,
|
||||
.top = qpolicy_simple_top,
|
||||
.params = 0,
|
||||
},
|
||||
[DCCPQ_POLICY_PRIO] = {
|
||||
.push = qpolicy_simple_push,
|
||||
.full = qpolicy_prio_full,
|
||||
.top = qpolicy_prio_best_skb,
|
||||
.params = DCCP_SCM_PRIORITY,
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Externally visible interface
|
||||
*/
|
||||
void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
|
||||
}
|
||||
|
||||
bool dccp_qpolicy_full(struct sock *sk)
|
||||
{
|
||||
return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
|
||||
}
|
||||
|
||||
void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
if (skb != NULL) {
|
||||
skb_unlink(skb, &sk->sk_write_queue);
|
||||
kfree_skb(skb);
|
||||
}
|
||||
}
|
||||
|
||||
struct sk_buff *dccp_qpolicy_top(struct sock *sk)
|
||||
{
|
||||
return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
|
||||
}
|
||||
|
||||
struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
|
||||
{
|
||||
struct sk_buff *skb = dccp_qpolicy_top(sk);
|
||||
|
||||
/* Clear any skb fields that we used internally */
|
||||
skb->priority = 0;
|
||||
|
||||
if (skb)
|
||||
skb_unlink(skb, &sk->sk_write_queue);
|
||||
return skb;
|
||||
}
|
||||
|
||||
/*
 * Check whether @param is acceptable for the socket's current policy: it has
 * to consist of exactly one bit, and that bit has to be present in the
 * parameter mask of the policy.
 */
bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
{
	__be32 accepted;

	/* reject zero and anything with more than one bit set */
	if (param == 0 || (param & (param - 1)) != 0)
		return false;

	accepted = qpol_table[dccp_sk(sk)->dccps_qpolicy].params;
	return (accepted & param) == param;
}
|
@ -18,72 +18,76 @@
|
||||
#error This file should not be compiled without CONFIG_SYSCTL defined
|
||||
#endif
|
||||
|
||||
/* Boundary values */
|
||||
static int zero = 0,
|
||||
u8_max = 0xFF;
|
||||
static unsigned long seqw_min = 32;
|
||||
|
||||
static struct ctl_table dccp_default_table[] = {
|
||||
{
|
||||
.procname = "seq_window",
|
||||
.data = &sysctl_dccp_sequence_window,
|
||||
.maxlen = sizeof(sysctl_dccp_sequence_window),
|
||||
.data = &sysctl_dccp_feat_sequence_window,
|
||||
.maxlen = sizeof(sysctl_dccp_feat_sequence_window),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra1 = &seqw_min, /* RFC 4340, 7.5.2 */
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "rx_ccid",
|
||||
.data = &sysctl_dccp_rx_ccid,
|
||||
.maxlen = sizeof(sysctl_dccp_rx_ccid),
|
||||
.data = &sysctl_dccp_feat_rx_ccid,
|
||||
.maxlen = sizeof(sysctl_dccp_feat_rx_ccid),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &u8_max, /* RFC 4340, 10. */
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tx_ccid",
|
||||
.data = &sysctl_dccp_tx_ccid,
|
||||
.maxlen = sizeof(sysctl_dccp_tx_ccid),
|
||||
.data = &sysctl_dccp_feat_tx_ccid,
|
||||
.maxlen = sizeof(sysctl_dccp_feat_tx_ccid),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &u8_max, /* RFC 4340, 10. */
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "ack_ratio",
|
||||
.data = &sysctl_dccp_feat_ack_ratio,
|
||||
.maxlen = sizeof(sysctl_dccp_feat_ack_ratio),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "send_ackvec",
|
||||
.data = &sysctl_dccp_feat_send_ack_vector,
|
||||
.maxlen = sizeof(sysctl_dccp_feat_send_ack_vector),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "send_ndp",
|
||||
.data = &sysctl_dccp_feat_send_ndp_count,
|
||||
.maxlen = sizeof(sysctl_dccp_feat_send_ndp_count),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "request_retries",
|
||||
.data = &sysctl_dccp_request_retries,
|
||||
.maxlen = sizeof(sysctl_dccp_request_retries),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &u8_max,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "retries1",
|
||||
.data = &sysctl_dccp_retries1,
|
||||
.maxlen = sizeof(sysctl_dccp_retries1),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &u8_max,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "retries2",
|
||||
.data = &sysctl_dccp_retries2,
|
||||
.maxlen = sizeof(sysctl_dccp_retries2),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &u8_max,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "tx_qlen",
|
||||
.data = &sysctl_dccp_tx_qlen,
|
||||
.maxlen = sizeof(sysctl_dccp_tx_qlen),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "sync_ratelimit",
|
||||
|
@ -87,6 +87,17 @@ static void dccp_retransmit_timer(struct sock *sk)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
/* retransmit timer is used for feature negotiation throughout
|
||||
* connection. In this case, no packet is re-transmitted, but rather an
|
||||
* ack is generated and pending changes are placed into its options.
|
||||
*/
|
||||
if (sk->sk_send_head == NULL) {
|
||||
dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
|
||||
if (sk->sk_state == DCCP_OPEN)
|
||||
dccp_send_ack(sk);
|
||||
goto backoff;
|
||||
}
|
||||
|
||||
/*
|
||||
* More than 4MSL (8 minutes) has passed, a RESET(aborted) was
|
||||
* sent, no need to retransmit, this sock is dead.
|
||||
@ -115,6 +126,7 @@ static void dccp_retransmit_timer(struct sock *sk)
|
||||
return;
|
||||
}
|
||||
|
||||
backoff:
|
||||
icsk->icsk_backoff++;
|
||||
|
||||
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
|
||||
@ -237,35 +249,32 @@ out:
|
||||
sock_put(sk);
|
||||
}
|
||||
|
||||
/**
|
||||
* dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
|
||||
* See the comments above %ccid_dequeueing_decision for supported modes.
|
||||
*/
|
||||
static void dccp_write_xmitlet(unsigned long data)
|
||||
/* Transmit-delay timer: used by the CCIDs to delay actual send time */
|
||||
static void dccp_write_xmit_timer(unsigned long data)
|
||||
{
|
||||
struct sock *sk = (struct sock *)data;
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
|
||||
bh_lock_sock(sk);
|
||||
if (sock_owned_by_user(sk))
|
||||
sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
|
||||
sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
|
||||
else
|
||||
dccp_write_xmit(sk);
|
||||
dccp_write_xmit(sk, 0);
|
||||
bh_unlock_sock(sk);
|
||||
sock_put(sk);
|
||||
}
|
||||
|
||||
static void dccp_write_xmit_timer(unsigned long data)
|
||||
static void dccp_init_write_xmit_timer(struct sock *sk)
|
||||
{
|
||||
dccp_write_xmitlet(data);
|
||||
sock_put((struct sock *)data);
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
|
||||
setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
|
||||
(unsigned long)sk);
|
||||
}
|
||||
|
||||
void dccp_init_xmit_timers(struct sock *sk)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
|
||||
tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
|
||||
setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
|
||||
(unsigned long)sk);
|
||||
dccp_init_write_xmit_timer(sk);
|
||||
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
|
||||
&dccp_keepalive_timer);
|
||||
}
|
||||
@ -281,7 +290,8 @@ u32 dccp_timestamp(void)
|
||||
{
|
||||
s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
|
||||
|
||||
return div_u64(delta, DCCP_TIME_RESOLUTION);
|
||||
do_div(delta, 10);
|
||||
return delta;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dccp_timestamp);
|
||||
|
||||
|
@ -811,12 +811,25 @@ void tcp_update_metrics(struct sock *sk)
|
||||
}
|
||||
}
|
||||
|
||||
/* Numbers are taken from RFC3390.
|
||||
*
|
||||
* John Heffner states:
|
||||
*
|
||||
* The RFC specifies a window of no more than 4380 bytes
|
||||
* unless 2*MSS > 4380. Reading the pseudocode in the RFC
|
||||
* is a bit misleading because they use a clamp at 4380 bytes
|
||||
* rather than use a multiplier in the relevant range.
|
||||
*/
|
||||
__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
|
||||
{
|
||||
__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
|
||||
|
||||
if (!cwnd)
|
||||
cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
|
||||
if (!cwnd) {
|
||||
if (tp->mss_cache > 1460)
|
||||
cwnd = 2;
|
||||
else
|
||||
cwnd = (tp->mss_cache > 1095) ? 3 : 4;
|
||||
}
|
||||
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user