linux/include/net/sctp/sctp.h

594 lines
18 KiB
C
Raw Normal View History

/* SCTP kernel implementation
* (C) Copyright IBM Corp. 2001, 2004
* Copyright (c) 1999-2000 Cisco, Inc.
* Copyright (c) 1999-2001 Motorola, Inc.
* Copyright (c) 2001-2003 Intel Corp.
*
* This file is part of the SCTP kernel implementation
*
* The base lksctp header.
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
* the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This SCTP implementation is distributed in the hope that it
* will be useful, but WITHOUT ANY WARRANTY; without even the implied
* ************************
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU CC; see the file COPYING. If not, see
* <http://www.gnu.org/licenses/>.
*
* Please send any bug reports or fixes you make to the
* email address(es):
* lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
* Xingang Guo <xingang.guo@intel.com>
* Jon Grimm <jgrimm@us.ibm.com>
* Daisy Chang <daisyc@us.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
* Ardelle Fan <ardelle.fan@intel.com>
* Ryan Layer <rmlayer@us.ibm.com>
* Kevin Gao <kevin.gao@intel.com>
*/
#ifndef __net_sctp_h__
#define __net_sctp_h__
/* Header Strategy.
* Start getting some control over the header file depencies:
* includes
* constants
* structs
* prototypes
* macros, externs, and inlines
*
* Move test_frame specific items out of the kernel headers
* and into the test frame headers. This is not perfect in any sense
* and will continue to evolve.
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/idr.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_route.h>
#endif
#include <asm/uaccess.h>
#include <asm/page.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/sctp/structs.h>
#include <net/sctp/constants.h>
#ifdef CONFIG_IP_SCTP_MODULE
#define SCTP_PROTOSW_FLAG 0
#else /* static! */
#define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT
#endif
/* Round an int up to the next multiple of 4. */
#define WORD_ROUND(s) (((s)+3)&~3)
/* Truncate to the previous multiple of 4. */
#define WORD_TRUNC(s) ((s)&~3)
/*
* Function declarations.
*/
/*
* sctp/protocol.c
*/
int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *,
sctp_scope_t, gfp_t gfp, int flags);
struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
int sctp_register_pf(struct sctp_pf *, sa_family_t);
void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
/*
* sctp/socket.c
*/
int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
int sctp_inet_listen(struct socket *sock, int backlog);
void sctp_write_space(struct sock *sk);
void sctp_data_ready(struct sock *sk);
unsigned int sctp_poll(struct file *file, struct socket *sock,
poll_table *wait);
void sctp_sock_rfree(struct sk_buff *skb);
void sctp_copy_sock(struct sock *newsk, struct sock *sk,
struct sctp_association *asoc);
extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
int sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_idx(struct net *net,
struct rhashtable_iter *iter, int pos);
int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr, void *p);
int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
struct net *net, int pos, void *p);
int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
struct sctp_info *info);
/*
* sctp/primitive.c
*/
int sctp_primitive_ASSOCIATE(struct net *, struct sctp_association *, void *arg);
int sctp_primitive_SHUTDOWN(struct net *, struct sctp_association *, void *arg);
int sctp_primitive_ABORT(struct net *, struct sctp_association *, void *arg);
int sctp_primitive_SEND(struct net *, struct sctp_association *, void *arg);
int sctp_primitive_REQUESTHEARTBEAT(struct net *, struct sctp_association *, void *arg);
int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg);
/*
* sctp/input.c
*/
int sctp_rcv(struct sk_buff *skb);
void sctp_v4_err(struct sk_buff *skb, u32 info);
void sctp_hash_endpoint(struct sctp_endpoint *);
void sctp_unhash_endpoint(struct sctp_endpoint *);
struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
struct sctphdr *, struct sctp_association **,
struct sctp_transport **);
void sctp_err_finish(struct sock *, struct sctp_association *);
void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
struct sctp_transport *t, __u32 pmtu);
void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
struct sk_buff *);
void sctp_icmp_proto_unreachable(struct sock *sk,
struct sctp_association *asoc,
struct sctp_transport *t);
void sctp_backlog_migrate(struct sctp_association *assoc,
struct sock *oldsk, struct sock *newsk);
int sctp_transport_hashtable_init(void);
void sctp_transport_hashtable_destroy(void);
void sctp_hash_transport(struct sctp_transport *t);
void sctp_unhash_transport(struct sctp_transport *t);
struct sctp_transport *sctp_addrs_lookup_transport(
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr);
struct sctp_transport *sctp_epaddr_lookup_transport(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr);
/*
* sctp/proc.c
*/
int sctp_snmp_proc_init(struct net *net);
void sctp_snmp_proc_exit(struct net *net);
int sctp_eps_proc_init(struct net *net);
void sctp_eps_proc_exit(struct net *net);
int sctp_assocs_proc_init(struct net *net);
void sctp_assocs_proc_exit(struct net *net);
int sctp_remaddr_proc_init(struct net *net);
void sctp_remaddr_proc_exit(struct net *net);
/*
* Module global variables
*/
/*
* sctp/protocol.c
*/
extern struct kmem_cache *sctp_chunk_cachep __read_mostly;
extern struct kmem_cache *sctp_bucket_cachep __read_mostly;
extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
/*
* Section: Macros, externs, and inlines
*/
/* SCTP SNMP MIB stats handlers */
#define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field)
#define __SCTP_INC_STATS(net, field) __SNMP_INC_STATS((net)->sctp.sctp_statistics, field)
#define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field)
/* sctp mib definitions */
enum {
SCTP_MIB_NUM = 0,
SCTP_MIB_CURRESTAB, /* CurrEstab */
SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */
SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */
SCTP_MIB_ABORTEDS, /* Aborteds */
SCTP_MIB_SHUTDOWNS, /* Shutdowns */
SCTP_MIB_OUTOFBLUES, /* OutOfBlues */
SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */
SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */
SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */
SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */
SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */
SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */
SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */
SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */
SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */
SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */
SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */
SCTP_MIB_T1_INIT_EXPIREDS,
SCTP_MIB_T1_COOKIE_EXPIREDS,
SCTP_MIB_T2_SHUTDOWN_EXPIREDS,
SCTP_MIB_T3_RTX_EXPIREDS,
SCTP_MIB_T4_RTO_EXPIREDS,
SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS,
SCTP_MIB_DELAY_SACK_EXPIREDS,
SCTP_MIB_AUTOCLOSE_EXPIREDS,
SCTP_MIB_T1_RETRANSMITS,
SCTP_MIB_T3_RETRANSMITS,
SCTP_MIB_PMTUD_RETRANSMITS,
SCTP_MIB_FAST_RETRANSMITS,
SCTP_MIB_IN_PKT_SOFTIRQ,
SCTP_MIB_IN_PKT_BACKLOG,
SCTP_MIB_IN_PKT_DISCARDS,
SCTP_MIB_IN_DATA_CHUNK_DISCARDS,
__SCTP_MIB_MAX
};
#define SCTP_MIB_MAX __SCTP_MIB_MAX
struct sctp_mib {
unsigned long mibs[SCTP_MIB_MAX];
};
sctp: Add support to per-association statistics via a new SCTP_GET_ASSOC_STATS call The current SCTP stack is lacking a mechanism to have per association statistics. This is an implementation modeled after OpenSolaris' SCTP_GET_ASSOC_STATS. Userspace part will follow on lksctp if/when there is a general ACK on this. V4: - Move ipackets++ before q->immediate.func() for consistency reasons - Move sctp_max_rto() at the end of sctp_transport_update_rto() to avoid returning bogus RTO values - return asoc->rto_min when max_obs_rto value has not changed V3: - Increase ictrlchunks in sctp_assoc_bh_rcv() as well - Move ipackets++ to sctp_inq_push() - return 0 when no rto updates took place since the last call V2: - Implement partial retrieval of stat struct to cope for future expansion - Kill the rtxpackets counter as it cannot be precise anyway - Rename outseqtsns to outofseqtsns to make it clearer that these are out of sequence unexpected TSNs - Move asoc->ipackets++ under a lock to avoid potential miscounts - Fold asoc->opackets++ into the already existing asoc check - Kill unneeded (q->asoc) test when increasing rtxchunks - Do not count octrlchunks if sending failed (SCTP_XMIT_OK != 0) - Don't count SHUTDOWNs as SACKs - Move SCTP_GET_ASSOC_STATS to the private space API - Adjust the len check in sctp_getsockopt_assoc_stats() to allow for future struct growth - Move association statistics in their own struct - Update idupchunks when we send a SACK with dup TSNs - return min_rto in max_rto when RTO has not changed. Also return the transport when max_rto last changed. Signed-off: Michele Baldessari <michele@acksyn.org> Acked-by: Vlad Yasevich <vyasevich@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-12-01 04:49:42 +00:00
/* helper function to track stats about max rto and related transport */
static inline void sctp_max_rto(struct sctp_association *asoc,
struct sctp_transport *trans)
{
if (asoc->stats.max_obs_rto < (__u64)trans->rto) {
asoc->stats.max_obs_rto = trans->rto;
memset(&asoc->stats.obs_rto_ipaddr, 0,
sizeof(struct sockaddr_storage));
memcpy(&asoc->stats.obs_rto_ipaddr, &trans->ipaddr,
trans->af_specific->sockaddr_len);
}
}
/*
* Macros for keeping a global reference of object allocations.
*/
#ifdef CONFIG_SCTP_DBG_OBJCNT
extern atomic_t sctp_dbg_objcnt_sock;
extern atomic_t sctp_dbg_objcnt_ep;
extern atomic_t sctp_dbg_objcnt_assoc;
extern atomic_t sctp_dbg_objcnt_transport;
extern atomic_t sctp_dbg_objcnt_chunk;
extern atomic_t sctp_dbg_objcnt_bind_addr;
extern atomic_t sctp_dbg_objcnt_bind_bucket;
extern atomic_t sctp_dbg_objcnt_addr;
extern atomic_t sctp_dbg_objcnt_ssnmap;
extern atomic_t sctp_dbg_objcnt_datamsg;
extern atomic_t sctp_dbg_objcnt_keys;
/* Macros to atomically increment/decrement objcnt counters. */
#define SCTP_DBG_OBJCNT_INC(name) \
atomic_inc(&sctp_dbg_objcnt_## name)
#define SCTP_DBG_OBJCNT_DEC(name) \
atomic_dec(&sctp_dbg_objcnt_## name)
#define SCTP_DBG_OBJCNT(name) \
atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0)
/* Macro to help create new entries in in the global array of
* objcnt counters.
*/
#define SCTP_DBG_OBJCNT_ENTRY(name) \
{.label= #name, .counter= &sctp_dbg_objcnt_## name}
void sctp_dbg_objcnt_init(struct net *);
void sctp_dbg_objcnt_exit(struct net *);
#else
#define SCTP_DBG_OBJCNT_INC(name)
#define SCTP_DBG_OBJCNT_DEC(name)
static inline void sctp_dbg_objcnt_init(struct net *net) { return; }
static inline void sctp_dbg_objcnt_exit(struct net *net) { return; }
#endif /* CONFIG_SCTP_DBG_OBJCOUNT */
#if defined CONFIG_SYSCTL
void sctp_sysctl_register(void);
void sctp_sysctl_unregister(void);
int sctp_sysctl_net_register(struct net *net);
void sctp_sysctl_net_unregister(struct net *net);
#else
static inline void sctp_sysctl_register(void) { return; }
static inline void sctp_sysctl_unregister(void) { return; }
static inline int sctp_sysctl_net_register(struct net *net) { return 0; }
static inline void sctp_sysctl_net_unregister(struct net *net) { return; }
#endif
/* Size of Supported Address Parameter for 'x' address types. */
#define SCTP_SAT_LEN(x) (sizeof(struct sctp_paramhdr) + (x) * sizeof(__u16))
#if IS_ENABLED(CONFIG_IPV6)
void sctp_v6_pf_init(void);
void sctp_v6_pf_exit(void);
int sctp_v6_protosw_init(void);
void sctp_v6_protosw_exit(void);
int sctp_v6_add_protocol(void);
void sctp_v6_del_protocol(void);
#else /* #ifdef defined(CONFIG_IPV6) */
static inline void sctp_v6_pf_init(void) { return; }
static inline void sctp_v6_pf_exit(void) { return; }
static inline int sctp_v6_protosw_init(void) { return 0; }
static inline void sctp_v6_protosw_exit(void) { return; }
static inline int sctp_v6_add_protocol(void) { return 0; }
static inline void sctp_v6_del_protocol(void) { return; }
#endif /* #if defined(CONFIG_IPV6) */
/* Map an association to an assoc_id. */
static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc)
{
return asoc ? asoc->assoc_id : 0;
}
net: sctp: fix ABI mismatch through sctp_assoc_to_state helper Since SCTP day 1, that is, 19b55a2af145 ("Initial commit") from lksctp tree, the official <netinet/sctp.h> header carries a copy of enum sctp_sstat_state that looks like (compared to the current in-kernel enumeration): User definition: Kernel definition: enum sctp_sstat_state { typedef enum { SCTP_EMPTY = 0, <removed> SCTP_CLOSED = 1, SCTP_STATE_CLOSED = 0, SCTP_COOKIE_WAIT = 2, SCTP_STATE_COOKIE_WAIT = 1, SCTP_COOKIE_ECHOED = 3, SCTP_STATE_COOKIE_ECHOED = 2, SCTP_ESTABLISHED = 4, SCTP_STATE_ESTABLISHED = 3, SCTP_SHUTDOWN_PENDING = 5, SCTP_STATE_SHUTDOWN_PENDING = 4, SCTP_SHUTDOWN_SENT = 6, SCTP_STATE_SHUTDOWN_SENT = 5, SCTP_SHUTDOWN_RECEIVED = 7, SCTP_STATE_SHUTDOWN_RECEIVED = 6, SCTP_SHUTDOWN_ACK_SENT = 8, SCTP_STATE_SHUTDOWN_ACK_SENT = 7, }; } sctp_state_t; This header was later on also placed into the uapi, so that user space programs can compile without having <netinet/sctp.h>, but the shipped with <linux/sctp.h> instead. While RFC6458 under 8.2.1.Association Status (SCTP_STATUS) says that sstat_state can range from SCTP_CLOSED to SCTP_SHUTDOWN_ACK_SENT, we nevertheless have a what it appears to be dummy SCTP_EMPTY state from the very early days. While it seems to do just nothing, commit 0b8f9e25b0aa ("sctp: remove completely unsed EMPTY state") did the right thing and removed this dead code. That however, causes an off-by-one when the user asks the SCTP stack via SCTP_STATUS API and checks for the current socket state thus yielding possibly undefined behaviour in applications as they expect the kernel to tell the right thing. The enumeration had to be changed however as based on the current socket state, we access a function pointer lookup-table through this. Therefore, I think the best way to deal with this is just to add a helper function sctp_assoc_to_state() to encapsulate the off-by-one quirk. Reported-by: Tristan Su <sooqing@gmail.com> Fixes: 0b8f9e25b0aa ("sctp: remove completely unsed EMPTY state") Signed-off-by: Daniel Borkmann <dborkman@redhat.com> Acked-by: Vlad Yasevich <vyasevich@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-08-28 13:28:26 +00:00
static inline enum sctp_sstat_state
sctp_assoc_to_state(const struct sctp_association *asoc)
{
/* SCTP's uapi always had SCTP_EMPTY(=0) as a dummy state, but we
* got rid of it in kernel space. Therefore SCTP_CLOSED et al
* start at =1 in user space, but actually as =0 in kernel space.
* Now that we can not break user space and SCTP_EMPTY is exposed
* there, we need to fix it up with an ugly offset not to break
* applications. :(
*/
return asoc->state + 1;
}
/* Look up the association by its id. */
struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id);
int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp);
/* A macro to walk a list of skbs. */
#define sctp_skb_for_each(pos, head, tmp) \
skb_queue_walk_safe(head, pos, tmp)
/**
* sctp_list_dequeue - remove from the head of the queue
* @list: list to dequeue from
*
* Remove the head of the list. The head item is
* returned or %NULL if the list is empty.
*/
static inline struct list_head *sctp_list_dequeue(struct list_head *list)
{
struct list_head *result = NULL;
if (!list_empty(list)) {
result = list->next;
list_del_init(result);
}
return result;
}
/* SCTP version of skb_set_owner_r. We need this one because
* of the way we have to do receive buffer accounting on bundled
* chunks.
*/
static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
struct sctp_ulpevent *event = sctp_skb2event(skb);
skb_orphan(skb);
skb->sk = sk;
skb->destructor = sctp_sock_rfree;
atomic_add(event->rmem_len, &sk->sk_rmem_alloc);
/*
[NET] CORE: Introducing new memory accounting interface. This patch introduces new memory accounting functions for each network protocol. Most of them are renamed from memory accounting functions for stream protocols. At the same time, some stream memory accounting functions are removed since other functions do same thing. Renaming: sk_stream_free_skb() -> sk_wmem_free_skb() __sk_stream_mem_reclaim() -> __sk_mem_reclaim() sk_stream_mem_reclaim() -> sk_mem_reclaim() sk_stream_mem_schedule -> __sk_mem_schedule() sk_stream_pages() -> sk_mem_pages() sk_stream_rmem_schedule() -> sk_rmem_schedule() sk_stream_wmem_schedule() -> sk_wmem_schedule() sk_charge_skb() -> sk_mem_charge() Removeing sk_stream_rfree(): consolidates into sock_rfree() sk_stream_set_owner_r(): consolidates into skb_set_owner_r() sk_stream_mem_schedule() The following functions are added. sk_has_account(): check if the protocol supports accounting sk_mem_uncharge(): do the opposite of sk_mem_charge() In addition, to achieve consolidation, updating sk_wmem_queued is removed from sk_mem_charge(). Next, to consolidate memory accounting functions, this patch adds memory accounting calls to network core functions. Moreover, present memory accounting call is renamed to new accounting call. Finally we replace present memory accounting calls with new interface in TCP and SCTP. Signed-off-by: Takahiro Yasui <tyasui@redhat.com> Signed-off-by: Hideo Aoki <haoki@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-12-31 08:11:19 +00:00
* This mimics the behavior of skb_set_owner_r
*/
sk->sk_forward_alloc -= event->rmem_len;
}
/* Tests if the list has one and only one entry. */
static inline int sctp_list_single_entry(struct list_head *head)
{
return (head->next != head) && (head->next == head->prev);
}
/* Break down data chunks at this point. */
static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
{
struct sctp_sock *sp = sctp_sk(asoc->base.sk);
int frag = pmtu;
frag -= sp->pf->af->net_header_len;
frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk);
if (asoc->user_frag)
frag = min_t(int, frag, asoc->user_frag);
frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
return frag;
}
static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc)
{
sctp_assoc_sync_pmtu(sk, asoc);
asoc->pmtu_pending = 0;
}
net: sctp: fix panic on duplicate ASCONF chunks When receiving a e.g. semi-good formed connection scan in the form of ... -------------- INIT[ASCONF; ASCONF_ACK] -------------> <----------- INIT-ACK[ASCONF; ASCONF_ACK] ------------ -------------------- COOKIE-ECHO --------------------> <-------------------- COOKIE-ACK --------------------- ---------------- ASCONF_a; ASCONF_b -----------------> ... where ASCONF_a equals ASCONF_b chunk (at least both serials need to be equal), we panic an SCTP server! The problem is that good-formed ASCONF chunks that we reply with ASCONF_ACK chunks are cached per serial. Thus, when we receive a same ASCONF chunk twice (e.g. through a lost ASCONF_ACK), we do not need to process them again on the server side (that was the idea, also proposed in the RFC). Instead, we know it was cached and we just resend the cached chunk instead. So far, so good. Where things get nasty is in SCTP's side effect interpreter, that is, sctp_cmd_interpreter(): While incoming ASCONF_a (chunk = event_arg) is being marked !end_of_packet and !singleton, and we have an association context, we do not flush the outqueue the first time after processing the ASCONF_ACK singleton chunk via SCTP_CMD_REPLY. Instead, we keep it queued up, although we set local_cork to 1. Commit 2e3216cd54b1 changed the precedence, so that as long as we get bundled, incoming chunks we try possible bundling on outgoing queue as well. Before this commit, we would just flush the output queue. Now, while ASCONF_a's ASCONF_ACK sits in the corked outq, we continue to process the same ASCONF_b chunk from the packet. As we have cached the previous ASCONF_ACK, we find it, grab it and do another SCTP_CMD_REPLY command on it. So, effectively, we rip the chunk->list pointers and requeue the same ASCONF_ACK chunk another time. Since we process ASCONF_b, it's correctly marked with end_of_packet and we enforce an uncork, and thus flush, thus crashing the kernel. Fix it by testing if the ASCONF_ACK is currently pending and if that is the case, do not requeue it. When flushing the output queue we may relink the chunk for preparing an outgoing packet, but eventually unlink it when it's copied into the skb right before transmission. Joint work with Vlad Yasevich. Fixes: 2e3216cd54b1 ("sctp: Follow security requirement of responding with 1 packet") Signed-off-by: Daniel Borkmann <dborkman@redhat.com> Signed-off-by: Vlad Yasevich <vyasevich@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-09 20:55:32 +00:00
static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
{
return !list_empty(&chunk->list);
}
/* Walk through a list of TLV parameters. Don't trust the
* individual parameter lengths and instead depend on
* the chunk length to indicate when to stop. Make sure
* there is room for a param header too.
*/
#define sctp_walk_params(pos, chunk, member)\
_sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
#define _sctp_walk_params(pos, chunk, end, member)\
for (pos.v = chunk->member;\
pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
pos.v += WORD_ROUND(ntohs(pos.p->length)))
#define sctp_walk_errors(err, chunk_hdr)\
_sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
#define _sctp_walk_errors(err, chunk_hdr, end)\
for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
sizeof(sctp_chunkhdr_t));\
(void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
ntohs(err->length) >= sizeof(sctp_errhdr_t); \
err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length))))
#define sctp_walk_fwdtsn(pos, chunk)\
_sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
#define _sctp_walk_fwdtsn(pos, chunk, end)\
for (pos = chunk->subh.fwdtsn_hdr->skip;\
(void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\
pos++)
/* External references. */
extern struct proto sctp_prot;
extern struct proto sctpv6_prot;
void sctp_put_port(struct sock *sk);
extern struct idr sctp_assocs_id;
extern spinlock_t sctp_assocs_id_lock;
/* Static inline functions. */
/* Convert from an IP version number to an Address Family symbol. */
static inline int ipver2af(__u8 ipver)
{
switch (ipver) {
case 4:
return AF_INET;
case 6:
return AF_INET6;
default:
return 0;
}
}
/* Convert from an address parameter type to an address family. */
static inline int param_type2af(__be16 type)
{
switch (type) {
case SCTP_PARAM_IPV4_ADDRESS:
return AF_INET;
case SCTP_PARAM_IPV6_ADDRESS:
return AF_INET6;
default:
return 0;
}
}
/* Warning: The following hash functions assume a power of two 'size'. */
/* This is the hash function for the SCTP port hash table. */
static inline int sctp_phashfn(struct net *net, __u16 lport)
{
return (net_hash_mix(net) + lport) & (sctp_port_hashsize - 1);
}
/* This is the hash function for the endpoint hash table. */
static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
{
return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1);
}
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 01:06:00 +00:00
#define sctp_for_each_hentry(epb, head) \
hlist_for_each_entry(epb, head, node)
/* Is a socket of this style? */
#define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style))
static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style)
{
return sctp_sk(sk)->type == style;
}
/* Is the association in this state? */
#define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state))
static inline int __sctp_state(const struct sctp_association *asoc,
sctp_state_t state)
{
return asoc->state == state;
}
/* Is the socket in this state? */
#define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state))
static inline int __sctp_sstate(const struct sock *sk, sctp_sock_state_t state)
{
return sk->sk_state == state;
}
/* Map v4-mapped v6 address back to v4 address */
static inline void sctp_v6_map_v4(union sctp_addr *addr)
{
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = addr->v6.sin6_port;
addr->v4.sin_addr.s_addr = addr->v6.sin6_addr.s6_addr32[3];
}
/* Map v4 address to v4-mapped v6 address */
static inline void sctp_v4_map_v6(union sctp_addr *addr)
{
__be16 port;
port = addr->v4.sin_port;
addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
addr->v6.sin6_port = port;
addr->v6.sin6_family = AF_INET6;
sctp: Fixup v4mapped behaviour to comply with Sock API The SCTP socket extensions API document describes the v4mapping option as follows: 8.1.15. Set/Clear IPv4 Mapped Addresses (SCTP_I_WANT_MAPPED_V4_ADDR) This socket option is a Boolean flag which turns on or off the mapping of IPv4 addresses. If this option is turned on, then IPv4 addresses will be mapped to V6 representation. If this option is turned off, then no mapping will be done of V4 addresses and a user will receive both PF_INET6 and PF_INET type addresses on the socket. See [RFC3542] for more details on mapped V6 addresses. This description isn't really in line with what the code does though. Introduce addr_to_user (renamed addr_v4map), which should be called before any sockaddr is passed back to user space. The new function places the sockaddr into the correct format depending on the SCTP_I_WANT_MAPPED_V4_ADDR option. Audit all places that touched v4mapped and either sanely construct a v4 or v6 address then call addr_to_user, or drop the unnecessary v4mapped check entirely. Audit all places that call addr_to_user and verify they are on a sycall return path. Add a custom getname that formats the address properly. Several bugs are addressed: - SCTP_I_WANT_MAPPED_V4_ADDR=0 often returned garbage for addresses to user space - The addr_len returned from recvmsg was not correct when returning AF_INET on a v6 socket - flowlabel and scope_id were not zerod when promoting a v4 to v6 - Some syscalls like bind and connect behaved differently depending on v4mapped Tested bind, getpeername, getsockname, connect, and recvmsg for proper behaviour in v4mapped = 1 and 0 cases. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Tested-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-07-30 18:40:53 +00:00
addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_scope_id = 0;
addr->v6.sin6_addr.s6_addr32[0] = 0;
addr->v6.sin6_addr.s6_addr32[1] = 0;
addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff);
}
/* The cookie is always 0 since this is how it's used in the
* pmtu code.
*/
static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t)
{
if (t->dst && !dst_check(t->dst, t->dst_cookie)) {
dst_release(t->dst);
t->dst = NULL;
}
return t->dst;
}
#endif /* __net_sctp_h__ */