Merge branch 'tipc-link-improvements'

Jon Maloy says:

====================
tipc: some link level code improvements

Extensive testing has revealed some weaknesses and suboptimal solutions
in the link-level code.

This commit series addresses those issues.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-10-15 23:55:33 -07:00
commit ae23051820
7 changed files with 172 additions and 120 deletions

View File

@ -362,6 +362,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr)
b_ptr->media->disable_media(b_ptr);
tipc_node_delete_links(net, b_ptr->identity);
RCU_INIT_POINTER(b_ptr->media_ptr, NULL);
if (b_ptr->link_req)
tipc_disc_delete(b_ptr->link_req);
@ -399,16 +400,13 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
*
* Mark L2 bearer as inactive so that incoming buffers are thrown away,
* then get worker thread to complete bearer cleanup. (Can't do cleanup
* here because cleanup code needs to sleep and caller holds spinlocks.)
* Mark L2 bearer as inactive so that incoming buffers are thrown away
*/
void tipc_disable_l2_media(struct tipc_bearer *b)
{
struct net_device *dev;
dev = (struct net_device *)rtnl_dereference(b->media_ptr);
RCU_INIT_POINTER(b->media_ptr, NULL);
RCU_INIT_POINTER(dev->tipc_ptr, NULL);
synchronize_net();
dev_put(dev);
@ -554,7 +552,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
case NETDEV_CHANGE:
if (netif_carrier_ok(dev))
break;
case NETDEV_DOWN:
case NETDEV_GOING_DOWN:
case NETDEV_CHANGEMTU:
tipc_reset_bearer(net, b_ptr);
break;

View File

@ -120,11 +120,21 @@ bool tipc_link_is_up(struct tipc_link *l)
return link_is_up(l);
}
bool tipc_link_peer_is_down(struct tipc_link *l)
{
return l->state == LINK_PEER_RESET;
}
/* tipc_link_is_reset - check whether the link state matches one of the
 * states LINK_RESET, LINK_FAILINGOVER or LINK_ESTABLISHING
 * @l: link to inspect
 *
 * The check is a bitwise AND of l->state against the OR of the three state
 * values; the result is true when that AND is non-zero.
 */
bool tipc_link_is_reset(struct tipc_link *l)
{
	const int reset_mask = LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING;

	return (l->state & reset_mask) != 0;
}
/* tipc_link_is_establishing - check whether the link is in state
 * LINK_ESTABLISHING
 * @l: link to inspect
 *
 * Returns true only when l->state equals LINK_ESTABLISHING.
 */
bool tipc_link_is_establishing(struct tipc_link *l)
{
	bool establishing = (l->state == LINK_ESTABLISHING);

	return establishing;
}
bool tipc_link_is_synching(struct tipc_link *l)
{
return l->state == LINK_SYNCHING;
@ -321,14 +331,15 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
switch (evt) {
case LINK_ESTABLISH_EVT:
l->state = LINK_ESTABLISHED;
rc |= TIPC_LINK_UP_EVT;
break;
case LINK_FAILOVER_BEGIN_EVT:
l->state = LINK_FAILINGOVER;
break;
case LINK_PEER_RESET_EVT:
case LINK_RESET_EVT:
l->state = LINK_RESET;
break;
case LINK_FAILURE_EVT:
case LINK_PEER_RESET_EVT:
case LINK_SYNCH_BEGIN_EVT:
case LINK_FAILOVER_END_EVT:
break;
@ -578,8 +589,6 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr)
void tipc_link_reset(struct tipc_link *l)
{
tipc_link_fsm_evt(l, LINK_RESET_EVT);
/* Link is down, accept any session */
l->peer_session = WILDCARD_SESSION;
@ -953,7 +962,7 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
case CONN_MANAGER:
__skb_queue_tail(inputq, skb);
skb_queue_tail(inputq, skb);
return true;
case NAME_DISTRIBUTOR:
node->bclink.recv_permitted = true;
@ -982,6 +991,7 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct sk_buff **reasm_skb = &l->reasm_buf;
struct sk_buff *iskb;
struct sk_buff_head tmpq;
int usr = msg_user(hdr);
int rc = 0;
int pos = 0;
@ -1006,10 +1016,12 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
}
if (usr == MSG_BUNDLER) {
skb_queue_head_init(&tmpq);
l->stats.recv_bundles++;
l->stats.recv_bundled += msg_msgcnt(hdr);
while (tipc_msg_extract(skb, &iskb, &pos))
tipc_data_input(l, iskb, inputq);
tipc_data_input(l, iskb, &tmpq);
tipc_skb_queue_splice_tail(&tmpq, inputq);
return 0;
} else if (usr == MSG_FRAGMENTER) {
l->stats.recv_fragments++;
@ -1044,49 +1056,76 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
return released;
}
/* tipc_link_build_ack_msg: prepare a link acknowledge message
 * @l: link to acknowledge on; its rcv_unacked counter is cleared and its
 *     sent_acks statistic is incremented
 * @xmitq: queue passed on to tipc_link_build_proto_msg()
 */
void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
{
	/* Book-keeping first, then build the STATE_MSG */
	l->stats.sent_acks += 1;
	l->rcv_unacked = 0;

	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
}
/* tipc_link_build_reset_msg: prepare a link RESET or ACTIVATE message
 * @l: link to build the message for
 * @xmitq: queue passed on to tipc_link_build_proto_msg()
 *
 * ACTIVATE_MSG is used when the link state is LINK_ESTABLISHING; in every
 * other state RESET_MSG is used.
 */
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
{
	int mtyp = (l->state == LINK_ESTABLISHING) ? ACTIVATE_MSG : RESET_MSG;

	tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
}
/* tipc_link_build_nack_msg: prepare link nack message for transmission
*/
static void tipc_link_build_nack_msg(struct tipc_link *l,
struct sk_buff_head *xmitq)
{
u32 def_cnt = ++l->stats.deferred_recv;
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
}
/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
* @link: the link that should handle the message
* @l: the link that should handle the message
* @skb: TIPC packet
* @xmitq: queue to place packets to be sent after this call
*/
int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq)
{
struct sk_buff_head *arrvq = &l->deferdq;
struct sk_buff_head tmpq;
struct sk_buff_head *defq = &l->deferdq;
struct tipc_msg *hdr;
u16 seqno, rcv_nxt;
u16 seqno, rcv_nxt, win_lim;
int rc = 0;
__skb_queue_head_init(&tmpq);
if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) {
if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV))
tipc_link_build_proto_msg(l, STATE_MSG, 0,
0, 0, 0, xmitq);
return rc;
}
while ((skb = skb_peek(arrvq))) {
do {
hdr = buf_msg(skb);
seqno = msg_seqno(hdr);
rcv_nxt = l->rcv_nxt;
win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
/* Verify and update link state */
if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) {
__skb_dequeue(arrvq);
rc = tipc_link_proto_rcv(l, skb, xmitq);
continue;
}
if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
return tipc_link_proto_rcv(l, skb, xmitq);
if (unlikely(!link_is_up(l))) {
rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
if (!link_is_up(l)) {
kfree_skb(__skb_dequeue(arrvq));
goto exit;
}
if (l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
goto drop;
}
/* Don't send probe at next timeout expiration */
l->silent_intv_cnt = 0;
/* Drop if outside receive window */
if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
l->stats.duplicates++;
goto drop;
}
/* Forward queues and wake up waiting users */
if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
tipc_link_advance_backlog(l, xmitq);
@ -1094,39 +1133,28 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
link_prepare_wakeup(l);
}
/* Defer reception if there is a gap in the sequence */
seqno = msg_seqno(hdr);
rcv_nxt = l->rcv_nxt;
if (unlikely(less(rcv_nxt, seqno))) {
l->stats.deferred_recv++;
goto exit;
/* Defer delivery if sequence gap */
if (unlikely(seqno != rcv_nxt)) {
__tipc_skb_queue_sorted(defq, seqno, skb);
tipc_link_build_nack_msg(l, xmitq);
break;
}
__skb_dequeue(arrvq);
/* Drop if packet already received */
if (unlikely(more(rcv_nxt, seqno))) {
l->stats.duplicates++;
kfree_skb(skb);
goto exit;
}
/* Packet can be delivered */
/* Deliver packet */
l->rcv_nxt++;
l->stats.recv_info++;
if (unlikely(!tipc_data_input(l, skb, &tmpq)))
rc = tipc_link_input(l, skb, &tmpq);
if (!tipc_data_input(l, skb, l->inputq))
rc = tipc_link_input(l, skb, l->inputq);
if (unlikely(rc))
break;
if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
tipc_link_build_ack_msg(l, xmitq);
/* Ack at regular intervals */
if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
l->rcv_unacked = 0;
l->stats.sent_acks++;
tipc_link_build_proto_msg(l, STATE_MSG,
0, 0, 0, 0, xmitq);
}
}
exit:
tipc_skb_queue_splice_tail(&tmpq, l->inputq);
} while ((skb = __skb_dequeue(defq)));
return rc;
drop:
kfree_skb(skb);
return rc;
}
@ -1250,7 +1278,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
}
/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets
* with contents of the link's tranmsit and backlog queues.
* with contents of the link's transmit and backlog queues.
*/
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq)
@ -1331,6 +1359,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
u16 rcv_nxt = l->rcv_nxt;
int mtyp = msg_type(hdr);
char *if_name;
int rc = 0;
@ -1340,7 +1369,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (link_own_addr(l) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
switch (msg_type(hdr)) {
switch (mtyp) {
case RESET_MSG:
/* Ignore duplicate RESET with old session number */
@ -1367,12 +1396,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
l->priority = peers_prio;
if (msg_type(hdr) == RESET_MSG) {
rc |= tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
} else if (!link_is_up(l)) {
tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
rc |= tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
}
/* ACTIVATE_MSG serves as PEER_RESET if link is already down */
if ((mtyp == RESET_MSG) || !link_is_up(l))
rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
/* ACTIVATE_MSG takes up link if it was already locally reset */
if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
rc = TIPC_LINK_UP_EVT;
l->peer_session = msg_session(hdr);
l->peer_bearer_id = msg_bearer_id(hdr);
if (l->mtu > msg_max_pkt(hdr))
@ -1389,9 +1420,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
l->stats.recv_states++;
if (msg_probe(hdr))
l->stats.recv_probes++;
rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
if (!link_is_up(l))
if (!link_is_up(l)) {
if (l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
break;
}
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))

View File

@ -185,7 +185,7 @@ struct tipc_link {
} backlog[5];
u16 snd_nxt;
u16 last_retransm;
u32 window;
u16 window;
u32 stale_count;
/* Reception */
@ -213,10 +213,13 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq);
void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
bool tipc_link_is_up(struct tipc_link *l);
bool tipc_link_peer_is_down(struct tipc_link *l);
bool tipc_link_is_reset(struct tipc_link *l);
bool tipc_link_is_establishing(struct tipc_link *l);
bool tipc_link_is_synching(struct tipc_link *l);
bool tipc_link_is_failingover(struct tipc_link *l);
bool tipc_link_is_blocked(struct tipc_link *l);

View File

@ -590,3 +590,34 @@ error:
kfree_skb(head);
return NULL;
}
/* __tipc_skb_queue_sorted(): insert a buffer into a queue at the position
 * given by its sequence number
 * @list: queue to insert into
 * @seqno: sequence number of the buffer to add
 * @skb: buffer to add
 *
 * If the walk through the queue ends without an insertion - which is what
 * happens when an entry with the same sequence number is found - the buffer
 * is freed instead of queued.
 */
void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
			     struct sk_buff *skb)
{
	struct sk_buff *cur, *next;

	/* Empty queue, or the new buffer sorts before the current head */
	if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
		__skb_queue_head(list, skb);
		return;
	}

	/* The new buffer sorts after the current tail */
	if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
		__skb_queue_tail(list, skb);
		return;
	}

	/* Somewhere in between: find the first entry not sorting before it */
	skb_queue_walk_safe(list, cur, next) {
		u16 cur_seqno = buf_seqno(cur);

		if (more(seqno, cur_seqno))
			continue;
		if (seqno != cur_seqno) {
			__skb_queue_before(list, cur, skb);
			return;
		}
		/* Same sequence number already queued */
		break;
	}
	kfree_skb(skb);
}

View File

@ -790,6 +790,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list);
void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb);
static inline u16 buf_seqno(struct sk_buff *skb)
{
@ -862,38 +864,6 @@ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list,
return skb;
}
/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
* @list: list to be appended to
* @skb: buffer to add
* Returns true if queue should treated further, otherwise false
*/
static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list,
struct sk_buff *skb)
{
struct sk_buff *_skb, *tmp;
struct tipc_msg *hdr = buf_msg(skb);
u16 seqno = msg_seqno(hdr);
if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) {
__skb_queue_head(list, skb);
return true;
}
if (likely(less(seqno, buf_seqno(skb_peek(list))))) {
__skb_queue_head(list, skb);
return true;
}
if (!more(seqno, buf_seqno(skb_peek_tail(list)))) {
skb_queue_walk_safe(list, _skb, tmp) {
if (likely(less(seqno, buf_seqno(_skb)))) {
__skb_queue_before(list, _skb, skb);
return true;
}
}
}
__skb_queue_tail(list, skb);
return false;
}
/* tipc_skb_queue_splice_tail - append an skb list to lock protected list
* @list: the new list to append. Not lock protected
* @head: target list. Lock protected.

View File

@ -317,7 +317,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
struct tipc_link *ol = node_active_link(n, 0);
struct tipc_link *nl = n->links[bearer_id].link;
if (!nl || !tipc_link_is_up(nl))
if (!nl)
return;
tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT);
if (!tipc_link_is_up(nl))
return;
n->working_links++;
@ -416,7 +420,13 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
}
if (!tipc_node_is_up(n)) {
if (tipc_link_peer_is_down(l))
tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
tipc_link_fsm_evt(l, LINK_RESET_EVT);
tipc_link_reset(l);
tipc_link_build_reset_msg(l, xmitq);
*maddr = &n->links[*bearer_id].maddr;
node_lost_contact(n, &le->inputq);
return;
}
@ -428,6 +438,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1);
tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
tipc_link_reset(l);
tipc_link_fsm_evt(l, LINK_RESET_EVT);
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
*maddr = &n->links[tnl->bearer_id].maddr;
@ -437,20 +448,28 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
{
struct tipc_link_entry *le = &n->links[bearer_id];
struct tipc_link *l = le->link;
struct tipc_media_addr *maddr;
struct sk_buff_head xmitq;
if (!l)
return;
__skb_queue_head_init(&xmitq);
tipc_node_lock(n);
__tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
if (delete && le->link) {
kfree(le->link);
le->link = NULL;
n->link_cnt--;
if (!tipc_link_is_establishing(l)) {
__tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
if (delete) {
kfree(l);
le->link = NULL;
n->link_cnt--;
}
} else {
/* Defuse pending tipc_node_link_up() */
tipc_link_fsm_evt(l, LINK_RESET_EVT);
}
tipc_node_unlock(n);
tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
@ -567,6 +586,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
goto exit;
}
tipc_link_reset(l);
tipc_link_fsm_evt(l, LINK_RESET_EVT);
if (n->state == NODE_FAILINGOVER)
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
le->link = l;
@ -579,7 +599,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
tipc_node_unlock(n);
if (reset)
if (reset && !tipc_link_is_reset(l))
tipc_node_link_down(n, b->identity, false);
tipc_node_put(n);
}
@ -686,10 +706,10 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
break;
case SELF_ESTABL_CONTACT_EVT:
case PEER_LOST_CONTACT_EVT:
break;
case NODE_SYNCH_END_EVT:
case NODE_SYNCH_BEGIN_EVT:
case NODE_FAILOVER_BEGIN_EVT:
break;
case NODE_SYNCH_BEGIN_EVT:
case NODE_FAILOVER_END_EVT:
default:
goto illegal_evt;
@ -849,9 +869,6 @@ static void node_lost_contact(struct tipc_node *n_ptr,
tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT);
}
/* Prevent re-contact with node until cleanup is done */
tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT);
/* Notify publications from this node */
n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;

View File

@ -425,7 +425,6 @@ static void tipc_udp_disable(struct tipc_bearer *b)
}
if (ub->ubsock)
sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
RCU_INIT_POINTER(b->media_ptr, NULL);
RCU_INIT_POINTER(ub->bearer, NULL);
/* sock_release need to be done outside of rtnl lock */