forked from Minki/linux
fs: dlm: reconnect if socket error report occurs
This patch changes the reconnect handling so that a reconnect is triggered whenever the socket error report callback fires. It also handles reconnects in the non-blocking connect case, which was previously missing. If the error ECONNREFUSED is reported, the reconnect is delayed by one second. Signed-off-by: Alexander Aring <aahringo@redhat.com> Signed-off-by: David Teigland <teigland@redhat.com>
This commit is contained in:
parent
7443bc9625
commit
ba868d9dea
@ -79,6 +79,8 @@ struct connection {
|
||||
#define CF_CLOSING 8
|
||||
#define CF_SHUTDOWN 9
|
||||
#define CF_CONNECTED 10
|
||||
#define CF_RECONNECT 11
|
||||
#define CF_DELAY_CONNECT 12
|
||||
struct list_head writequeue; /* List of outgoing writequeue_entries */
|
||||
spinlock_t writequeue_lock;
|
||||
void (*connect_action) (struct connection *); /* What to do to connect */
|
||||
@ -87,6 +89,7 @@ struct connection {
|
||||
#define MAX_CONNECT_RETRIES 3
|
||||
struct hlist_node list;
|
||||
struct connection *othercon;
|
||||
struct connection *sendcon;
|
||||
struct work_struct rwork; /* Receive workqueue */
|
||||
struct work_struct swork; /* Send workqueue */
|
||||
wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
|
||||
@ -585,6 +588,22 @@ static void lowcomms_error_report(struct sock *sk)
|
||||
dlm_config.ci_tcp_port, sk->sk_err,
|
||||
sk->sk_err_soft);
|
||||
}
|
||||
|
||||
/* below sendcon only handling */
|
||||
if (test_bit(CF_IS_OTHERCON, &con->flags))
|
||||
con = con->sendcon;
|
||||
|
||||
switch (sk->sk_err) {
|
||||
case ECONNREFUSED:
|
||||
set_bit(CF_DELAY_CONNECT, &con->flags);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!test_and_set_bit(CF_RECONNECT, &con->flags))
|
||||
queue_work(send_workqueue, &con->swork);
|
||||
|
||||
out:
|
||||
read_unlock_bh(&sk->sk_callback_lock);
|
||||
if (orig_report)
|
||||
@ -702,6 +721,8 @@ static void close_connection(struct connection *con, bool and_other,
|
||||
con->rx_leftover = 0;
|
||||
con->retries = 0;
|
||||
clear_bit(CF_CONNECTED, &con->flags);
|
||||
clear_bit(CF_DELAY_CONNECT, &con->flags);
|
||||
clear_bit(CF_RECONNECT, &con->flags);
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
clear_bit(CF_CLOSING, &con->flags);
|
||||
}
|
||||
@ -840,18 +861,15 @@ out_resched:
|
||||
|
||||
out_close:
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
if (ret != -EAGAIN) {
|
||||
/* Reconnect when there is something to send */
|
||||
if (ret == 0) {
|
||||
close_connection(con, false, true, false);
|
||||
if (ret == 0) {
|
||||
log_print("connection %p got EOF from %d",
|
||||
con, con->nodeid);
|
||||
/* handling for tcp shutdown */
|
||||
clear_bit(CF_SHUTDOWN, &con->flags);
|
||||
wake_up(&con->shutdown_wait);
|
||||
/* signal to breaking receive worker */
|
||||
ret = -1;
|
||||
}
|
||||
log_print("connection %p got EOF from %d",
|
||||
con, con->nodeid);
|
||||
/* handling for tcp shutdown */
|
||||
clear_bit(CF_SHUTDOWN, &con->flags);
|
||||
wake_up(&con->shutdown_wait);
|
||||
/* signal to breaking receive worker */
|
||||
ret = -1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -940,6 +958,7 @@ static int accept_from_sock(struct listen_connection *con)
|
||||
lockdep_set_subclass(&othercon->sock_mutex, 1);
|
||||
set_bit(CF_IS_OTHERCON, &othercon->flags);
|
||||
newcon->othercon = othercon;
|
||||
othercon->sendcon = newcon;
|
||||
} else {
|
||||
/* close other sock con if we have something new */
|
||||
close_connection(othercon, false, true, false);
|
||||
@ -1504,7 +1523,7 @@ static void send_to_sock(struct connection *con)
|
||||
cond_resched();
|
||||
goto out;
|
||||
} else if (ret < 0)
|
||||
goto send_error;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Don't starve people filling buffers */
|
||||
@ -1521,14 +1540,6 @@ out:
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
return;
|
||||
|
||||
send_error:
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
close_connection(con, false, false, true);
|
||||
/* Requeue the send work. When the work daemon runs again, it will try
|
||||
a new connection, then call this function again. */
|
||||
queue_work(send_workqueue, &con->swork);
|
||||
return;
|
||||
|
||||
out_connect:
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
queue_work(send_workqueue, &con->swork);
|
||||
@ -1605,8 +1616,15 @@ static void process_send_sockets(struct work_struct *work)
|
||||
WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
|
||||
|
||||
clear_bit(CF_WRITE_PENDING, &con->flags);
|
||||
if (con->sock == NULL) /* not mutex protected so check it inside too */
|
||||
|
||||
if (test_and_clear_bit(CF_RECONNECT, &con->flags))
|
||||
close_connection(con, false, false, true);
|
||||
|
||||
if (con->sock == NULL) { /* not mutex protected so check it inside too */
|
||||
if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
|
||||
msleep(1000);
|
||||
con->connect_action(con);
|
||||
}
|
||||
if (!list_empty(&con->writequeue))
|
||||
send_to_sock(con);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user