forked from Minki/linux
[TIMEWAIT]: Move inet_timewait_death_row routines to net/ipv4/inet_timewait_sock.c
Also export the ones that will be used in the next changeset, when DCCP uses this infrastructure. Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
295ff7edb8
commit
696ab2d3bf
@ -82,6 +82,10 @@ struct inet_timewait_death_row {
|
|||||||
int sysctl_max_tw_buckets;
|
int sysctl_max_tw_buckets;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern void inet_twdr_hangman(unsigned long data);
|
||||||
|
extern void inet_twdr_twkill_work(void *data);
|
||||||
|
extern void inet_twdr_twcal_tick(unsigned long data);
|
||||||
|
|
||||||
#if (BITS_PER_LONG == 64)
|
#if (BITS_PER_LONG == 64)
|
||||||
#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
|
#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
|
||||||
#else
|
#else
|
||||||
@ -206,4 +210,10 @@ extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
|
|||||||
extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
|
extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
|
||||||
struct sock *sk,
|
struct sock *sk,
|
||||||
struct inet_hashinfo *hashinfo);
|
struct inet_hashinfo *hashinfo);
|
||||||
|
|
||||||
|
extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
||||||
|
struct inet_timewait_death_row *twdr,
|
||||||
|
const int timeo, const int timewait_len);
|
||||||
|
extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
||||||
|
struct inet_timewait_death_row *twdr);
|
||||||
#endif /* _INET_TIMEWAIT_SOCK_ */
|
#endif /* _INET_TIMEWAIT_SOCK_ */
|
||||||
|
@ -44,8 +44,6 @@ extern struct inet_hashinfo tcp_hashinfo;
|
|||||||
|
|
||||||
extern atomic_t tcp_orphan_count;
|
extern atomic_t tcp_orphan_count;
|
||||||
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
|
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
|
||||||
extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
|
||||||
struct inet_timewait_death_row *twdr);
|
|
||||||
|
|
||||||
#define MAX_TCP_HEADER (128 + MAX_HEADER)
|
#define MAX_TCP_HEADER (128 + MAX_HEADER)
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
#include <net/inet_hashtables.h>
|
#include <net/inet_hashtables.h>
|
||||||
#include <net/inet_timewait_sock.h>
|
#include <net/inet_timewait_sock.h>
|
||||||
|
#include <net/ip.h>
|
||||||
|
|
||||||
/* Must be called with locally disabled BHs. */
|
/* Must be called with locally disabled BHs. */
|
||||||
void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
|
void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
|
||||||
@ -85,6 +86,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
|
|||||||
write_unlock(&ehead->lock);
|
write_unlock(&ehead->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
|
||||||
|
|
||||||
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
|
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
|
||||||
{
|
{
|
||||||
struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
|
struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
|
||||||
@ -112,3 +115,270 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
|
|||||||
|
|
||||||
return tw;
|
return tw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL_GPL(inet_twsk_alloc);
|
||||||
|
|
||||||
|
/* Returns non-zero if quota exceeded. */
|
||||||
|
static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
|
||||||
|
const int slot)
|
||||||
|
{
|
||||||
|
struct inet_timewait_sock *tw;
|
||||||
|
struct hlist_node *node;
|
||||||
|
unsigned int killed;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* NOTE: compare this to previous version where lock
|
||||||
|
* was released after detaching chain. It was racy,
|
||||||
|
* because tw buckets are scheduled in not serialized context
|
||||||
|
* in 2.3 (with netfilter), and with softnet it is common, because
|
||||||
|
* soft irqs are not sequenced.
|
||||||
|
*/
|
||||||
|
killed = 0;
|
||||||
|
ret = 0;
|
||||||
|
rescan:
|
||||||
|
inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
|
||||||
|
__inet_twsk_del_dead_node(tw);
|
||||||
|
spin_unlock(&twdr->death_lock);
|
||||||
|
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||||
|
inet_twsk_put(tw);
|
||||||
|
killed++;
|
||||||
|
spin_lock(&twdr->death_lock);
|
||||||
|
if (killed > INET_TWDR_TWKILL_QUOTA) {
|
||||||
|
ret = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* While we dropped twdr->death_lock, another cpu may have
|
||||||
|
* killed off the next TW bucket in the list, therefore
|
||||||
|
* do a fresh re-read of the hlist head node with the
|
||||||
|
* lock reacquired. We still use the hlist traversal
|
||||||
|
* macro in order to get the prefetches.
|
||||||
|
*/
|
||||||
|
goto rescan;
|
||||||
|
}
|
||||||
|
|
||||||
|
twdr->tw_count -= killed;
|
||||||
|
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void inet_twdr_hangman(unsigned long data)
|
||||||
|
{
|
||||||
|
struct inet_timewait_death_row *twdr;
|
||||||
|
int unsigned need_timer;
|
||||||
|
|
||||||
|
twdr = (struct inet_timewait_death_row *)data;
|
||||||
|
spin_lock(&twdr->death_lock);
|
||||||
|
|
||||||
|
if (twdr->tw_count == 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
need_timer = 0;
|
||||||
|
if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
|
||||||
|
twdr->thread_slots |= (1 << twdr->slot);
|
||||||
|
mb();
|
||||||
|
schedule_work(&twdr->twkill_work);
|
||||||
|
need_timer = 1;
|
||||||
|
} else {
|
||||||
|
/* We purged the entire slot, anything left? */
|
||||||
|
if (twdr->tw_count)
|
||||||
|
need_timer = 1;
|
||||||
|
}
|
||||||
|
twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
|
||||||
|
if (need_timer)
|
||||||
|
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
||||||
|
out:
|
||||||
|
spin_unlock(&twdr->death_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL_GPL(inet_twdr_hangman);
|
||||||
|
|
||||||
|
extern void twkill_slots_invalid(void);
|
||||||
|
|
||||||
|
void inet_twdr_twkill_work(void *data)
|
||||||
|
{
|
||||||
|
struct inet_timewait_death_row *twdr = data;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
|
||||||
|
twkill_slots_invalid();
|
||||||
|
|
||||||
|
while (twdr->thread_slots) {
|
||||||
|
spin_lock_bh(&twdr->death_lock);
|
||||||
|
for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
|
||||||
|
if (!(twdr->thread_slots & (1 << i)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
while (inet_twdr_do_twkill_work(twdr, i) != 0) {
|
||||||
|
if (need_resched()) {
|
||||||
|
spin_unlock_bh(&twdr->death_lock);
|
||||||
|
schedule();
|
||||||
|
spin_lock_bh(&twdr->death_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
twdr->thread_slots &= ~(1 << i);
|
||||||
|
}
|
||||||
|
spin_unlock_bh(&twdr->death_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
|
||||||
|
|
||||||
|
/* These are always called from BH context. See callers in
|
||||||
|
* tcp_input.c to verify this.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This is for handling early-kills of TIME_WAIT sockets. */
|
||||||
|
void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
||||||
|
struct inet_timewait_death_row *twdr)
|
||||||
|
{
|
||||||
|
spin_lock(&twdr->death_lock);
|
||||||
|
if (inet_twsk_del_dead_node(tw)) {
|
||||||
|
inet_twsk_put(tw);
|
||||||
|
if (--twdr->tw_count == 0)
|
||||||
|
del_timer(&twdr->tw_timer);
|
||||||
|
}
|
||||||
|
spin_unlock(&twdr->death_lock);
|
||||||
|
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(inet_twsk_deschedule);
|
||||||
|
|
||||||
|
void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
||||||
|
struct inet_timewait_death_row *twdr,
|
||||||
|
const int timeo, const int timewait_len)
|
||||||
|
{
|
||||||
|
struct hlist_head *list;
|
||||||
|
int slot;
|
||||||
|
|
||||||
|
/* timeout := RTO * 3.5
|
||||||
|
*
|
||||||
|
* 3.5 = 1+2+0.5 to wait for two retransmits.
|
||||||
|
*
|
||||||
|
* RATIONALE: if FIN arrived and we entered TIME-WAIT state,
|
||||||
|
* our ACK acking that FIN can be lost. If N subsequent retransmitted
|
||||||
|
* FINs (or previous segments) are lost (probability of such event
|
||||||
|
* is p^(N+1), where p is probability to lose single packet and
|
||||||
|
* time to detect the loss is about RTO*(2^N - 1) with exponential
|
||||||
|
* backoff). Normal timewait length is calculated so, that we
|
||||||
|
* waited at least for one retransmitted FIN (maximal RTO is 120sec).
|
||||||
|
* [ BTW Linux, following BSD, violates this requirement waiting
|
||||||
|
* only for 60sec, we should wait at least for 240 secs.
|
||||||
|
* Well, 240 consumes too much of resources 8)
|
||||||
|
* ]
|
||||||
|
* This interval is not reduced to catch old duplicate and
|
||||||
|
* responses to our wandering segments living for two MSLs.
|
||||||
|
* However, if we use PAWS to detect
|
||||||
|
* old duplicates, we can reduce the interval to bounds required
|
||||||
|
* by RTO, rather than MSL. So, if peer understands PAWS, we
|
||||||
|
* kill tw bucket after 3.5*RTO (it is important that this number
|
||||||
|
* is greater than TS tick!) and detect old duplicates with help
|
||||||
|
* of PAWS.
|
||||||
|
*/
|
||||||
|
slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
|
||||||
|
|
||||||
|
spin_lock(&twdr->death_lock);
|
||||||
|
|
||||||
|
/* Unlink it, if it was scheduled */
|
||||||
|
if (inet_twsk_del_dead_node(tw))
|
||||||
|
twdr->tw_count--;
|
||||||
|
else
|
||||||
|
atomic_inc(&tw->tw_refcnt);
|
||||||
|
|
||||||
|
if (slot >= INET_TWDR_RECYCLE_SLOTS) {
|
||||||
|
/* Schedule to slow timer */
|
||||||
|
if (timeo >= timewait_len) {
|
||||||
|
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
||||||
|
} else {
|
||||||
|
slot = (timeo + twdr->period - 1) / twdr->period;
|
||||||
|
if (slot >= INET_TWDR_TWKILL_SLOTS)
|
||||||
|
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
||||||
|
}
|
||||||
|
tw->tw_ttd = jiffies + timeo;
|
||||||
|
slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
|
||||||
|
list = &twdr->cells[slot];
|
||||||
|
} else {
|
||||||
|
tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
|
||||||
|
|
||||||
|
if (twdr->twcal_hand < 0) {
|
||||||
|
twdr->twcal_hand = 0;
|
||||||
|
twdr->twcal_jiffie = jiffies;
|
||||||
|
twdr->twcal_timer.expires = twdr->twcal_jiffie +
|
||||||
|
(slot << INET_TWDR_RECYCLE_TICK);
|
||||||
|
add_timer(&twdr->twcal_timer);
|
||||||
|
} else {
|
||||||
|
if (time_after(twdr->twcal_timer.expires,
|
||||||
|
jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
|
||||||
|
mod_timer(&twdr->twcal_timer,
|
||||||
|
jiffies + (slot << INET_TWDR_RECYCLE_TICK));
|
||||||
|
slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
||||||
|
}
|
||||||
|
list = &twdr->twcal_row[slot];
|
||||||
|
}
|
||||||
|
|
||||||
|
hlist_add_head(&tw->tw_death_node, list);
|
||||||
|
|
||||||
|
if (twdr->tw_count++ == 0)
|
||||||
|
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
||||||
|
spin_unlock(&twdr->death_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL_GPL(inet_twsk_schedule);
|
||||||
|
|
||||||
|
void inet_twdr_twcal_tick(unsigned long data)
|
||||||
|
{
|
||||||
|
struct inet_timewait_death_row *twdr;
|
||||||
|
int n, slot;
|
||||||
|
unsigned long j;
|
||||||
|
unsigned long now = jiffies;
|
||||||
|
int killed = 0;
|
||||||
|
int adv = 0;
|
||||||
|
|
||||||
|
twdr = (struct inet_timewait_death_row *)data;
|
||||||
|
|
||||||
|
spin_lock(&twdr->death_lock);
|
||||||
|
if (twdr->twcal_hand < 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
slot = twdr->twcal_hand;
|
||||||
|
j = twdr->twcal_jiffie;
|
||||||
|
|
||||||
|
for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
|
||||||
|
if (time_before_eq(j, now)) {
|
||||||
|
struct hlist_node *node, *safe;
|
||||||
|
struct inet_timewait_sock *tw;
|
||||||
|
|
||||||
|
inet_twsk_for_each_inmate_safe(tw, node, safe,
|
||||||
|
&twdr->twcal_row[slot]) {
|
||||||
|
__inet_twsk_del_dead_node(tw);
|
||||||
|
__inet_twsk_kill(tw, twdr->hashinfo);
|
||||||
|
inet_twsk_put(tw);
|
||||||
|
killed++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!adv) {
|
||||||
|
adv = 1;
|
||||||
|
twdr->twcal_jiffie = j;
|
||||||
|
twdr->twcal_hand = slot;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!hlist_empty(&twdr->twcal_row[slot])) {
|
||||||
|
mod_timer(&twdr->twcal_timer, j);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
j += 1 << INET_TWDR_RECYCLE_TICK;
|
||||||
|
slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
||||||
|
}
|
||||||
|
twdr->twcal_hand = -1;
|
||||||
|
|
||||||
|
out:
|
||||||
|
if ((twdr->tw_count -= killed) == 0)
|
||||||
|
del_timer(&twdr->tw_timer);
|
||||||
|
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
|
||||||
|
spin_unlock(&twdr->death_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
|
||||||
|
@ -35,12 +35,6 @@
|
|||||||
#define SYNC_INIT 1
|
#define SYNC_INIT 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* New-style handling of TIME_WAIT sockets. */
|
|
||||||
|
|
||||||
static void inet_twdr_hangman(unsigned long data);
|
|
||||||
static void inet_twdr_twkill_work(void *data);
|
|
||||||
static void inet_twdr_twcal_tick(unsigned long data);
|
|
||||||
|
|
||||||
int sysctl_tcp_syncookies = SYNC_INIT;
|
int sysctl_tcp_syncookies = SYNC_INIT;
|
||||||
int sysctl_tcp_abort_on_overflow;
|
int sysctl_tcp_abort_on_overflow;
|
||||||
|
|
||||||
@ -63,10 +57,6 @@ struct inet_timewait_death_row tcp_death_row = {
|
|||||||
|
|
||||||
EXPORT_SYMBOL_GPL(tcp_death_row);
|
EXPORT_SYMBOL_GPL(tcp_death_row);
|
||||||
|
|
||||||
static void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
|
||||||
struct inet_timewait_death_row *twdr,
|
|
||||||
const int timeo);
|
|
||||||
|
|
||||||
static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
|
static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
|
||||||
{
|
{
|
||||||
if (seq == s_win)
|
if (seq == s_win)
|
||||||
@ -173,9 +163,11 @@ kill_with_rst:
|
|||||||
if (tw->tw_family == AF_INET &&
|
if (tw->tw_family == AF_INET &&
|
||||||
tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
|
tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
|
||||||
tcp_v4_tw_remember_stamp(tw))
|
tcp_v4_tw_remember_stamp(tw))
|
||||||
inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout);
|
inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
|
||||||
|
TCP_TIMEWAIT_LEN);
|
||||||
else
|
else
|
||||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
|
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
|
||||||
|
TCP_TIMEWAIT_LEN);
|
||||||
return TCP_TW_ACK;
|
return TCP_TW_ACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -213,7 +205,8 @@ kill:
|
|||||||
return TCP_TW_SUCCESS;
|
return TCP_TW_SUCCESS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
|
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
|
||||||
|
TCP_TIMEWAIT_LEN);
|
||||||
|
|
||||||
if (tmp_opt.saw_tstamp) {
|
if (tmp_opt.saw_tstamp) {
|
||||||
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
|
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
|
||||||
@ -263,7 +256,8 @@ kill:
|
|||||||
* Do not reschedule in the last case.
|
* Do not reschedule in the last case.
|
||||||
*/
|
*/
|
||||||
if (paws_reject || th->ack)
|
if (paws_reject || th->ack)
|
||||||
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
|
inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
|
||||||
|
TCP_TIMEWAIT_LEN);
|
||||||
|
|
||||||
/* Send ACK. Note, we do not put the bucket,
|
/* Send ACK. Note, we do not put the bucket,
|
||||||
* it will be released by caller.
|
* it will be released by caller.
|
||||||
@ -326,7 +320,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
|||||||
timeo = TCP_TIMEWAIT_LEN;
|
timeo = TCP_TIMEWAIT_LEN;
|
||||||
}
|
}
|
||||||
|
|
||||||
inet_twsk_schedule(tw, &tcp_death_row, timeo);
|
inet_twsk_schedule(tw, &tcp_death_row, timeo,
|
||||||
|
TCP_TIMEWAIT_LEN);
|
||||||
inet_twsk_put(tw);
|
inet_twsk_put(tw);
|
||||||
} else {
|
} else {
|
||||||
/* Sorry, if we're out of memory, just CLOSE this
|
/* Sorry, if we're out of memory, just CLOSE this
|
||||||
@ -341,261 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
|||||||
tcp_done(sk);
|
tcp_done(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns non-zero if quota exceeded. */
|
|
||||||
static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
|
|
||||||
const int slot)
|
|
||||||
{
|
|
||||||
struct inet_timewait_sock *tw;
|
|
||||||
struct hlist_node *node;
|
|
||||||
unsigned int killed;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
/* NOTE: compare this to previous version where lock
|
|
||||||
* was released after detaching chain. It was racy,
|
|
||||||
* because tw buckets are scheduled in not serialized context
|
|
||||||
* in 2.3 (with netfilter), and with softnet it is common, because
|
|
||||||
* soft irqs are not sequenced.
|
|
||||||
*/
|
|
||||||
killed = 0;
|
|
||||||
ret = 0;
|
|
||||||
rescan:
|
|
||||||
inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
|
|
||||||
__inet_twsk_del_dead_node(tw);
|
|
||||||
spin_unlock(&twdr->death_lock);
|
|
||||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
|
||||||
inet_twsk_put(tw);
|
|
||||||
killed++;
|
|
||||||
spin_lock(&twdr->death_lock);
|
|
||||||
if (killed > INET_TWDR_TWKILL_QUOTA) {
|
|
||||||
ret = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* While we dropped twdr->death_lock, another cpu may have
|
|
||||||
* killed off the next TW bucket in the list, therefore
|
|
||||||
* do a fresh re-read of the hlist head node with the
|
|
||||||
* lock reacquired. We still use the hlist traversal
|
|
||||||
* macro in order to get the prefetches.
|
|
||||||
*/
|
|
||||||
goto rescan;
|
|
||||||
}
|
|
||||||
|
|
||||||
twdr->tw_count -= killed;
|
|
||||||
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void inet_twdr_hangman(unsigned long data)
|
|
||||||
{
|
|
||||||
struct inet_timewait_death_row *twdr;
|
|
||||||
int unsigned need_timer;
|
|
||||||
|
|
||||||
twdr = (struct inet_timewait_death_row *)data;
|
|
||||||
spin_lock(&twdr->death_lock);
|
|
||||||
|
|
||||||
if (twdr->tw_count == 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
need_timer = 0;
|
|
||||||
if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
|
|
||||||
twdr->thread_slots |= (1 << twdr->slot);
|
|
||||||
mb();
|
|
||||||
schedule_work(&twdr->twkill_work);
|
|
||||||
need_timer = 1;
|
|
||||||
} else {
|
|
||||||
/* We purged the entire slot, anything left? */
|
|
||||||
if (twdr->tw_count)
|
|
||||||
need_timer = 1;
|
|
||||||
}
|
|
||||||
twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
|
|
||||||
if (need_timer)
|
|
||||||
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
|
||||||
out:
|
|
||||||
spin_unlock(&twdr->death_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
extern void twkill_slots_invalid(void);
|
|
||||||
|
|
||||||
static void inet_twdr_twkill_work(void *data)
|
|
||||||
{
|
|
||||||
struct inet_timewait_death_row *twdr = data;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
|
|
||||||
twkill_slots_invalid();
|
|
||||||
|
|
||||||
while (twdr->thread_slots) {
|
|
||||||
spin_lock_bh(&twdr->death_lock);
|
|
||||||
for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
|
|
||||||
if (!(twdr->thread_slots & (1 << i)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
while (inet_twdr_do_twkill_work(twdr, i) != 0) {
|
|
||||||
if (need_resched()) {
|
|
||||||
spin_unlock_bh(&twdr->death_lock);
|
|
||||||
schedule();
|
|
||||||
spin_lock_bh(&twdr->death_lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
twdr->thread_slots &= ~(1 << i);
|
|
||||||
}
|
|
||||||
spin_unlock_bh(&twdr->death_lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* These are always called from BH context. See callers in
|
|
||||||
* tcp_input.c to verify this.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* This is for handling early-kills of TIME_WAIT sockets. */
|
|
||||||
void inet_twsk_deschedule(struct inet_timewait_sock *tw,
|
|
||||||
struct inet_timewait_death_row *twdr)
|
|
||||||
{
|
|
||||||
spin_lock(&twdr->death_lock);
|
|
||||||
if (inet_twsk_del_dead_node(tw)) {
|
|
||||||
inet_twsk_put(tw);
|
|
||||||
if (--twdr->tw_count == 0)
|
|
||||||
del_timer(&twdr->tw_timer);
|
|
||||||
}
|
|
||||||
spin_unlock(&twdr->death_lock);
|
|
||||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void inet_twsk_schedule(struct inet_timewait_sock *tw,
|
|
||||||
struct inet_timewait_death_row *twdr,
|
|
||||||
const int timeo)
|
|
||||||
{
|
|
||||||
struct hlist_head *list;
|
|
||||||
int slot;
|
|
||||||
|
|
||||||
/* timeout := RTO * 3.5
|
|
||||||
*
|
|
||||||
* 3.5 = 1+2+0.5 to wait for two retransmits.
|
|
||||||
*
|
|
||||||
* RATIONALE: if FIN arrived and we entered TIME-WAIT state,
|
|
||||||
* our ACK acking that FIN can be lost. If N subsequent retransmitted
|
|
||||||
* FINs (or previous seqments) are lost (probability of such event
|
|
||||||
* is p^(N+1), where p is probability to lose single packet and
|
|
||||||
* time to detect the loss is about RTO*(2^N - 1) with exponential
|
|
||||||
* backoff). Normal timewait length is calculated so, that we
|
|
||||||
* waited at least for one retransmitted FIN (maximal RTO is 120sec).
|
|
||||||
* [ BTW Linux. following BSD, violates this requirement waiting
|
|
||||||
* only for 60sec, we should wait at least for 240 secs.
|
|
||||||
* Well, 240 consumes too much of resources 8)
|
|
||||||
* ]
|
|
||||||
* This interval is not reduced to catch old duplicate and
|
|
||||||
* responces to our wandering segments living for two MSLs.
|
|
||||||
* However, if we use PAWS to detect
|
|
||||||
* old duplicates, we can reduce the interval to bounds required
|
|
||||||
* by RTO, rather than MSL. So, if peer understands PAWS, we
|
|
||||||
* kill tw bucket after 3.5*RTO (it is important that this number
|
|
||||||
* is greater than TS tick!) and detect old duplicates with help
|
|
||||||
* of PAWS.
|
|
||||||
*/
|
|
||||||
slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
|
|
||||||
|
|
||||||
spin_lock(&twdr->death_lock);
|
|
||||||
|
|
||||||
/* Unlink it, if it was scheduled */
|
|
||||||
if (inet_twsk_del_dead_node(tw))
|
|
||||||
twdr->tw_count--;
|
|
||||||
else
|
|
||||||
atomic_inc(&tw->tw_refcnt);
|
|
||||||
|
|
||||||
if (slot >= INET_TWDR_RECYCLE_SLOTS) {
|
|
||||||
/* Schedule to slow timer */
|
|
||||||
if (timeo >= TCP_TIMEWAIT_LEN) {
|
|
||||||
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
|
||||||
} else {
|
|
||||||
slot = (timeo + twdr->period - 1) / twdr->period;
|
|
||||||
if (slot >= INET_TWDR_TWKILL_SLOTS)
|
|
||||||
slot = INET_TWDR_TWKILL_SLOTS - 1;
|
|
||||||
}
|
|
||||||
tw->tw_ttd = jiffies + timeo;
|
|
||||||
slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
|
|
||||||
list = &twdr->cells[slot];
|
|
||||||
} else {
|
|
||||||
tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
|
|
||||||
|
|
||||||
if (twdr->twcal_hand < 0) {
|
|
||||||
twdr->twcal_hand = 0;
|
|
||||||
twdr->twcal_jiffie = jiffies;
|
|
||||||
twdr->twcal_timer.expires = twdr->twcal_jiffie +
|
|
||||||
(slot << INET_TWDR_RECYCLE_TICK);
|
|
||||||
add_timer(&twdr->twcal_timer);
|
|
||||||
} else {
|
|
||||||
if (time_after(twdr->twcal_timer.expires,
|
|
||||||
jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
|
|
||||||
mod_timer(&twdr->twcal_timer,
|
|
||||||
jiffies + (slot << INET_TWDR_RECYCLE_TICK));
|
|
||||||
slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
|
||||||
}
|
|
||||||
list = &twdr->twcal_row[slot];
|
|
||||||
}
|
|
||||||
|
|
||||||
hlist_add_head(&tw->tw_death_node, list);
|
|
||||||
|
|
||||||
if (twdr->tw_count++ == 0)
|
|
||||||
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
|
|
||||||
spin_unlock(&twdr->death_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
void inet_twdr_twcal_tick(unsigned long data)
|
|
||||||
{
|
|
||||||
struct inet_timewait_death_row *twdr;
|
|
||||||
int n, slot;
|
|
||||||
unsigned long j;
|
|
||||||
unsigned long now = jiffies;
|
|
||||||
int killed = 0;
|
|
||||||
int adv = 0;
|
|
||||||
|
|
||||||
twdr = (struct inet_timewait_death_row *)data;
|
|
||||||
|
|
||||||
spin_lock(&twdr->death_lock);
|
|
||||||
if (twdr->twcal_hand < 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
slot = twdr->twcal_hand;
|
|
||||||
j = twdr->twcal_jiffie;
|
|
||||||
|
|
||||||
for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
|
|
||||||
if (time_before_eq(j, now)) {
|
|
||||||
struct hlist_node *node, *safe;
|
|
||||||
struct inet_timewait_sock *tw;
|
|
||||||
|
|
||||||
inet_twsk_for_each_inmate_safe(tw, node, safe,
|
|
||||||
&twdr->twcal_row[slot]) {
|
|
||||||
__inet_twsk_del_dead_node(tw);
|
|
||||||
__inet_twsk_kill(tw, twdr->hashinfo);
|
|
||||||
inet_twsk_put(tw);
|
|
||||||
killed++;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!adv) {
|
|
||||||
adv = 1;
|
|
||||||
twdr->twcal_jiffie = j;
|
|
||||||
twdr->twcal_hand = slot;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!hlist_empty(&twdr->twcal_row[slot])) {
|
|
||||||
mod_timer(&twdr->twcal_timer, j);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j += 1 << INET_TWDR_RECYCLE_TICK;
|
|
||||||
slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
|
|
||||||
}
|
|
||||||
twdr->twcal_hand = -1;
|
|
||||||
|
|
||||||
out:
|
|
||||||
if ((twdr->tw_count -= killed) == 0)
|
|
||||||
del_timer(&twdr->tw_timer);
|
|
||||||
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
|
|
||||||
spin_unlock(&twdr->death_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This is not only more efficient than what we used to do, it eliminates
|
/* This is not only more efficient than what we used to do, it eliminates
|
||||||
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
|
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
|
||||||
*
|
*
|
||||||
@ -933,4 +673,3 @@ EXPORT_SYMBOL(tcp_check_req);
|
|||||||
EXPORT_SYMBOL(tcp_child_process);
|
EXPORT_SYMBOL(tcp_child_process);
|
||||||
EXPORT_SYMBOL(tcp_create_openreq_child);
|
EXPORT_SYMBOL(tcp_create_openreq_child);
|
||||||
EXPORT_SYMBOL(tcp_timewait_state_process);
|
EXPORT_SYMBOL(tcp_timewait_state_process);
|
||||||
EXPORT_SYMBOL(inet_twsk_deschedule);
|
|
||||||
|
Loading…
Reference in New Issue
Block a user