tcp_minisocks.c
                tw->tw_ts_recent_stamp = tp->ts_recent_stamp;
                tw_dead_node_init(tw);

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                if (tw->tw_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr);
                        ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr);
                        tw->tw_v6_ipv6only = np->ipv6only;
                } else {
                        memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr));
                        memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr));
                        tw->tw_v6_ipv6only = 0;
                }
#endif
                /* Linkage updates. */
                __tcp_tw_hashdance(sk, tw);

                /* Get the TIME_WAIT timeout firing. */
                if (timeo < rto)
                        timeo = rto;

                if (recycle_ok) {
                        tw->tw_timeout = rto;
                } else {
                        tw->tw_timeout = TCP_TIMEWAIT_LEN;
                        if (state == TCP_TIME_WAIT)
                                timeo = TCP_TIMEWAIT_LEN;
                }

                tcp_tw_schedule(tw, timeo);
                tcp_tw_put(tw);
        } else {
                /* Sorry, if we're out of memory, just CLOSE this
                 * socket up.  We've got bigger problems than
                 * non-graceful socket closings.
                 */
                if (net_ratelimit())
                        printk(KERN_INFO "TCP: time wait bucket table overflow\n");
        }

        tcp_update_metrics(sk);
        tcp_done(sk);
}

/* Kill off TIME_WAIT sockets once their lifetime has expired. */
static int tcp_tw_death_row_slot;
static void tcp_twkill(unsigned long);

/* TIME_WAIT reaping mechanism. */
#define TCP_TWKILL_SLOTS        8       /* Please keep this a power of 2. */
#define TCP_TWKILL_PERIOD       (TCP_TIMEWAIT_LEN / TCP_TWKILL_SLOTS)
#define TCP_TWKILL_QUOTA        100

static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS];
static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED;
static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0);
static void twkill_work(void *);
static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL);
static u32 twkill_thread_slots;

/* Returns non-zero if quota exceeded. */
static int tcp_do_twkill_work(int slot, unsigned int quota)
{
        struct tcp_tw_bucket *tw;
        struct hlist_node *node;
        unsigned int killed;
        int ret;

        /* NOTE: compare this to the previous version, where the lock was
         * released after detaching the chain.  That was racy, because tw
         * buckets are scheduled in a non-serialized context in 2.3 (with
         * netfilter), and with softnet it is common, because soft irqs
         * are not sequenced.
         */
        killed = 0;
        ret = 0;
rescan:
        tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) {
                __tw_del_dead_node(tw);
                spin_unlock(&tw_death_lock);
                tcp_timewait_kill(tw);
                tcp_tw_put(tw);
                killed++;
                spin_lock(&tw_death_lock);
                if (killed > quota) {
                        ret = 1;
                        break;
                }

                /* While we dropped tw_death_lock, another cpu may have
                 * killed off the next TW bucket in the list, therefore
                 * do a fresh re-read of the hlist head node with the
                 * lock reacquired.  We still use the hlist traversal
                 * macro in order to get the prefetches.
                 */
                goto rescan;
        }

        tcp_tw_count -= killed;
        NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);

        return ret;
}
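/* Illustrative walk-through of the quota logic above: with
 * TCP_TWKILL_QUOTA == 100, a death-row slot holding 250 buckets is
 * drained over three calls.  The first two calls each kill 101 buckets
 * (the check fires only once the quota has already been exceeded) and
 * return non-zero, so the caller has to come back later; the third call
 * kills the remaining 48 and returns 0.  Also note that tw_death_lock is
 * dropped around every tcp_timewait_kill(), which is why the loop jumps
 * back to "rescan" and re-reads the list head after each kill rather
 * than trusting a cached next pointer.
 */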
static void tcp_twkill(unsigned long dummy)
{
        int need_timer, ret;

        spin_lock(&tw_death_lock);

        if (tcp_tw_count == 0)
                goto out;

        need_timer = 0;
        ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA);
        if (ret) {
                twkill_thread_slots |= (1 << tcp_tw_death_row_slot);
                mb();
                schedule_work(&tcp_twkill_work);
                need_timer = 1;
        } else {
                /* We purged the entire slot, anything left? */
                if (tcp_tw_count)
                        need_timer = 1;
        }
        tcp_tw_death_row_slot =
                ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
        if (need_timer)
                mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD);
out:
        spin_unlock(&tw_death_lock);
}

extern void twkill_slots_invalid(void);

static void twkill_work(void *dummy)
{
        int i;

        if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8))
                twkill_slots_invalid();

        while (twkill_thread_slots) {
                spin_lock_bh(&tw_death_lock);
                for (i = 0; i < TCP_TWKILL_SLOTS; i++) {
                        if (!(twkill_thread_slots & (1 << i)))
                                continue;

                        while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) {
                                if (need_resched()) {
                                        spin_unlock_bh(&tw_death_lock);
                                        schedule();
                                        spin_lock_bh(&tw_death_lock);
                                }
                        }

                        twkill_thread_slots &= ~(1 << i);
                }
                spin_unlock_bh(&tw_death_lock);
        }
}

/* These are always called from BH context.  See callers in
 * tcp_input.c to verify this.
 */

/* This is for handling early-kills of TIME_WAIT sockets. */
void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
{
        spin_lock(&tw_death_lock);
        if (tw_del_dead_node(tw)) {
                tcp_tw_put(tw);
                if (--tcp_tw_count == 0)
                        del_timer(&tcp_tw_timer);
        }
        spin_unlock(&tw_death_lock);
        tcp_timewait_kill(tw);
}
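/* The declarations below add a second, finer-grained timer wheel (the
 * "short-time timewait calendar") next to the slow tcp_tw_death_row
 * wheel above.  Roughly, as the constants in this file imply:
 *
 *   slow wheel:  TCP_TWKILL_SLOTS (8) rows, ticking every
 *                TCP_TWKILL_PERIOD = TCP_TIMEWAIT_LEN / 8 jiffies;
 *                used when the timeout is too long for the calendar,
 *                typically a full-length TIME_WAIT.
 *   calendar:    TCP_TW_RECYCLE_SLOTS rows, ticking every
 *                2^TCP_TW_RECYCLE_TICK jiffies; used when PAWS lets
 *                tcp_tw_schedule() expire the bucket after ~3.5*RTO.
 *
 * tcp_tw_schedule() chooses between the two purely by whether the
 * requested timeout fits within the calendar's range.
 */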
/* Short-time timewait calendar */

static int tcp_twcal_hand = -1;
static int tcp_twcal_jiffie;
static void tcp_twcal_tick(unsigned long);
static struct timer_list tcp_twcal_timer = TIMER_INITIALIZER(tcp_twcal_tick, 0, 0);
static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];

void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
{
        struct hlist_head *list;
        int slot;

        /* timeout := RTO * 3.5
         *
         * 3.5 = 1+2+0.5 to wait for two retransmits.
         *
         * RATIONALE: if a FIN arrived and we entered TIME-WAIT state,
         * our ACK acking that FIN can be lost.  If N subsequent
         * retransmitted FINs (or previous segments) are lost, the
         * probability of such an event is p^(N+1), where p is the
         * probability of losing a single packet, and the time to detect
         * the loss is about RTO*(2^N - 1) with exponential backoff.
         * The normal timewait length is calculated so that we wait at
         * least for one retransmitted FIN (maximal RTO is 120 sec).
         * [ BTW Linux, following BSD, violates this requirement by
         *   waiting only for 60 sec; we should wait at least 240 secs.
         *   Well, 240 consumes too many resources 8)
         * ]
         * This interval is not reduced to catch old duplicates and
         * responses to our wandering segments living for two MSLs.
         * However, if we use PAWS to detect old duplicates, we can
         * reduce the interval to the bounds required by RTO, rather
         * than MSL.  So, if the peer understands PAWS, we kill the tw
         * bucket after 3.5*RTO (it is important that this number is
         * greater than the TS tick!) and detect old duplicates with
         * the help of PAWS.
         */
        slot = (timeo + (1 << TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;

        spin_lock(&tw_death_lock);

        /* Unlink it, if it was scheduled */
        if (tw_del_dead_node(tw))
                tcp_tw_count--;
        else
                atomic_inc(&tw->tw_refcnt);

        if (slot >= TCP_TW_RECYCLE_SLOTS) {
                /* Schedule to slow timer */
                if (timeo >= TCP_TIMEWAIT_LEN) {
                        slot = TCP_TWKILL_SLOTS - 1;
                } else {
                        slot = (timeo + TCP_TWKILL_PERIOD - 1) / TCP_TWKILL_PERIOD;
                        if (slot >= TCP_TWKILL_SLOTS)
                                slot = TCP_TWKILL_SLOTS - 1;
                }
                tw->tw_ttd = jiffies + timeo;
                slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
                list = &tcp_tw_death_row[slot];
        } else {
                tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK);

                if (tcp_twcal_hand < 0) {
                        tcp_twcal_hand = 0;
                        tcp_twcal_jiffie = jiffies;
                        tcp_twcal_timer.expires = tcp_twcal_jiffie +
                                        (slot << TCP_TW_RECYCLE_TICK);
                        add_timer(&tcp_twcal_timer);
                } else {
                        if (time_after(tcp_twcal_timer.expires,
                                       jiffies + (slot << TCP_TW_RECYCLE_TICK)))
                                mod_timer(&tcp_twcal_timer,
                                          jiffies + (slot << TCP_TW_RECYCLE_TICK));
                        slot = (tcp_twcal_hand + slot) & (TCP_TW_RECYCLE_SLOTS - 1);
                }
                list = &tcp_twcal_row[slot];
        }

        hlist_add_head(&tw->tw_death_node, list);

        if (tcp_tw_count++ == 0)
                mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD);
        spin_unlock(&tw_death_lock);
}
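/* Worked example of the slot arithmetic in tcp_tw_schedule() above.
 * The ceiling division
 *
 *      slot = (timeo + (1 << TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
 *
 * converts a timeout in jiffies into calendar ticks of
 * 2^TCP_TW_RECYCLE_TICK jiffies each.  Assuming, purely for illustration,
 * a 128-jiffy tick and 32 calendar slots (the real tick width depends on
 * HZ): a recycled bucket with RTO = 200 jiffies gets timeo = 3.5 * RTO =
 * 700 jiffies, which rounds up to 6 ticks and lands on the calendar,
 * while a full TCP_TIMEWAIT_LEN timeout needs far more than 32 ticks and
 * falls back to the slow tcp_tw_death_row wheel with TCP_TWKILL_PERIOD
 * granularity.
 */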
void tcp_twcal_tick(unsigned long dummy)
{
        int n, slot;
        unsigned long j;
        unsigned long now = jiffies;
        int killed = 0;
        int adv = 0;

        spin_lock(&tw_death_lock);
        if (tcp_twcal_hand < 0)
                goto out;

        slot = tcp_twcal_hand;
        j = tcp_twcal_jiffie;

        for (n = 0; n < TCP_TW_RECYCLE_SLOTS; n++) {
                if (time_before_eq(j, now)) {
                        struct hlist_node *node, *safe;
                        struct tcp_tw_bucket *tw;

                        tw_for_each_inmate_safe(tw, node, safe,
                                                &tcp_twcal_row[slot]) {
                                __tw_del_dead_node(tw);
                                tcp_timewait_kill(tw);
                                tcp_tw_put(tw);
                                killed++;
                        }
                } else {
                        if (!adv) {
                                adv = 1;
                                tcp_twcal_jiffie = j;
                                tcp_twcal_hand = slot;
                        }

                        if (!hlist_empty(&tcp_twcal_row[slot])) {
                                mod_timer(&tcp_twcal_timer, j);
                                goto out;
                        }
                }
                j += (1 << TCP_TW_RECYCLE_TICK);
                slot = (slot + 1) & (TCP_TW_RECYCLE_SLOTS - 1);
        }
        tcp_twcal_hand = -1;

out:
        if ((tcp_tw_count -= killed) == 0)
                del_timer(&tcp_tw_timer);
        NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
        spin_unlock(&tw_death_lock);
}

/* This is not only more efficient than what we used to do, it eliminates
 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
 *
 * Actually, we could avoid lots of memory writes here.  tp of the
 * listening socket contains all the necessary default parameters.
 */
struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
                                      struct sk_buff *skb)
{
        /* allocate the newsk from the same slab of the master sock,
         * if not, at sk_free time we'll try to free it from the wrong
         * slabcache (i.e. is it TCPv4 or v6?) -acme
         */
        struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0, sk->sk_prot->slab);

        if (newsk != NULL) {
                struct tcp_opt *newtp;
                struct sk_filter *filter;

                memcpy(newsk, sk, sizeof(struct tcp_sock));
                newsk->sk_state = TCP_SYN_RECV;

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                tcp_sk(newsk)->bind_hash = NULL;

                /* Clone the TCP header template */
                inet_sk(newsk)->dport = req->rmt_port;

                sock_lock_init(newsk);
                bh_lock_sock(newsk);

                newsk->sk_dst_lock = RW_LOCK_UNLOCKED;
                atomic_set(&newsk->sk_rmem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                skb_queue_head_init(&newsk->sk_write_queue);
                atomic_set(&newsk->sk_omem_alloc, 0);
                newsk->sk_wmem_queued = 0;
                newsk->sk_forward_alloc = 0;

                sock_reset_flag(newsk, SOCK_DONE);
                newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
                newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
                newsk->sk_send_head = NULL;
                newsk->sk_callback_lock = RW_LOCK_UNLOCKED;