📄 tcp.c
字号:
timeo = schedule_timeout(timeo); lock_sock(sk); err = 0; if (tp->accept_queue) break; err = -EINVAL; if (sk->sk_state != TCP_LISTEN) break; err = sock_intr_errno(timeo); if (signal_pending(current)) break; err = -EAGAIN; if (!timeo) break; } finish_wait(sk->sk_sleep, &wait); return err;}/* * This will accept the next outstanding connection. */struct sock *tcp_accept(struct sock *sk, int flags, int *err){ struct tcp_opt *tp = tcp_sk(sk); struct open_request *req; struct sock *newsk; int error; lock_sock(sk); /* We need to make sure that this socket is listening, * and that it has something pending. */ error = -EINVAL; if (sk->sk_state != TCP_LISTEN) goto out; /* Find already established connection */ if (!tp->accept_queue) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ error = -EAGAIN; if (!timeo) goto out; error = wait_for_connect(sk, timeo); if (error) goto out; } req = tp->accept_queue; if ((tp->accept_queue = req->dl_next) == NULL) tp->accept_queue_tail = NULL; newsk = req->sk; sk_acceptq_removed(sk); tcp_openreq_fastfree(req); BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); release_sock(sk); return newsk;out: release_sock(sk); *err = error; return NULL;}/* * Socket option code for TCP. */int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen){ struct tcp_opt *tp = tcp_sk(sk); int val; int err = 0; if (level != SOL_TCP) return tp->af_specific->setsockopt(sk, level, optname, optval, optlen); if (optlen < sizeof(int)) return -EINVAL; if (get_user(val, (int __user *)optval)) return -EFAULT; lock_sock(sk); switch (optname) { case TCP_MAXSEG: /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ if (val < 8 || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } tp->user_mss = val; break; case TCP_NODELAY: if (val) { /* TCP_NODELAY is weaker than TCP_CORK, so that * this option on corked socket is remembered, but * it is not activated until cork is cleared. * * However, when TCP_NODELAY is set we make * an explicit push, which overrides even TCP_CORK * for currently queued segments. */ tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; tcp_push_pending_frames(sk, tp); } else { tp->nonagle &= ~TCP_NAGLE_OFF; } break; case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit * any pending partial frames in the queue. This is * meant to be used alongside sendfile() to get properly * filled frames when the user (for example) must write * out headers with a write() call first and then use * sendfile to send out the data parts. * * TCP_CORK can be set together with TCP_NODELAY and it is * stronger than TCP_NODELAY. */ if (val) { tp->nonagle |= TCP_NAGLE_CORK; } else { tp->nonagle &= ~TCP_NAGLE_CORK; if (tp->nonagle&TCP_NAGLE_OFF) tp->nonagle |= TCP_NAGLE_PUSH; tcp_push_pending_frames(sk, tp); } break; case TCP_KEEPIDLE: if (val < 1 || val > MAX_TCP_KEEPIDLE) err = -EINVAL; else { tp->keepalive_time = val * HZ; if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; if (tp->keepalive_time > elapsed) elapsed = tp->keepalive_time - elapsed; else elapsed = 0; tcp_reset_keepalive_timer(sk, elapsed); } } break; case TCP_KEEPINTVL: if (val < 1 || val > MAX_TCP_KEEPINTVL) err = -EINVAL; else tp->keepalive_intvl = val * HZ; break; case TCP_KEEPCNT: if (val < 1 || val > MAX_TCP_KEEPCNT) err = -EINVAL; else tp->keepalive_probes = val; break; case TCP_SYNCNT: if (val < 1 || val > MAX_TCP_SYNCNT) err = -EINVAL; else tp->syn_retries = val; break; case TCP_LINGER2: if (val < 0) tp->linger2 = -1; else if (val > sysctl_tcp_fin_timeout / HZ) tp->linger2 = 0; else tp->linger2 = val * HZ; break; case TCP_DEFER_ACCEPT: tp->defer_accept = 0; if (val > 0) { /* Translate value in seconds to number of * retransmits */ while (tp->defer_accept < 32 && val > ((TCP_TIMEOUT_INIT / HZ) << tp->defer_accept)) tp->defer_accept++; tp->defer_accept++; } break; case TCP_WINDOW_CLAMP: if (!val) { if (sk->sk_state != TCP_CLOSE) { err = -EINVAL; break; } tp->window_clamp = 0; } else tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? SOCK_MIN_RCVBUF / 2 : val; break; case TCP_QUICKACK: if (!val) { tp->ack.pingpong = 1; } else { tp->ack.pingpong = 0; if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && tcp_ack_scheduled(tp)) { tp->ack.pending |= TCP_ACK_PUSHED; cleanup_rbuf(sk, 1); if (!(val & 1)) tp->ack.pingpong = 1; } } break; default: err = -ENOPROTOOPT; break; }; release_sock(sk); return err;}int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen){ struct tcp_opt *tp = tcp_sk(sk); int val, len; if (level != SOL_TCP) return tp->af_specific->getsockopt(sk, level, optname, optval, optlen); if (get_user(len, optlen)) return -EFAULT; len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; switch (optname) { case TCP_MAXSEG: val = tp->mss_cache_std; if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) val = tp->user_mss; break; case TCP_NODELAY: val = !!(tp->nonagle&TCP_NAGLE_OFF); break; case TCP_CORK: val = !!(tp->nonagle&TCP_NAGLE_CORK); break; case TCP_KEEPIDLE: val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ; break; case TCP_KEEPINTVL: val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ; break; case TCP_KEEPCNT: val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; break; case TCP_SYNCNT: val = tp->syn_retries ? : sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << (tp->defer_accept - 1)); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; break; case TCP_INFO: { struct tcp_info info; if (get_user(len, optlen)) return -EFAULT; tcp_get_info(sk, &info); len = min_t(unsigned int, len, sizeof(info)); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &info, len)) return -EFAULT; return 0; } case TCP_QUICKACK: val = !tp->ack.pingpong; break; default: return -ENOPROTOOPT; }; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0;}extern void __skb_cb_too_small_for_tcp(int, int);extern void tcpdiag_init(void);static __initdata unsigned long thash_entries;static int __init set_thash_entries(char *str){ if (!str) return 0; thash_entries = simple_strtoul(str, &str, 0); return 1;}__setup("thash_entries=", set_thash_entries);void __init tcp_init(void){ struct sk_buff *skb = NULL; unsigned long goal; int order, i; if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), sizeof(skb->cb)); tcp_openreq_cachep = kmem_cache_create("tcp_open_request", sizeof(struct open_request), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!tcp_openreq_cachep) panic("tcp_init: Cannot alloc open_request cache."); tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct tcp_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!tcp_bucket_cachep) panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", sizeof(struct tcp_tw_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!tcp_timewait_cachep) panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); /* Size and allocate the main established and bind bucket * hash tables. * * The methodology is similar to that of the buffer cache. */ if (num_physpages >= (128 * 1024)) goal = num_physpages >> (21 - PAGE_SHIFT); else goal = num_physpages >> (23 - PAGE_SHIFT); if (thash_entries) goal = (thash_entries * sizeof(struct tcp_ehash_bucket)) >> PAGE_SHIFT; for (order = 0; (1UL << order) < goal; order++) ; do { tcp_ehash_size = (1UL << order) * PAGE_SIZE / sizeof(struct tcp_ehash_bucket); tcp_ehash_size >>= 1; while (tcp_ehash_size & (tcp_ehash_size - 1)) tcp_ehash_size--; tcp_ehash = (struct tcp_ehash_bucket *) __get_free_pages(GFP_ATOMIC, order); } while (!tcp_ehash && --order > 0); if (!tcp_ehash) panic("Failed to allocate TCP established hash table\n"); for (i = 0; i < (tcp_ehash_size << 1); i++) { tcp_ehash[i].lock = RW_LOCK_UNLOCKED; INIT_HLIST_HEAD(&tcp_ehash[i].chain); } do { tcp_bhash_size = (1UL << order) * PAGE_SIZE / sizeof(struct tcp_bind_hashbucket); if ((tcp_bhash_size > (64 * 1024)) && order > 0) continue; tcp_bhash = (struct tcp_bind_hashbucket *) __get_free_pages(GFP_ATOMIC, order); } while (!tcp_bhash && --order >= 0); if (!tcp_bhash) panic("Failed to allocate TCP bind hash table\n"); for (i = 0; i < tcp_bhash_size; i++) { tcp_bhash[i].lock = SPIN_LOCK_UNLOCKED; INIT_HLIST_HEAD(&tcp_bhash[i].chain); } /* Try to be a bit smarter and adjust defaults depending * on available memory. */ if (order > 4) { sysctl_local_port_range[0] = 32768; sysctl_local_port_range[1] = 61000; sysctl_tcp_max_tw_buckets = 180000; sysctl_tcp_max_orphans = 4096 << (order - 4); sysctl_max_syn_backlog = 1024; } else if (order < 3) { sysctl_local_port_range[0] = 1024 * (3 - order); sysctl_tcp_max_tw_buckets >>= (3 - order); sysctl_tcp_max_orphans >>= (3 - order); sysctl_max_syn_backlog = 128; } tcp_port_rover = sysctl_local_port_range[0] - 1; sysctl_tcp_mem[0] = 768 << order; sysctl_tcp_mem[1] = 1024 << order; sysctl_tcp_mem[2] = 1536 << order; if (order < 3) { sysctl_tcp_wmem[2] = 64 * 1024; sysctl_tcp_rmem[0] = PAGE_SIZE; sysctl_tcp_rmem[1] = 43689; sysctl_tcp_rmem[2] = 2 * 43689; } printk(KERN_INFO "TCP: Hash tables configured " "(established %d bind %d)\n", tcp_ehash_size << 1, tcp_bhash_size); tcpdiag_init();}EXPORT_SYMBOL(tcp_accept);EXPORT_SYMBOL(tcp_close);EXPORT_SYMBOL(tcp_close_state);EXPORT_SYMBOL(tcp_destroy_sock);EXPORT_SYMBOL(tcp_disconnect);EXPORT_SYMBOL(tcp_getsockopt);EXPORT_SYMBOL(tcp_ioctl);EXPORT_SYMBOL(tcp_openreq_cachep);EXPORT_SYMBOL(tcp_poll);EXPORT_SYMBOL(tcp_read_sock);EXPORT_SYMBOL(tcp_recvmsg);EXPORT_SYMBOL(tcp_sendmsg);EXPORT_SYMBOL(tcp_sendpage);EXPORT_SYMBOL(tcp_setsockopt);EXPORT_SYMBOL(tcp_shutdown);EXPORT_SYMBOL(tcp_statistics);EXPORT_SYMBOL(tcp_timewait_cachep);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -