📄 tcp.c
        if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) {
                tcp_enter_memory_pressure();
                goto suppress_allocation;
        }

        /* Under pressure. */
        if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1])
                tcp_enter_memory_pressure();

        if (kind) {
                if (atomic_read(&sk->rmem_alloc) < sysctl_tcp_rmem[0])
                        return 1;
        } else {
                if (sk->wmem_queued < sysctl_tcp_wmem[0])
                        return 1;
        }

        if (!tcp_memory_pressure ||
            sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated) *
                TCP_PAGES(sk->wmem_queued+atomic_read(&sk->rmem_alloc)+sk->forward_alloc))
                return 1;

suppress_allocation:

        if (kind == 0) {
                tcp_moderate_sndbuf(sk);

                /* Fail only if socket is _under_ its sndbuf.
                 * In this case we cannot block, so that we have to fail.
                 */
                if (sk->wmem_queued+size >= sk->sndbuf)
                        return 1;
        }

        /* Alas. Undo changes. */
        sk->forward_alloc -= amt*TCP_MEM_QUANTUM;
        atomic_sub(amt, &tcp_memory_allocated);
        return 0;
}

void __tcp_mem_reclaim(struct sock *sk)
{
        if (sk->forward_alloc >= TCP_MEM_QUANTUM) {
                atomic_sub(sk->forward_alloc/TCP_MEM_QUANTUM, &tcp_memory_allocated);
                sk->forward_alloc &= (TCP_MEM_QUANTUM-1);
                if (tcp_memory_pressure &&
                    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
                        tcp_memory_pressure = 0;
        }
}

void tcp_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->rmem_alloc);
        sk->forward_alloc += skb->truesize;
}

/*
 * LISTEN is a special case for poll..
 */
static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait)
{
        return sk->tp_pinfo.af_tcp.accept_queue ? (POLLIN | POLLRDNORM) : 0;
}

/*
 *      Wait for a TCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        poll_wait(file, sk->sleep, wait);
        if (sk->state == TCP_LISTEN)
                return tcp_listen_poll(sk, wait);

        /* Socket is not locked. We are protected from async events
         * by poll logic, and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->err)
                mask = POLLERR;

        /*
         * POLLHUP is certainly not done right. But poll() doesn't
         * have a notion of HUP in just one direction, and for a
         * socket the read side is more interesting.
         *
         * Some poll() documentation says that POLLHUP is incompatible
         * with the POLLOUT/POLLWR flags, so somebody should check this
         * all. But careful, it tends to be safer to return too many
         * bits than too few, and you can easily break real applications
         * if you don't tell them that something has hung up!
         *
         * Check-me.
         *
         * Check number 1. POLLHUP is an _UNMASKABLE_ event (see UNIX98 and
         * our fs/select.c). It means that after we received EOF,
         * poll always returns immediately, making poll() for write()
         * impossible in state CLOSE_WAIT. One solution is evident --- to set
         * POLLHUP if and only if shutdown has been made in both directions.
         * Actually, it is interesting to look at how Solaris and DUX
         * solve this dilemma. I would prefer POLLHUP to be maskable;
         * then we could set it on SND_SHUTDOWN. BTW the examples given
         * in Stevens' books assume exactly this behaviour, which explains
         * why POLLHUP is incompatible with POLLOUT.    --ANK
         *
         * NOTE. A check for TCP_CLOSE is added. The goal is to prevent
         * blocking on a fresh not-connected or disconnected socket.
         *                                              --ANK
         */
        if (sk->shutdown == SHUTDOWN_MASK || sk->state == TCP_CLOSE)
                mask |= POLLHUP;
        if (sk->shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? */
        if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
                /* Potential race condition. If read of tp below will
                 * escape above sk->state, we can be illegally awakened
                 * in SYN_* states. */
                if ((tp->rcv_nxt != tp->copied_seq) &&
                    (tp->urg_seq != tp->copied_seq ||
                     tp->rcv_nxt != tp->copied_seq+1 ||
                     sk->urginline || !tp->urg_data))
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->shutdown & SEND_SHUTDOWN)) {
                        if (tcp_wspace(sk) >= tcp_min_write_space(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (tcp_wspace(sk) >= tcp_min_write_space(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }

                if (tp->urg_data & TCP_URG_VALID)
                        mask |= POLLPRI;
        }
        return mask;
}

/*
 *      TCP socket write_space callback. Not used.
 */
void tcp_write_space(struct sock *sk)
{
}

int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int answ;

        switch(cmd) {
        case SIOCINQ:
                if (sk->state == TCP_LISTEN)
                        return(-EINVAL);

                lock_sock(sk);
                if ((1<<sk->state) & (TCPF_SYN_SENT|TCPF_SYN_RECV))
                        answ = 0;
                else if (sk->urginline || !tp->urg_data ||
                         before(tp->urg_seq,tp->copied_seq) ||
                         !before(tp->urg_seq,tp->rcv_nxt)) {
                        answ = tp->rcv_nxt - tp->copied_seq;

                        /* Subtract 1, if FIN is in queue. */
                        if (answ && !skb_queue_empty(&sk->receive_queue))
                                answ -= ((struct sk_buff*)sk->receive_queue.prev)->h.th->fin;
                } else
                        answ = tp->urg_seq - tp->copied_seq;
                release_sock(sk);
                break;
        case SIOCATMARK:
                {
                        answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
                        break;
                }
        case SIOCOUTQ:
                if (sk->state == TCP_LISTEN)
                        return(-EINVAL);

                if ((1<<sk->state) & (TCPF_SYN_SENT|TCPF_SYN_RECV))
                        answ = 0;
                else
                        answ = tp->write_seq - tp->snd_una;
                break;
        default:
                return(-ENOIOCTLCMD);
        };

        return put_user(answ, (int *)arg);
}

int tcp_listen_start(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcp_listen_opt *lopt;

        sk->max_ack_backlog = 0;
        sk->ack_backlog = 0;
        tp->accept_queue = tp->accept_queue_tail = NULL;
        tp->syn_wait_lock = RW_LOCK_UNLOCKED;

        lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
        if (!lopt)
                return -ENOMEM;

        memset(lopt, 0, sizeof(struct tcp_listen_opt));
        for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
                if ((1<<lopt->max_qlen_log) >= sysctl_max_syn_backlog)
                        break;

        write_lock_bh(&tp->syn_wait_lock);
        tp->listen_opt = lopt;
        write_unlock_bh(&tp->syn_wait_lock);

        /* There is a race window here: we announce ourselves listening,
         * but this transition is still not validated by get_port().
         * It is OK, because this socket enters the hash table only
         * after validation is complete.
         */
        sk->state = TCP_LISTEN;
        if (sk->prot->get_port(sk, sk->num) == 0) {
                sk->sport = htons(sk->num);

                sk_dst_reset(sk);
                sk->prot->hash(sk);

                return 0;
        }

        sk->state = TCP_CLOSE;
        write_lock_bh(&tp->syn_wait_lock);
        tp->listen_opt = NULL;
        write_unlock_bh(&tp->syn_wait_lock);
        kfree(lopt);
        return -EADDRINUSE;
}
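/*
 * Illustrative userspace sketch (not part of this file): how the event mask
 * computed by tcp_poll() above is typically consumed.  POLLIN means readable
 * data (or a pending FIN via RCV_SHUTDOWN), POLLPRI means valid urgent data,
 * and POLLHUP is only set once both directions are shut down or the socket
 * is in TCP_CLOSE.  The descriptor name `fd` and the 5 second timeout are
 * assumptions made for the example only.
 */
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

static void wait_and_read(int fd)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLPRI };
        char buf[4096];

        if (poll(&pfd, 1, 5000) <= 0)           /* timeout or error */
                return;

        if (pfd.revents & POLLPRI)
                printf("urgent data pending\n");        /* TCP_URG_VALID case */
        if (pfd.revents & POLLIN) {
                ssize_t n = read(fd, buf, sizeof(buf));
                if (n == 0)
                        printf("peer sent FIN\n");      /* RCV_SHUTDOWN case */
        }
        if (pfd.revents & POLLHUP)
                printf("both directions shut down\n");
}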
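/*
 * Illustrative userspace sketch (not part of this file): querying the
 * counters that tcp_ioctl() above services.  SIOCINQ returns the unread
 * bytes up to the urgent mark (minus a queued FIN), SIOCOUTQ the bytes not
 * yet acknowledged (write_seq - snd_una), and SIOCATMARK whether the read
 * pointer sits at the urgent mark.  The descriptor name `fd` is an
 * assumption made for the example only.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

static void report_tcp_queues(int fd)
{
        int inq = 0, outq = 0, atmark = 0;

        if (ioctl(fd, SIOCINQ, &inq) == 0 &&
            ioctl(fd, SIOCOUTQ, &outq) == 0 &&
            ioctl(fd, SIOCATMARK, &atmark) == 0)
                printf("inq=%d outq=%d atmark=%d\n", inq, outq, atmark);
}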
/*
 *      This routine closes sockets which have been at least partially
 *      opened, but not yet accepted.
 */
static void tcp_listen_stop (struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcp_listen_opt *lopt = tp->listen_opt;
        struct open_request *acc_req = tp->accept_queue;
        struct open_request *req;
        int i;

        tcp_delete_keepalive_timer(sk);

        /* make all the listen_opt local to us */
        write_lock_bh(&tp->syn_wait_lock);
        tp->listen_opt = NULL;
        write_unlock_bh(&tp->syn_wait_lock);
        tp->accept_queue = tp->accept_queue_tail = NULL;

        if (lopt->qlen) {
                for (i=0; i<TCP_SYNQ_HSIZE; i++) {
                        while ((req = lopt->syn_table[i]) != NULL) {
                                lopt->syn_table[i] = req->dl_next;
                                lopt->qlen--;
                                tcp_openreq_free(req);

                /* Following specs, it would be better either to send FIN
                 * (and enter FIN-WAIT-1, it is normal close)
                 * or to send active reset (abort).
                 * Certainly, it is pretty dangerous while synflood, but it is
                 * bad justification for our negligence 8)
                 * To be honest, we are not able to make either
                 * of the variants now.                 --ANK
                 */
                        }
                }
        }
        BUG_TRAP(lopt->qlen == 0);

        kfree(lopt);

        while ((req=acc_req) != NULL) {
                struct sock *child = req->sk;

                acc_req = req->dl_next;

                local_bh_disable();
                bh_lock_sock(child);
                BUG_TRAP(child->lock.users==0);
                sock_hold(child);

                tcp_disconnect(child, O_NONBLOCK);

                sock_orphan(child);

                atomic_inc(&tcp_orphan_count);

                tcp_destroy_sock(child);

                bh_unlock_sock(child);
                local_bh_enable();
                sock_put(child);

                tcp_acceptq_removed(sk);
                tcp_openreq_fastfree(req);
        }
        BUG_TRAP(sk->ack_backlog == 0);
}

/*
 *      Wait for a socket to get into the connected state
 *
 *      Note: Must be called with the socket locked.
 */
static int wait_for_tcp_connect(struct sock * sk, int flags, long *timeo_p)
{
        struct task_struct *tsk = current;
        DECLARE_WAITQUEUE(wait, tsk);

        while((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
                if(sk->err)
                        return sock_error(sk);
                if((1 << sk->state) &
                   ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                        if(sk->keepopen && !(flags&MSG_NOSIGNAL))
                                send_sig(SIGPIPE, tsk, 0);
                        return -EPIPE;
                }
                if(!*timeo_p)
                        return -EAGAIN;
                if(signal_pending(tsk))
                        return sock_intr_errno(*timeo_p);

                __set_task_state(tsk, TASK_INTERRUPTIBLE);
                add_wait_queue(sk->sleep, &wait);
                sk->tp_pinfo.af_tcp.write_pending++;

                release_sock(sk);
                *timeo_p = schedule_timeout(*timeo_p);
                lock_sock(sk);

                __set_task_state(tsk, TASK_RUNNING);
                remove_wait_queue(sk->sleep, &wait);
                sk->tp_pinfo.af_tcp.write_pending--;
        }
        return 0;
}

static inline int tcp_memory_free(struct sock *sk)
{
        return sk->wmem_queued < sk->sndbuf;
}

/*
 *      Wait for more memory for a socket
 */
static long wait_for_tcp_memory(struct sock * sk, long timeo)
{
        long vm_wait = 0;
        long current_timeo = timeo;
        DECLARE_WAITQUEUE(wait, current);

        if (tcp_memory_free(sk))
                current_timeo = vm_wait = (net_random()%(HZ/5))+2;

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);

        add_wait_queue(sk->sleep, &wait);
        for (;;) {
                set_bit(SOCK_NOSPACE, &sk->socket->flags);

                set_current_state(TASK_INTERRUPTIBLE);

                if (signal_pending(current))
                        break;
                if (tcp_memory_free(sk) && !vm_wait)
                        break;
                if (sk->shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->err)
                        break;
                release_sock(sk);
                if (!tcp_memory_free(sk) || vm_wait)
                        current_timeo = schedule_timeout(current_timeo);
                lock_sock(sk);
                if (vm_wait) {
                        if (timeo != MAX_SCHEDULE_TIMEOUT &&
                            (timeo -= vm_wait-current_timeo) < 0)
                                timeo = 0;
                        break;
                } else {
                        timeo = current_timeo;
                }
        }
        current->state = TASK_RUNNING;
        remove_wait_queue(sk->sleep, &wait);
        return timeo;
}
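/*
 * Illustrative userspace sketch (not part of this file): the application
 * side of the blocking logic above.  With MSG_DONTWAIT the send path does
 * not sleep in wait_for_tcp_memory(); once wmem_queued reaches sndbuf the
 * call fails with EAGAIN, and the writer waits for POLLOUT, which
 * tcp_poll() reports again when enough space has been freed (the
 * SOCK_NOSPACE bits set there arm the wakeup).  Names `fd`, `buf` and
 * `len` are assumptions made for the example only.
 */
#include <errno.h>
#include <poll.h>
#include <sys/socket.h>
#include <sys/types.h>

static ssize_t send_all_nonblocking(int fd, const char *buf, size_t len)
{
        size_t off = 0;

        while (off < len) {
                ssize_t n = send(fd, buf + off, len - off, MSG_DONTWAIT);

                if (n > 0) {
                        off += n;
                        continue;
                }
                if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
                        /* Send buffer full: wait until tcp_poll() reports POLLOUT. */
                        struct pollfd pfd = { .fd = fd, .events = POLLOUT };
                        if (poll(&pfd, 1, -1) < 0)
                                return -1;
                        continue;
                }
                return -1;      /* hard error: EPIPE, ECONNRESET, ... */
        }
        return (ssize_t)off;
}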
/* When all user supplied data has been queued set the PSH bit */
#define PSH_NEEDED (seglen == 0 && iovlen == 0)

/*
 *      This routine copies from a user buffer into a socket,
 *      and starts the transmit system.
 */
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
{
        struct iovec *iov;
        struct tcp_opt *tp;
        struct sk_buff *skb;
        int iovlen, flags;
        int mss_now;
        int err, copied;
        long timeo;

        err = 0;
        tp = &(sk->tp_pinfo.af_tcp);

        lock_sock(sk);
        TCP_CHECK_TIMER(sk);

        flags = msg->msg_flags;
        timeo = sock_sndtimeo(sk, flags&MSG_DONTWAIT);

        /* Wait for a connection to finish. */
        if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
                if((err = wait_for_tcp_connect(sk, flags, &timeo)) != 0)
                        goto out_unlock;

        /* This should be in poll */
        clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);

        mss_now = tcp_current_mss(sk);

        /* Ok commence sending. */
        iovlen = msg->msg_iovlen;
        iov = msg->msg_iov;
        copied = 0;

        while (--iovlen >= 0) {
                int seglen=iov->iov_len;
                unsigned char * from=iov->iov_base;

                iov++;