📄 tcp_ipv4.c
next:
	if(firstpass--) {
		struct tcp_bind_bucket *tb = tcp_bhash[tcp_bhashfn(hpnum)];
		for( ; (tb && tb->port != hpnum); tb = tb->next)
			;
		if(tb) {
			s = tb->owners;
			goto pass2;
		}
	}
gotit:
	return result;
}
#endif /* CONFIG_IP_TRANSPARENT_PROXY */

static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	return secure_tcp_sequence_number(skb->nh.iph->daddr,
					  skb->nh.iph->saddr,
					  skb->h.th->dest,
					  skb->h.th->source);
}

/* Check that a TCP address is unique, don't allow multiple
 * connects to/from the same address.  Actually we can optimize
 * quite a bit, since the socket about to connect is still
 * in TCP_CLOSE, a tcp_bind_bucket for the local port it will
 * use will exist, with a NULL owners list.  So check for that.
 * The good_socknum and verify_bind scheme we use makes this
 * work.
 */
static int tcp_v4_unique_address(struct sock *sk)
{
	struct tcp_bind_bucket *tb;
	unsigned short snum = sk->num;
	int retval = 1;

	/* Freeze the hash while we snoop around. */
	SOCKHASH_LOCK();
	tb = tcp_bhash[tcp_bhashfn(snum)];
	for(; tb; tb = tb->next) {
		if(tb->port == snum && tb->owners != NULL) {
			/* Almost certainly the re-use port case; search the
			 * real hashes so it actually scales.
			 */
			sk = __tcp_v4_lookup(NULL, sk->daddr, sk->dport,
					     sk->rcv_saddr, htons(snum),
					     sk->bound_dev_if);
			if((sk != NULL) && (sk->state != TCP_LISTEN))
				retval = 0;
			break;
		}
	}
	SOCKHASH_UNLOCK();
	return retval;
}

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
	struct sk_buff *buff;
	struct rtable *rt;
	u32 daddr, nexthop;
	int tmp;

	if (sk->state != TCP_CLOSE)
		return(-EISCONN);

	/* Don't allow a double connect. */
	if (sk->daddr)
		return -EINVAL;

	if (addr_len < sizeof(struct sockaddr_in))
		return(-EINVAL);

	if (usin->sin_family != AF_INET) {
		static int complained;
		if (usin->sin_family)
			return(-EAFNOSUPPORT);
		if (!complained++)
			printk(KERN_DEBUG "%s forgot to set AF_INET in %s\n",
			       current->comm, __FUNCTION__);
	}

	nexthop = daddr = usin->sin_addr.s_addr;
	if (sk->opt && sk->opt->srr) {
		if (daddr == 0)
			return -EINVAL;
		nexthop = sk->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, sk->saddr,
			       RT_TOS(sk->ip_tos)|RTO_CONN|sk->localroute,
			       sk->bound_dev_if);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	dst_release(xchg(&sk->dst_cache, rt));

	buff = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header),
			    0, GFP_KERNEL);
	if (buff == NULL)
		return -ENOBUFS;

	/* Socket has no identity, so lock_sock() is useless.  Also
	 * since state==TCP_CLOSE (checked above) the socket cannot
	 * possibly be in the hashes.  TCP hash locking is only
	 * needed while checking quickly for a unique address.
	 * However, the socket does need to be (and is) locked
	 * in tcp_connect().
	 * Perhaps this addresses all of ANK's concerns. 8-)  -DaveM
	 */
	sk->dport = usin->sin_port;
	sk->daddr = rt->rt_dst;
	if (sk->opt && sk->opt->srr)
		sk->daddr = daddr;
	if (!sk->saddr)
		sk->saddr = rt->rt_src;
	sk->rcv_saddr = sk->saddr;

	if (!tcp_v4_unique_address(sk)) {
		kfree_skb(buff);
		sk->daddr = 0;
		return -EADDRNOTAVAIL;
	}

	tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
						   sk->sport, usin->sin_port);

	tp->ext_header_len = 0;
	if (sk->opt)
		tp->ext_header_len = sk->opt->optlen;

	/* Reset mss clamp */
	tp->mss_clamp = ~0;

	if (!ip_dont_fragment(sk, &rt->u.dst) &&
	    rt->u.dst.pmtu > 576 && rt->rt_dst != rt->rt_gateway) {
		/* Clamp mss at maximum of 536 and user_mss.
		 * Probably the user ordered us to override the tiny
		 * segment size in the gatewayed case.
		 */
		tp->mss_clamp = max(tp->user_mss, 536);
	}

	tcp_connect(sk, buff, rt->u.dst.pmtu);
	return 0;
}

static int tcp_v4_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
	int retval = -EINVAL;

	/* Do sanity checking for sendmsg/sendto/send. */
	if (msg->msg_flags & ~(MSG_OOB|MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL))
		goto out;
	if (msg->msg_name) {
		struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name;

		if (msg->msg_namelen < sizeof(*addr))
			goto out;
		if (addr->sin_family && addr->sin_family != AF_INET)
			goto out;
		retval = -ENOTCONN;
		if(sk->state == TCP_CLOSE)
			goto out;
		retval = -EISCONN;
		if (addr->sin_port != sk->dport)
			goto out;
		if (addr->sin_addr.s_addr != sk->daddr)
			goto out;
	}
	retval = tcp_do_sendmsg(sk, msg);

out:
	return retval;
}

/*
 * Do a linear search in the socket open_request list.
 * This should be replaced with a global hash table.
 */
static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
					      struct iphdr *iph,
					      struct tcphdr *th,
					      struct open_request **prevp)
{
	struct open_request *req, *prev;
	__u16 rport = th->source;

	/* assumption: the socket is not in use,
	 * as we checked the user count in tcp_rcv and we're
	 * running from a soft interrupt.
	 */
	prev = (struct open_request *) (&tp->syn_wait_queue);
	for (req = prev->dl_next; req; req = req->dl_next) {
		if (req->af.v4_req.rmt_addr == iph->saddr &&
		    req->af.v4_req.loc_addr == iph->daddr &&
		    req->rmt_port == rport
#ifdef CONFIG_IP_TRANSPARENT_PROXY
		    && req->lcl_port == th->dest
#endif
		    ) {
			*prevp = prev;
			return req;
		}
		prev = req;
	}
	return NULL;
}

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

	if (atomic_read(&sk->sock_readers))
		return;

	/* We're not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always <576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->state == TCP_LISTEN)
		return;

	/* We don't check in the destentry whether pmtu discovery is forbidden
	 * on this route.  We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if (sk->dst_cache == NULL)
		return;

	ip_rt_update_pmtu(sk->dst_cache, mtu);
	if (sk->ip_pmtudisc != IP_PMTUDISC_DONT &&
	    tp->pmtu_cookie > sk->dst_cache->pmtu) {
		tcp_sync_mss(sk, sk->dst_cache->pmtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped.  This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
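/*
 * Editor's note: a minimal userspace sketch (not part of this file) of the
 * rule tcp_v4_unique_address() enforces above.  Two sockets may share a
 * local port via SO_REUSEADDR while unconnected, but only one of them can
 * connect to a given destination: a second identical four-tuple is rejected
 * by tcp_v4_connect() with EADDRNOTAVAIL.  Ports 39999/40000 and the helper
 * mksock() are illustrative; error checking is abbreviated.
 */
#if 0	/* illustrative only -- userspace, not kernel code */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

static int mksock(unsigned short lport)
{
	struct sockaddr_in local;
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int on = 1;

	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	memset(&local, 0, sizeof(local));
	local.sin_family = AF_INET;
	local.sin_port = htons(lport);
	bind(fd, (struct sockaddr *)&local, sizeof(local));
	return fd;
}

int main(void)
{
	struct sockaddr_in dst;
	int srv = socket(AF_INET, SOCK_STREAM, 0);
	int a, b;

	/* A loopback listener so the first connect actually completes. */
	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(39999);
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
	bind(srv, (struct sockaddr *)&dst, sizeof(dst));
	listen(srv, 1);

	a = mksock(40000);	/* both bind the same local port... */
	b = mksock(40000);	/* ...which SO_REUSEADDR permits */

	if (connect(a, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("first connect");
	if (connect(b, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("second connect");	/* expect: EADDRNOTAVAIL */

	close(a);
	close(b);
	close(srv);
	return 0;
}
#endif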
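/*
 * Editor's note: a sketch (again not part of this file) of the
 * application-side view of the PMTU discovery that do_pmtu_discovery()
 * implements above.  With IP_PMTUDISC_DO the kernel sets DF on outgoing
 * segments and shrinks the cached path MTU when ICMP_FRAG_NEEDED arrives;
 * IP_MTU reads the current estimate back on a connected socket.  The
 * function name print_path_mtu() is hypothetical; pass it a real host.
 */
#if 0	/* illustrative only -- userspace, not kernel code */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>	/* IP_MTU_DISCOVER, IP_PMTUDISC_DO, IP_MTU */
#include <sys/socket.h>
#include <unistd.h>

static int print_path_mtu(const char *ip, unsigned short port)
{
	struct sockaddr_in dst;
	int fd, pmtu, on = IP_PMTUDISC_DO;
	socklen_t len = sizeof(pmtu);

	if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
		return -1;

	/* Per-socket PMTU discovery: DF set on every outgoing segment. */
	setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &on, sizeof(on));

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(port);
	inet_pton(AF_INET, ip, &dst.sin_addr);

	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) == 0 &&
	    getsockopt(fd, IPPROTO_IP, IP_MTU, &pmtu, &len) == 0)
		printf("path MTU to %s: %d\n", ip, pmtu);

	close(fd);
	return 0;
}
#endif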
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic".  When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 * sk->err and sk->err_soft should be atomic_t.
 */
void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
{
	struct iphdr *iph = (struct iphdr*)dp;
	struct tcphdr *th;
	struct tcp_opt *tp;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
#if ICMP_MIN_LENGTH < 14
	int no_flags = 0;
#else
#define no_flags 0
#endif
	struct sock *sk;
	__u32 seq;
	int err;

	if (len < (iph->ihl << 2) + ICMP_MIN_LENGTH) {
		icmp_statistics.IcmpInErrors++;
		return;
	}
#if ICMP_MIN_LENGTH < 14
	if (len < (iph->ihl << 2) + 14)
		no_flags = 1;
#endif

	th = (struct tcphdr*)(dp+(iph->ihl<<2));

	sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex);
	if (sk == NULL || sk->state == TCP_TIME_WAIT) {
		icmp_statistics.IcmpInErrors++;
		return;
	}

	tp = &sk->tp_pinfo.af_tcp;
	seq = ntohl(th->seq);
	if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
		net_statistics.OutOfWindowIcmps++;
		return;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
#ifndef OLD_SOURCE_QUENCH /* This is deprecated */
		tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
		tp->snd_cwnd = tp->snd_ssthresh;
		tp->snd_cwnd_cnt = 0;
		tp->high_seq = tp->snd_nxt;
#endif
		return;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			return;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			do_pmtu_discovery(sk, iph, ntohs(skb->h.icmph->un.frag.mtu));
			return;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		return;
	}

	switch (sk->state) {
		struct open_request *req, *prev;
	case TCP_LISTEN:
		/* Prevent race conditions with accept() -
		 * ICMP is unreliable.
		 */
		if (atomic_read(&sk->sock_readers)) {
			net_statistics.LockDroppedIcmps++;
			/* If too many ICMPs get dropped on busy
			 * servers this needs to be solved differently.
			 */
			return;
		}

		/* The final ACK of the handshake should already be
		 * handled in the new socket context, not here.
		 * Strictly speaking, an ICMP error for the final
		 * ACK should set the opening flag, but that is too
		 * complicated right now.
		 */
		if (!no_flags && !th->syn && !th->ack)
			return;

		req = tcp_v4_search_req(tp, iph, th, &prev);
		if (!req)
			return;
		if (seq != req->snt_isn) {
			net_statistics.OutOfWindowIcmps++;
			return;
		}
		if (req->sk) {
			/*
			 * Already in ESTABLISHED and a big socket is created;
			 * set the error code there.
			 * The error will _not_ be reported by accept(),
			 * but only by the next operation on the socket after
			 * accept.
			 */
			sk = req->sk;
		} else {
			/*
			 * Still in SYN_RECV, just remove it silently.
			 * There is no good way to pass the error to the newly
			 * created socket, and POSIX does not want network
			 * errors returned from accept().
			 */
			tp->syn_backlog--;
			tcp_synq_unlink(tp, req, prev);
			req->class->destructor(req);
			tcp_openreq_free(req);
			return;
		}
		break;
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen */
		if (!no_flags && !th->syn)
			return;
		tcp_statistics.TcpAttemptFails++;
		sk->err = err;
		sk->zapped = 1;
		mb();
		sk->error_report(sk);
		return;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to treat as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and broken firewalls sit in every dark corner sending random
	 * errors ordered by their masters, even these two messages have
	 * lost their original sense (even Linux sends invalid PORT_UNREACHs).
	 *
	 * Now we are in compliance with RFCs.
	 *						--ANK (980905)
	 */
	if (sk->ip_recverr) {
		/* This code isn't serialized with the socket code */
		/* ANK (980927) ... which is harmless now,
		 * sk->err's may be safely lost.
		 */
		sk->err = err;
		mb();
		sk->error_report(sk);	/* Wake people up to see the error
					 * (see connect in sock.c) */
	} else {
		/* Only an error on timeout */
		sk->err_soft = err;
		mb();
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb)
{
	th->check = 0;
	th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
				 csum_partial((char *)th, th->doff<<2, skb->csum));
}

/*
 * This routine will send an RST to the other tcp.
 *
 * Someone asks: why do I NEVER use socket parameters (TOS, TTL, etc.)
 *		 for the reset?
 * Answer: if a packet caused an RST, it was not meant for a socket
 *	   existing in our system; if it did match a socket,
 *	   it is just a duplicate segment or a bug in the other side's TCP.
 *	   So we build the reply based only on the parameters
 *	   that arrived with the segment.
 * Exception: precedence violation. We do not implement it in any case.
 */
static void tcp_v4_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct tcphdr rth;
	struct ip_reply_arg arg;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL) {
#ifdef CONFIG_IP_TRANSPARENT_PROXY
		if (((struct rtable*)skb->dst)->rt_type == RTN_UNICAST)
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_PORT_UNREACH, 0);
#endif
		return;
	}

	/* Swap the send and the receive. */
	memset(&rth, 0, sizeof(struct tcphdr));
	rth.dest = th->source;
	rth.source = th->dest;
	rth.doff = sizeof(struct tcphdr)/4;
	rth.rst = 1;

	if (th->ack) {
		rth.seq = th->ack_seq;
	} else {
		rth.ack = 1;
		rth.ack_seq = th->syn ? htonl(ntohl(th->seq)+1) : th->seq;
	}

	memset(&arg, 0, sizeof arg);
	arg.iov[0].iov_base = (unsigned char *)&rth;
	arg.iov[0].iov_len = sizeof rth;
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/ sizeof(struct tcphdr),
				      IPPROTO_TCP, 0);
	arg.n_iov = 1;
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);

	tcp_statistics.TcpOutSegs++;
	tcp_statistics.TcpOutRsts++;
}
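/*
 * Editor's note: a self-contained sketch (not from this kernel source) of
 * the arithmetic behind tcp_v4_send_check() above: the Internet checksum
 * (RFC 1071) taken over the IPv4 pseudo-header (source, destination,
 * zero/protocol, TCP length) followed by the TCP header and payload.
 * tcp_v4_check()/csum_partial()/csum_tcpudp_nofold() compute the same value
 * incrementally with arch-specific code; tcp_checksum() here is a
 * hypothetical plain-C equivalent for reference.
 */
#if 0	/* illustrative only */
#include <stddef.h>
#include <stdint.h>
#include <netinet/in.h>	/* IPPROTO_TCP */

/* Accumulate big-endian 16-bit words; an odd trailing byte is zero-padded. */
static uint32_t sum16(const uint8_t *buf, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] << 8 | buf[i + 1];
	if (len & 1)
		sum += (uint32_t)buf[len - 1] << 8;
	return sum;
}

/* saddr/daddr are the IPv4 addresses as they appear on the wire;
 * seg points at the TCP header (check field zeroed) plus payload.
 * The result is in host order; store it back big-endian.
 */
static uint16_t tcp_checksum(const uint8_t saddr[4], const uint8_t daddr[4],
			     const uint8_t *seg, size_t len)
{
	uint32_t sum;

	/* Pseudo-header, as csum_tcpudp_nofold() builds it above. */
	sum = sum16(saddr, 4, 0);
	sum = sum16(daddr, 4, sum);
	sum += IPPROTO_TCP + (uint32_t)len;

	/* TCP header and data. */
	sum = sum16(seg, len, sum);

	/* Fold the carries into 16 bits and take the one's complement. */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}
#endif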
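/*
 * Editor's note: a hedged userspace sketch (not part of this file) of how
 * the errors recorded by tcp_v4_err() above reach applications: a
 * non-blocking connect() completes via poll(), and getsockopt(SO_ERROR)
 * then reads the pending sk->err, e.g. ECONNREFUSED when the peer answered
 * with the RST built by tcp_v4_send_reset(), or EHOSTUNREACH from an ICMP
 * error.  connect_status() is an illustrative name; error handling is
 * abbreviated.
 */
#if 0	/* illustrative only -- userspace, not kernel code */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

static int connect_status(const char *ip, unsigned short port)
{
	struct sockaddr_in dst;
	struct pollfd pfd;
	int fd, err = 0;
	socklen_t len = sizeof(err);

	if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
		return -1;
	fcntl(fd, F_SETFL, O_NONBLOCK);

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;	/* tcp_v4_connect() insists on AF_INET */
	dst.sin_port = htons(port);
	inet_pton(AF_INET, ip, &dst.sin_addr);

	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) == 0) {
		err = 0;			/* connected immediately */
	} else if (errno == EINPROGRESS) {
		pfd.fd = fd;
		pfd.events = POLLOUT;
		poll(&pfd, 1, 5000);		/* wait for the handshake */
		getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
	} else {
		err = errno;
	}

	printf("connect to %s:%u -> %s\n", ip, (unsigned)port,
	       err ? strerror(err) : "established");
	close(fd);
	return err;
}
#endif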