tcp_ipv4.c
#ifdef CONFIG_IP_TRANSPARENT_PROXY
        newsk->num = ntohs(skb->h.th->dest);
        newsk->sport = req->lcl_port;
#endif
        newsk->opt = req->af.v4_req.opt;
        newtp->ext_header_len = 0;
        if (newsk->opt)
                newtp->ext_header_len = newsk->opt->optlen;

        tcp_sync_mss(newsk, dst->pmtu);
        newtp->rcv_mss = newtp->mss_clamp;

        /* It would be better to use newtp->mss_clamp here */
        if (newsk->rcvbuf < (3 * newtp->pmtu_cookie))
                newsk->rcvbuf = min((3 * newtp->pmtu_cookie), sysctl_rmem_max);
        if (newsk->sndbuf < (3 * newtp->pmtu_cookie))
                newsk->sndbuf = min((3 * newtp->pmtu_cookie), sysctl_wmem_max);

        /* We run in BH processing itself or within a BH atomic
         * sequence (backlog) so no locking is needed.
         */
        __tcp_v4_hash(newsk);
        __tcp_inherit_port(sk, newsk);
        __add_to_prot_sklist(newsk);

        sk->data_ready(sk, 0); /* Deliver SIGIO */

        return newsk;

exit:
        dst_release(dst);
        return NULL;
}

static void tcp_v4_rst_req(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct open_request *req, *prev;

        req = tcp_v4_search_req(tp, skb->nh.iph, skb->h.th, &prev);
        if (!req)
                return;
        /* Sequence number check required by RFC793 */
        if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) ||
            after(TCP_SKB_CB(skb)->seq, req->rcv_isn + 1))
                return;
        tcp_synq_unlink(tp, req, prev);
        (req->sk ? sk->ack_backlog : tp->syn_backlog)--;
        req->class->destructor(req);
        tcp_openreq_free(req);

        net_statistics.EmbryonicRsts++;
}

/* Check for embryonic sockets (open_requests). We check packets with
 * only the SYN bit set against the open_request queue too: this
 * increases connection latency a bit, but is required to detect
 * retransmitted SYNs.
 */
static inline struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th;
        u32 flg = ((u32 *)th)[3];       /* 4th 32-bit word of the TCP header: data offset, flags, window */

        /* Check for RST */
        if (flg & __constant_htonl(0x00040000)) {
                tcp_v4_rst_req(sk, skb);
                return NULL;
        }

        /* Check for SYN|ACK */
        if (flg & __constant_htonl(0x00120000)) {
                struct open_request *req, *dummy;
                struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

                /* Find possible connection requests. */
                req = tcp_v4_search_req(tp, skb->nh.iph, th, &dummy);
                if (req) {
                        sk = tcp_check_req(sk, skb, req);
                }
#ifdef CONFIG_SYN_COOKIES
                else if ((flg & __constant_htonl(0x00120000)) == __constant_htonl(0x00100000)) {
                        sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
                }
#endif
        }
        return sk;
}

int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_FILTER
        struct sk_filter *filter = sk->filter;
        if (filter && sk_filter(skb, filter))
                goto discard;
#endif /* CONFIG_FILTER */

        /*
         * This doesn't check if the socket has enough room for the packet.
         * Either process the packet _without_ queueing it and then free it,
         * or do the check later.
         */
        skb_set_owner_r(skb, sk);

        if (sk->state == TCP_ESTABLISHED) { /* Fast path */
                if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
                        goto reset;
                return 0;
        }

        if (sk->state == TCP_LISTEN) {
                struct sock *nsk;

                nsk = tcp_v4_hnd_req(sk, skb);
                if (!nsk)
                        goto discard;

                /*
                 * Queue it on the new socket if the new socket is active,
                 * otherwise we just short-circuit this and continue with
                 * the new socket.
                 */
                if (atomic_read(&nsk->sock_readers)) {
                        skb_orphan(skb);
                        __skb_queue_tail(&nsk->back_log, skb);
                        return 0;
                }
                sk = nsk;
        }

        if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
                goto reset;
        return 0;

reset:
        tcp_v4_send_reset(skb);
discard:
        kfree_skb(skb);
        /* Be careful here. If this function gets more complicated and
         * gcc suffers from register pressure on the x86, sk (in %ebx)
         * might be destroyed here. This current version compiles correctly,
         * but you have been warned.
         */
        return 0;
}

/*
 * From tcp_input.c
 */
int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
{
        struct tcphdr *th;
        struct sock *sk;

        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;

        th = skb->h.th;

        /* Pull up the IP header. */
        __skb_pull(skb, skb->h.raw - skb->data);

        /* Count it even if it's bad */
        tcp_statistics.TcpInSegs++;

        len = skb->len;
        if (len < sizeof(struct tcphdr))
                goto bad_packet;

        /* Try to use the device checksum if provided. */
        switch (skb->ip_summed) {
        case CHECKSUM_NONE:
                skb->csum = csum_partial((char *)th, len, 0);
                /* fall through: verify the checksum we just computed */
        case CHECKSUM_HW:
                if (tcp_v4_check(th, len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) {
                        NETDEBUG(printk(KERN_DEBUG "TCPv4 bad checksum "
                                        "from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, "
                                        "len=%d/%d/%d\n",
                                        NIPQUAD(skb->nh.iph->saddr), ntohs(th->source),
                                        NIPQUAD(skb->nh.iph->daddr), ntohs(th->dest),
                                        len, skb->len, ntohs(skb->nh.iph->tot_len)));
        bad_packet:
                        tcp_statistics.TcpInErrs++;
                        goto discard_it;
                }
        default:
                ; /* CHECKSUM_UNNECESSARY */
        }

        if ((th->doff * 4) < sizeof(struct tcphdr) || len < (th->doff * 4))
                goto bad_packet;

#ifdef CONFIG_IP_TRANSPARENT_PROXY
        if (IPCB(skb)->redirport)
                sk = tcp_v4_proxy_lookup(th->dest, skb->nh.iph->saddr, th->source,
                                         skb->nh.iph->daddr, skb->dev,
                                         IPCB(skb)->redirport, skb->dev->ifindex);
        else {
#endif
        sk = __tcp_v4_lookup(th, skb->nh.iph->saddr, th->source,
                             skb->nh.iph->daddr, th->dest, skb->dev->ifindex);
#ifdef CONFIG_IP_TRANSPARENT_PROXY
        if (!sk)
                sk = tcp_v4_search_proxy_openreq(skb);
        }
#endif
        if (!sk)
                goto no_tcp_socket;
        if (!ipsec_sk_policy(sk, skb))
                goto discard_it;

        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                    len - th->doff * 4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);

        skb->used = 0;

        if (sk->state == TCP_TIME_WAIT)
                goto do_time_wait;
        if (!atomic_read(&sk->sock_readers))
                return tcp_v4_do_rcv(sk, skb);

        __skb_queue_tail(&sk->back_log, skb);
        return 0;

no_tcp_socket:
        tcp_v4_send_reset(skb);

discard_it:
        /* Discard frame. */
        kfree_skb(skb);
        return 0;

do_time_wait:
        /* Sorry for the ugly switch. 2.3 will have a better solution. */
        switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
                                           skb, th, skb->len)) {
        case TCP_TW_ACK:
                tcp_v4_send_ack(skb,
                                ((struct tcp_tw_bucket *)sk)->snd_nxt,
                                ((struct tcp_tw_bucket *)sk)->rcv_nxt,
                                ((struct tcp_tw_bucket *)sk)->window);
                goto discard_it;
        case TCP_TW_RST:
                goto no_tcp_socket;
        default:
                goto discard_it;
        }
}

static void __tcp_v4_rehash(struct sock *sk)
{
        struct sock **skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];

        SOCKHASH_LOCK();
        if (sk->pprev) {
                if (sk->next)
                        sk->next->pprev = sk->pprev;
                *sk->pprev = sk->next;
                sk->pprev = NULL;
                tcp_reg_zap(sk);
        }
        if ((sk->next = *skp) != NULL)
                (*skp)->pprev = &sk->next;
        *skp = sk;
        sk->pprev = skp;
        SOCKHASH_UNLOCK();
}

int tcp_v4_rebuild_header(struct sock *sk)
{
        struct rtable *rt = (struct rtable *)sk->dst_cache;
        __u32 new_saddr;
        int want_rewrite = sysctl_ip_dynaddr && sk->state == TCP_SYN_SENT;

        if (rt == NULL)
                return 0;

        /* Force route checking if want_rewrite.
         * The idea is good, the implementation is disgusting.
         * Well, if bind() was done on this socket, you cannot randomly
         * overwrite its source address.             --ANK
         */
        if (want_rewrite) {
                int tmp;
                struct rtable *new_rt;
                __u32 old_saddr = rt->rt_src;

                /* Query new route using another rt buffer */
                tmp = ip_route_connect(&new_rt, rt->rt_dst, 0,
                                       RT_TOS(sk->ip_tos) | sk->localroute,
                                       sk->bound_dev_if);
                if (tmp == 0) {
                        /* Only useful if different source addrs */
                        if (new_rt->rt_src != old_saddr) {
                                dst_release(sk->dst_cache);
                                sk->dst_cache = &new_rt->u.dst;
                                rt = new_rt;
                                goto do_rewrite;
                        }
                        dst_release(&new_rt->u.dst);
                }
        }

        if (rt->u.dst.obsolete) {
                int err;
                err = ip_route_output(&rt, rt->rt_dst, rt->rt_src,
                                      rt->key.tos | RTO_CONN, rt->key.oif);
                if (err) {
                        sk->err_soft = -err;
                        sk->error_report(sk);
                        return -1;
                }
                dst_release(xchg(&sk->dst_cache, &rt->u.dst));
        }

        return 0;

do_rewrite:
        new_saddr = rt->rt_src;

        /* Ouch! This should not happen. */
        if (!sk->saddr || !sk->rcv_saddr) {
                printk(KERN_WARNING "tcp_v4_rebuild_header(): not valid sock addrs: "
                       "saddr=%08X rcv_saddr=%08X\n",
                       ntohl(sk->saddr), ntohl(sk->rcv_saddr));
                return 0;
        }

        if (new_saddr != sk->saddr) {
                if (sysctl_ip_dynaddr > 1) {
                        printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr "
                               "from %d.%d.%d.%d to %d.%d.%d.%d\n",
                               NIPQUAD(sk->saddr),
                               NIPQUAD(new_saddr));
                }

                sk->saddr = new_saddr;
                sk->rcv_saddr = new_saddr;

                /* XXX The only one ugly spot where we need to
                 * XXX really change the sockets identity after
                 * XXX it has entered the hashes. -DaveM
                 */
                __tcp_v4_rehash(sk);
        }

        return 0;
}

static struct sock *tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th)
{
        return tcp_v4_lookup(skb->nh.iph->saddr, th->source,
                             skb->nh.iph->daddr, th->dest, skb->dev->ifindex);
}

static void v4_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

        sin->sin_family = AF_INET;
        sin->sin_addr.s_addr = sk->daddr;
        sin->sin_port = sk->dport;
}

struct tcp_func ipv4_specific = {
        ip_queue_xmit,
        tcp_v4_send_check,
        tcp_v4_rebuild_header,
        tcp_v4_conn_request,
        tcp_v4_syn_recv_sock,
        tcp_v4_get_sock,
        sizeof(struct iphdr),
        ip_setsockopt,
        ip_getsockopt,
        v4_addr2sockaddr,
        sizeof(struct sockaddr_in)
};

/* NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        skb_queue_head_init(&tp->out_of_order_queue);
        tcp_init_xmit_timers(sk);

        tp->rto = TCP_TIMEOUT_INIT;             /* TCP_WRITE_TIME */
        tp->mdev = TCP_TIMEOUT_INIT;
        tp->mss_clamp = ~0;

        /* So many TCP implementations out there (incorrectly) count the
         * initial SYN frame in their delayed-ACK and congestion control
         * algorithms that we must have the following bandaid to talk
         * efficiently to them. -DaveM
         */
        tp->snd_cwnd = 2;

        /* See draft-stevens-tcpca-spec-01 for discussion of the
         * initialization of these values.
         */
        tp->snd_cwnd_cnt = 0;
        tp->snd_ssthresh = 0x7fffffff;          /* Infinity */

        sk->state = TCP_CLOSE;
        sk->max_ack_backlog = SOMAXCONN;
        tp->rcv_mss = 536;

        sk->write_space = tcp_write_space;

        /* Init SYN queue. */
        tcp_synq_init(tp);

        sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;

        return 0;
}

static int tcp_v4_destroy_sock(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;

        tcp_clear_xmit_timers(sk);

        if (sk->keepopen)
                tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);

        /* Clean up the write buffer. */
        while ((skb = __skb_dequeue(&sk->write_queue)) != NULL)
                kfree_skb(skb);

        /* Clean up our, hopefully empty, out_of_order_queue. */
        while ((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
                kfree_skb(skb);

        /* Clean up a referenced TCP bind bucket, this only happens if a
         * port is allocated for a socket, but it never fully connects.
         */
        if (sk->prev != NULL)
                tcp_put_port(sk);

        return 0;
}

struct proto tcp_prot = {
        (struct sock *)&tcp_prot,       /* sklist_next */
        (struct sock *)&tcp_prot,       /* sklist_prev */
        tcp_close,                      /* close */
        tcp_v4_connect,                 /* connect */
        tcp_accept,                     /* accept */
        NULL,                           /* retransmit */
        tcp_write_wakeup,               /* write_wakeup */
        tcp_read_wakeup,                /* read_wakeup */
        tcp_poll,                       /* poll */
        tcp_ioctl,                      /* ioctl */
        tcp_v4_init_sock,               /* init */
        tcp_v4_destroy_sock,            /* destroy */
        tcp_shutdown,                   /* shutdown */
        tcp_setsockopt,                 /* setsockopt */
        tcp_getsockopt,                 /* getsockopt */
        tcp_v4_sendmsg,                 /* sendmsg */
        tcp_recvmsg,                    /* recvmsg */
        NULL,                           /* bind */
        tcp_v4_do_rcv,                  /* backlog_rcv */
        tcp_v4_hash,                    /* hash */
        tcp_v4_unhash,                  /* unhash */
        tcp_v4_get_port,                /* get_port */
        128,                            /* max_header */
        0,                              /* retransmits */
        "TCP",                          /* name */
        0,                              /* inuse */
        0                               /* highestinuse */
};

__initfunc(void tcp_v4_init(struct net_proto_family *ops))
{
        int err;

        tcp_inode.i_mode = S_IFSOCK;
        tcp_inode.i_sock = 1;
        tcp_inode.i_uid = 0;
        tcp_inode.i_gid = 0;

        tcp_socket->inode = &tcp_inode;
        tcp_socket->state = SS_UNCONNECTED;
        tcp_socket->type = SOCK_RAW;

        if ((err = ops->create(tcp_socket, IPPROTO_TCP)) < 0)
                panic("Failed to create the TCP control socket.\n");
        tcp_socket->sk->allocation = GFP_ATOMIC;
        tcp_socket->sk->num = 256;              /* Don't receive any data */
        tcp_socket->sk->ip_ttl = MAXTTL;
}