tcp_ipv4.c
no_tcp_socket:
	if (len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TcpInErrs);
	} else {
		tcp_v4_send_reset(skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TcpInErrs);
		goto discard_and_relse;
	}
	switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					   skb, th, skb->len)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
					     ntohs(th->dest),
					     tcp_v4_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_timewait_kill((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

/* With per-bucket locks this operation is not atomic, so this
 * version is no worse.
 */
static void __tcp_v4_rehash(struct sock *sk)
{
	sk->prot->unhash(sk);
	sk->prot->hash(sk);
}

static int tcp_v4_reselect_saddr(struct sock *sk)
{
	int err;
	struct rtable *rt;
	__u32 old_saddr = sk->saddr;
	__u32 new_saddr;
	__u32 daddr = sk->daddr;

	if (sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr)
		daddr = sk->protinfo.af_inet.opt->faddr;

	/* Query new route. */
	err = ip_route_connect(&rt, daddr, 0,
			       RT_TOS(sk->protinfo.af_inet.tos) | sk->localroute,
			       sk->bound_dev_if);
	if (err)
		return err;

	__sk_dst_set(sk, &rt->u.dst);
	/* sk->route_caps = rt->u.dst.dev->features; */

	new_saddr = rt->rt_src;

	if (new_saddr == old_saddr)
		return 0;

	if (sysctl_ip_dynaddr > 1) {
		printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr "
		       "from %d.%d.%d.%d to %d.%d.%d.%d\n",
		       NIPQUAD(old_saddr),
		       NIPQUAD(new_saddr));
	}

	sk->saddr = new_saddr;
	sk->rcv_saddr = new_saddr;

	/* XXX The only one ugly spot where we need to
	 * XXX really change the sockets identity after
	 * XXX it has entered the hashes. -DaveM
	 *
	 * Besides that, it does not check for connection
	 * uniqueness. Wait for troubles.
	 */
	__tcp_v4_rehash(sk);
	return 0;
}

int tcp_v4_rebuild_header(struct sock *sk)
{
	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
	u32 daddr;
	int err;

	/* Route is OK, nothing to do. */
	if (rt != NULL)
		return 0;

	/* Reroute. */
	daddr = sk->daddr;
	if (sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr)
		daddr = sk->protinfo.af_inet.opt->faddr;

	err = ip_route_output(&rt, daddr, sk->saddr,
			      RT_TOS(sk->protinfo.af_inet.tos) | RTO_CONN | sk->localroute,
			      sk->bound_dev_if);
	if (!err) {
		__sk_dst_set(sk, &rt->u.dst);
		/* sk->route_caps = rt->u.dst.dev->features; */
		return 0;
	}

	/* Routing failed... */
	/* sk->route_caps = 0; */

	if (!sysctl_ip_dynaddr ||
	    sk->state != TCP_SYN_SENT ||
	    (sk->userlocks & SOCK_BINDADDR_LOCK) ||
	    (err = tcp_v4_reselect_saddr(sk)) != 0) {
		sk->err_soft = -err;
		/* sk->error_report(sk); */
	}
	return err;
}

static void v4_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = sk->daddr;
	sin->sin_port = sk->dport;
}
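/* Aside on the printk in tcp_v4_reselect_saddr() above: NIPQUAD()
 * breaks a 32-bit address into its four bytes to feed the
 * %d.%d.%d.%d format. A rough sketch of the macro (the kernel
 * headers carry the authoritative definition):
 *
 *	#define NIPQUAD(addr) \
 *		((unsigned char *)&addr)[0], \
 *		((unsigned char *)&addr)[1], \
 *		((unsigned char *)&addr)[2], \
 *		((unsigned char *)&addr)[3]
 */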
/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */
int tcp_v4_remember_stamp(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (rt == NULL || rt->rt_dst != sk->daddr) {
		peer = inet_getpeer(sk->daddr, 1);
		release_it = 1;
	} else {
		if (rt->peer == NULL)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->ts_recent_stamp;
			peer->tcp_ts = tp->ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}

int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
{
	struct inet_peer *peer = NULL;

	peer = inet_getpeer(tw->daddr, 1);

	if (peer) {
		if ((s32)(peer->tcp_ts - tw->ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tw->ts_recent_stamp)) {
			peer->tcp_ts_stamp = tw->ts_recent_stamp;
			peer->tcp_ts = tw->ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}

struct tcp_func ipv4_specific = {
	ip_queue_xmit,
	tcp_v4_send_check,
	tcp_v4_rebuild_header,
	tcp_v4_conn_request,
	tcp_v4_syn_recv_sock,
	tcp_v4_hash_connecting,
	tcp_v4_remember_stamp,
	sizeof(struct iphdr),

	ip_setsockopt,
	ip_getsockopt,
	v4_addr2sockaddr,
	sizeof(struct sockaddr_in)
};

/* NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them. -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->state = TCP_CLOSE;

	sk->write_space = tcp_write_space;

	sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;

	sk->sndbuf = sysctl_tcp_wmem[1];
	sk->rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

static int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	tcp_clear_xmit_timers(sk);

	/* Clean up the write buffer. */
	tcp_writequeue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

	/* Clean prequeue, it must be empty really. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (sk->prev != NULL)
		tcp_put_port(sk);

	atomic_dec(&tcp_sockets_allocated);

	return 0;
}
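/* A note on the (s32)(peer->tcp_ts - ts_recent) <= 0 test used in both
 * remember_stamp functions above: subtracting two u32 timestamps and
 * casting the difference to s32 gives a wraparound-safe ordering check.
 * A small worked example with hypothetical values:
 *
 *	u32 cached = 0xfffffff0, seen = 0x00000010;
 *	(s32)(cached - seen) == -0x20	-> cached is older, update it
 *
 * so the cached per-peer timestamp is only replaced when it is not
 * newer than the one just seen, even across 32-bit wrap.
 */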
/* Proc filesystem TCP sock list dumping. */
static void get_openreq(struct sock *sk, struct open_request *req,
			char *tmpbuf, int i, int uid)
{
	int ttd = req->expires - jiffies;

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p",
		i,
		req->af.v4_req.loc_addr,
		ntohs(sk->sport),
		req->af.v4_req.rmt_addr,
		ntohs(req->rmt_port),
		TCP_SYN_RECV,
		0, 0,	/* could print option size, but that is af dependent. */
		1,	/* timers active (only the expire timer) */
		ttd,
		req->retrans,
		uid,
		0,	/* non standard timer */
		0,	/* open_requests have no inode */
		atomic_read(&sk->refcnt),
		req
		);
}

static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i)
{
	unsigned int dest, src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;

	dest = sp->daddr;
	src = sp->rcv_saddr;
	destp = ntohs(sp->dport);
	srcp = ntohs(sp->sport);
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = tp->timeout;
	} else if (timer_pending(&sp->timer)) {
		timer_active = 2;
		timer_expires = sp->timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d",
		i, src, srcp, dest, destp, sp->state,
		tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		timer_active, timer_expires - jiffies,
		tp->retransmits,
		sock_i_uid(sp),
		tp->probes_out,
		sock_i_ino(sp),
		atomic_read(&sp->refcnt), sp,
		tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
		tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		);
}

static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
{
	unsigned int dest, src;
	__u16 destp, srcp;
	int ttd = tw->ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = tw->daddr;
	src = tw->rcv_saddr;
	destp = ntohs(tw->dport);
	srcp = ntohs(tw->sport);

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
		i, src, srcp, dest, destp, tw->substate, 0, 0,
		3, ttd, 0, 0, 0, 0,
		atomic_read(&tw->refcnt), tw);
}
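/* Each helper above formats one fixed-width (TMPSZ) row for
 * /proc/net/tcp. Addresses are printed as raw hex words and ports in
 * host order, so on a little-endian machine a hypothetical listener on
 * 127.0.0.1:80 would show up as "0100007F:0050". tcp_get_info() below
 * walks the listening table (including embryonic open_requests), then
 * the established and time-wait hash chains, emitting one row each.
 */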
#define TMPSZ 150

int tcp_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len = 0, num = 0, i;
	off_t begin, pos = 0;
	char tmpbuf[TMPSZ + 1];

	if (offset < TMPSZ)
		len += sprintf(buffer, "%-*s\n", TMPSZ - 1,
			       "  sl  local_address rem_address   st tx_queue "
			       "rx_queue tr tm->when retrnsmt   uid  timeout inode");

	pos = TMPSZ;

	/* First, walk listening socket table. */
	tcp_listen_lock();
	for (i = 0; i < TCP_LHTABLE_SIZE; i++) {
		struct sock *sk;
		struct tcp_listen_opt *lopt;
		int k;

		for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
			struct open_request *req;
			int uid;
			struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

			if (!TCP_INET_FAMILY(sk->family))
				goto skip_listen;

			pos += TMPSZ;
			if (pos >= offset) {
				get_tcp_sock(sk, tmpbuf, num);
				len += sprintf(buffer + len, "%-*s\n", TMPSZ - 1, tmpbuf);
				if (len >= length) {
					tcp_listen_unlock();
					goto out_no_bh;
				}
			}

skip_listen:
			uid = sock_i_uid(sk);
			read_lock_bh(&tp->syn_wait_lock);
			lopt = tp->listen_opt;
			if (lopt && lopt->qlen != 0) {
				for (k = 0; k < TCP_SYNQ_HSIZE; k++) {
					for (req = lopt->syn_table[k]; req; req = req->dl_next, num++) {
						if (!TCP_INET_FAMILY(req->class->family))
							continue;

						pos += TMPSZ;
						if (pos <= offset)
							continue;
						get_openreq(sk, req, tmpbuf, num, uid);
						len += sprintf(buffer + len, "%-*s\n", TMPSZ - 1, tmpbuf);
						if (len >= length) {
							read_unlock_bh(&tp->syn_wait_lock);
							tcp_listen_unlock();
							goto out_no_bh;
						}
					}
				}
			}
			read_unlock_bh(&tp->syn_wait_lock);

			/* Completed requests are in normal socket hash table */
		}
	}
	tcp_listen_unlock();

	local_bh_disable();

	/* Next, walk established hash chain. */
	for (i = 0; i < tcp_ehash_size; i++) {
		struct tcp_ehash_bucket *head = &tcp_ehash[i];
		struct sock *sk;
		struct tcp_tw_bucket *tw;

		read_lock(&head->lock);
		for (sk = head->chain; sk; sk = sk->next, num++) {
			if (!TCP_INET_FAMILY(sk->family))
				continue;
			pos += TMPSZ;
			if (pos <= offset)
				continue;
			get_tcp_sock(sk, tmpbuf, num);
			len += sprintf(buffer + len, "%-*s\n", TMPSZ - 1, tmpbuf);
			if (len >= length) {
				read_unlock(&head->lock);
				goto out;
			}
		}
		for (tw = (struct tcp_tw_bucket *)tcp_ehash[i + tcp_ehash_size].chain;
		     tw != NULL;
		     tw = (struct tcp_tw_bucket *)tw->next, num++) {
			if (!TCP_INET_FAMILY(tw->family))
				continue;
			pos += TMPSZ;
			if (pos <= offset)
				continue;
			get_timewait_sock(tw, tmpbuf, num);
			len += sprintf(buffer + len, "%-*s\n", TMPSZ - 1, tmpbuf);
			if (len >= length) {
				read_unlock(&head->lock);
				goto out;
			}
		}
		read_unlock(&head->lock);
	}

out:
	local_bh_enable();
out_no_bh:

	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}

struct proto tcp_prot = {
	name:		"TCP",
	close:		tcp_close,
	connect:	tcp_v4_connect,
	disconnect:	tcp_disconnect,
	accept:		tcp_accept,
	ioctl:		tcp_ioctl,
	init:		tcp_v4_init_sock,
	destroy:	tcp_v4_destroy_sock,
	shutdown:	tcp_shutdown,
	setsockopt:	tcp_setsockopt,
	getsockopt:	tcp_getsockopt,
	sendmsg:	tcp_sendmsg,
	recvmsg:	tcp_recvmsg,
	backlog_rcv:	tcp_v4_do_rcv,
	hash:		tcp_v4_hash,
	unhash:		tcp_unhash,
	get_port:	tcp_v4_get_port,
};

void __init tcp_v4_init(struct net_proto_family *ops)
{
	int err;

	tcp_inode.i_mode = S_IFSOCK;
	tcp_inode.i_sock = 1;
	tcp_inode.i_uid = 0;
	tcp_inode.i_gid = 0;
	init_waitqueue_head(&tcp_inode.i_wait);
	init_waitqueue_head(&tcp_inode.u.socket_i.wait);

	tcp_socket->inode = &tcp_inode;
	tcp_socket->state = SS_UNCONNECTED;
	tcp_socket->type = SOCK_RAW;

	if ((err = ops->create(tcp_socket, IPPROTO_TCP)) < 0)
		panic("Failed to create the TCP control socket.\n");
	tcp_socket->sk->allocation = GFP_ATOMIC;
	tcp_socket->sk->protinfo.af_inet.ttl = MAXTTL;

	/* Unhash it so that IP input processing does not even
	 * see it, we do not wish this socket to see incoming
	 * packets.
	 */
	tcp_socket->sk->prot->unhash(tcp_socket->sk);
}
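/* The control socket set up in tcp_v4_init() is what reply paths such
 * as tcp_v4_send_reset() transmit through when no full socket exists
 * for an incoming segment (see the no_tcp_socket path at the top of
 * this section); unhashing it keeps it invisible to input processing.
 */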