📄 tcp.c
字号:
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Version: @(#)tcp.c 1.0.16 05/25/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 * Corey Minyard <wf-rch!minyard@relay.EU.net>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 *
16 * Fixes:
17 * Alan Cox : Numerous verify_area() calls
18 * Alan Cox : Set the ACK bit on a reset
19 * Alan Cox : Stopped it crashing if it closed while sk->inuse=1
20 * and was trying to connect (tcp_err()).
21 * Alan Cox : All icmp error handling was broken
22 * pointers passed were wrong and the
23 * socket was looked up backwards. Nobody
24 * tested any icmp error code obviously.
25 * Alan Cox : tcp_err() now handled properly. It wakes people
26 * on errors. select behaves and the icmp error race
27 * has gone by moving it into sock.c
28 * Alan Cox : tcp_reset() fixed to work for everything not just
29 * packets for unknown sockets.
30 * Alan Cox : tcp option processing.
31 * Alan Cox : Reset tweaked (still not 100%) [Had syn rule wrong]
32 * Herp Rosmanith : More reset fixes
33 * Alan Cox : No longer acks invalid rst frames. Acking
34 * any kind of RST is right out.
35 * Alan Cox : Sets an ignore me flag on an rst receive
36 * otherwise odd bits of prattle escape still
37 * Alan Cox : Fixed another acking RST frame bug. Should stop
38 * LAN workplace lockups.
39 * Alan Cox : Some tidyups using the new skb list facilities
40 * Alan Cox : sk->keepopen now seems to work
41 * Alan Cox : Pulls options out correctly on accepts
42 * Alan Cox : Fixed assorted sk->rqueue->next errors
43 * Alan Cox : PSH doesn't end a TCP read. Switched a bit to skb ops.
44 * Alan Cox : Tidied tcp_data to avoid a potential nasty.
45 * Alan Cox : Added some better commenting, as the tcp is hard to follow
46 * Alan Cox : Removed incorrect check for 20 * psh
47 * Michael O'Reilly : ack < copied bug fix.
48 * Johannes Stille : Misc tcp fixes (not all in yet).
49 * Alan Cox : FIN with no memory -> CRASH
50 * Alan Cox : Added socket option proto entries. Also added awareness of them to accept.
51 * Alan Cox : Added TCP options (SOL_TCP)
52 * Alan Cox : Switched wakeup calls to callbacks, so the kernel can layer network sockets.
53 * Alan Cox : Use ip_tos/ip_ttl settings.
54 * Alan Cox : Handle FIN (more) properly (we hope).
55 * Alan Cox : RST frames sent on unsynchronised state ack error/
56 * Alan Cox : Put in missing check for SYN bit.
57 * Alan Cox : Added tcp_select_window() aka NET2E
58 * window non shrink trick.
59 * Alan Cox : Added a couple of small NET2E timer fixes
60 * Charles Hedrick : TCP fixes
61 * Toomas Tamm : TCP window fixes
62 * Alan Cox : Small URG fix to rlogin ^C ack fight
63 * Charles Hedrick : Window fix
64 * Linus : Rewrote tcp_read() and URG handling
65 * completely
66 *
67 *
68 * To Fix:
69 * Possibly a problem with accept(). BSD accept never fails after
70 * it causes a select. Linux can - given the official select semantics I
71 * feel that _really_ its the BSD network programs that are bust (notably
72 * inetd, which hangs occasionally because of this).
73 * Add VJ Fastrecovery algorithm ?
74 * Protocol closedown badly messed up.
75 * Incompatibility with spider ports (tcp hangs on that
76 * socket occasionally).
77 * MSG_PEEK and read on same socket at once can cause crashes.
78 *
79 * This program is free software; you can redistribute it and/or
80 * modify it under the terms of the GNU General Public License
81 * as published by the Free Software Foundation; either version
82 * 2 of the License, or(at your option) any later version.
83 */
84 #include <linux/types.h>
85 #include <linux/sched.h>
86 #include <linux/mm.h>
87 #include <linux/string.h>
88 #include <linux/socket.h>
89 #include <linux/sockios.h>
90 #include <linux/termios.h>
91 #include <linux/in.h>
92 #include <linux/fcntl.h>
93 #include "inet.h"
94 #include "dev.h"
95 #include "ip.h"
96 #include "protocol.h"
97 #include "icmp.h"
98 #include "tcp.h"
99 #include "skbuff.h"
100 #include "sock.h"
101 #include "arp.h"
102 #include <linux/errno.h>
103 #include <linux/timer.h>
104 #include <asm/system.h>
105 #include <asm/segment.h>
106 #include <linux/mm.h>
107
108 #define SEQ_TICK 3
109 unsigned long seq_offset;
110 #define SUBNETSARELOCAL
111
/*
 * Return the smaller of two unsigned values.
 * The chosen value is converted to int by the return type.
 */
static __inline__ int
min(unsigned int a, unsigned int b)
{
	return (a < b) ? a : b;
}
118
119
120 static void __print_th(struct tcphdr *th)
121 {
122 unsigned char *ptr;
123
124 printk("TCP header:\n");
125 printk(" source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
126 ntohs(th->source), ntohs(th->dest),
127 ntohl(th->seq), ntohl(th->ack_seq));
128 printk(" fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
129 th->fin, th->syn, th->rst, th->psh, th->ack,
130 th->urg, th->res1, th->res2);
131 printk(" window = %d, check = %d urg_ptr = %d\n",
132 ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
133 printk(" doff = %d\n", th->doff);
134 ptr =(unsigned char *)(th + 1);
135 printk(" options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]);
136 }
137
138 static inline void print_th(struct tcphdr *th)
139 {
140 if (inet_debug == DBG_TCP)
141 __print_th(th);
142 }
143
144 /* This routine grabs the first thing off of a rcv queue. */
145 static struct sk_buff *
146 get_firstr(struct sock *sk)
147 {
148 return skb_dequeue(&sk->rqueue);
149 }
150
/*
 * Absolute difference between two values in tcp ack terms.
 *
 * seq1, seq2: the two values to compare.
 *
 * Returns the magnitude of (seq1 - seq2), where the subtraction is done
 * modulo the width of unsigned long and the result is then read as a
 * signed quantity.  Two values on either side of the wrap point
 * therefore give a small distance rather than a huge one.
 *
 * The negation is done in unsigned arithmetic.  Negating the most
 * negative long as a signed value (the old "~d+1") overflows, which is
 * undefined behaviour; the unsigned form is well defined and gives the
 * same result for every other input.
 */
static long
diff(unsigned long seq1, unsigned long seq2)
{
	unsigned long delta = seq1 - seq2;

	if ((long) delta > 0)
		return((long) delta);

	/* Zero or negative: flip the sign without signed overflow. */
	return((long) (0UL - delta));
}
166
167 /* This routine picks a TCP windows for a socket based on
168 the following constraints
169
170 1. The window can never be shrunk once it is offered (RFC 793)
171 2. We limit memory per socket
172
173 For now we use NET2E3's heuristic of offering half the memory
174 we have handy. All is not as bad as this seems however because
175 of two things. Firstly we will bin packets even within the window
176 in order to get the data we are waiting for into the memory limit.
177 Secondly we bin common duplicate forms at receive time
178
179 Better heuristics welcome
180 */
181
182 static int tcp_select_window(struct sock *sk)
183 {
184 int new_window = sk->prot->rspace(sk);
185
186 /*
187 * two things are going on here. First, we don't ever offer a
188 * window less than min(sk->mss, MAX_WINDOW/2). This is the
189 * receiver side of SWS as specified in RFC1122.
190 * Second, we always give them at least the window they
191 * had before, in order to avoid retracting window. This
192 * is technically allowed, but RFC1122 advises against it and
193 * in practice it causes trouble.
194 */
195 if (new_window < min(sk->mss, MAX_WINDOW/2) ||
196 new_window < sk->window)
197 return(sk->window);
198 return(new_window);
199 }
200
201 /* Enter the time wait state. */
202
203 static void tcp_time_wait(struct sock *sk)
204 {
205 sk->state = TCP_TIME_WAIT;
206 sk->shutdown = SHUTDOWN_MASK;
207 if (!sk->dead)
208 sk->state_change(sk);
209 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
210 }
211
212 /*
213 * A timer event has trigger a tcp retransmit timeout. The
214 * socket xmit queue is ready and set up to send. Because
215 * the ack receive code keeps the queue straight we do
216 * nothing clever here.
217 */
218
219 static void
220 tcp_retransmit(struct sock *sk, int all)
221 {
222 if (all) {
223 ip_retransmit(sk, all);
224 return;
225 }
226
227 sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
228 /* sk->ssthresh in theory can be zero. I guess that's OK */
229 sk->cong_count = 0;
230
231 sk->cong_window = 1;
232
233 /* Do the actual retransmit. */
234 ip_retransmit(sk, all);
235 }
236
237
238 /*
239 * This routine is called by the ICMP module when it gets some
240 * sort of error condition. If err < 0 then the socket should
241 * be closed and the error returned to the user. If err > 0
242 * it's just the icmp type << 8 | icmp code. After adjustment
243 * header points to the first 8 bytes of the tcp header. We need
244 * to find the appropriate port.
245 */
246 void
247 tcp_err(int err, unsigned char *header, unsigned long daddr,
248 unsigned long saddr, struct inet_protocol *protocol)
249 {
250 struct tcphdr *th;
251 struct sock *sk;
252 struct iphdr *iph=(struct iphdr *)header;
253
254 header+=4*iph->ihl;
255
256 DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
257 err, header, daddr, saddr, protocol));
258
259 th =(struct tcphdr *)header;
260 sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
261 print_th(th);
262
263 if (sk == NULL) return;
264
265 if(err<0)
266 {
267 sk->err = -err;
268 sk->error_report(sk);
269 return;
270 }
271
272 if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
273 /*
274 * FIXME:
275 * For now we will just trigger a linear backoff.
276 * The slow start code should cause a real backoff here.
277 */
278 if (sk->cong_window > 4) sk->cong_window--;
279 return;
280 }
281
282 DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
283 sk->err = icmp_err_convert[err & 0xff].errno;
284
285 /*
286 * If we've already connected we will keep trying
287 * until we time out, or the user gives up.
288 */
289 if (icmp_err_convert[err & 0xff].fatal) {
290 if (sk->state == TCP_SYN_SENT) {
291 sk->state = TCP_CLOSE;
292 sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
293 }
294 }
295 return;
296 }
297
298
299 /*
300 * Walk down the receive queue counting readable data until we hit the end or we find a gap
301 * in the received data queue (ie a frame missing that needs sending to us)
302 */
303
304 static int
305 tcp_readable(struct sock *sk)
306 {
307 unsigned long counted;
308 unsigned long amount;
309 struct sk_buff *skb;
310 int count=0;
311 int sum;
312 unsigned long flags;
313
314 DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
315 if(sk && sk->debug)
316 printk("tcp_readable: %p - ",sk);
317
318 if (sk == NULL || skb_peek(&sk->rqueue) == NULL) /* Empty sockets are easy! */
319 {
320 if(sk && sk->debug)
321 printk("empty\n");
322 return(0);
323 }
324
325 counted = sk->copied_seq+1; /* Where we are at the moment */
326 amount = 0;
327
328 save_flags(flags); /* So nobody adds things at the wrong moment */
329 cli();
330 skb =(struct sk_buff *)sk->rqueue;
331
332 /* Do until a push or until we are out of data. */
333 do {
334 count++;
335 #ifdef OLD
336 /* This is wrong: It breaks Chameleon amongst other stacks */
337 if (count > 20) {
338 restore_flags(flags);
339 DPRINTF((DBG_TCP, "tcp_readable, more than 20 packets without a psh\n"));
340 printk("tcp_read: possible read_queue corruption.\n");
341 return(amount);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -