📄 ip.c
字号:
1207 #endif
1208
1209 /* This function receives all incoming IP datagrams. */
1210 int
1211 ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
1212 {
1213 struct iphdr *iph = skb->h.iph;
1214 unsigned char hash;
1215 unsigned char flag = 0;
1216 unsigned char opts_p = 0; /* Set iff the packet has options. */
1217 struct inet_protocol *ipprot;
1218 static struct options opt; /* since we don't use these yet, and they
1219 take up stack space. */
1220 int brd;
1221 int is_frag=0;
1222
1223 DPRINTF((DBG_IP, "<<\n"));
1224
1225 skb->ip_hdr = iph; /* Fragments can cause ICMP errors too! */
1226 /* Is the datagram acceptable? */
1227 if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) {
1228 DPRINTF((DBG_IP, "\nIP: *** datagram error ***\n"));
1229 DPRINTF((DBG_IP, " SRC = %s ", in_ntoa(iph->saddr)));
1230 DPRINTF((DBG_IP, " DST = %s (ignored)\n", in_ntoa(iph->daddr)));
1231 skb->sk = NULL;
1232 kfree_skb(skb, FREE_WRITE);
1233 return(0);
1234 }
1235
1236 if (iph->ihl != 5) { /* Fast path for the typical optionless IP packet. */
1237 ip_print(iph); /* Bogus, only for debugging. */
1238 memset((char *) &opt, 0, sizeof(opt));
1239 if (do_options(iph, &opt) != 0)
1240 return 0;
1241 opts_p = 1;
1242 }
1243
1244 if (iph->frag_off & 0x0020)
1245 is_frag|=1;
1246 if (ntohs(iph->frag_off) & 0x1fff)
1247 is_frag|=2;
1248
1249 /* Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. */
1250 if ((brd = chk_addr(iph->daddr)) == 0) {
1251 #ifdef CONFIG_IP_FORWARD
1252 ip_forward(skb, dev, is_frag);
1253 #else
1254 printk("Machine %x tried to use us as a forwarder to %x but we have forwarding disabled!\n",
1255 iph->saddr,iph->daddr);
1256 #endif
1257 skb->sk = NULL;
1258 kfree_skb(skb, FREE_WRITE);
1259 return(0);
1260 }
1261
1262 /*
1263 * Reassemble IP fragments.
1264 */
1265
1266 if(is_frag)
1267 {
1268 #ifdef CONFIG_IP_DEFRAG
1269 skb=ip_defrag(iph,skb,dev);
1270 if(skb==NULL)
1271 {
1272 return 0;
1273 }
1274 iph=skb->h.iph;
1275 #else
1276 printk("\nIP: *** datagram fragmentation not yet implemented ***\n");
1277 printk(" SRC = %s ", in_ntoa(iph->saddr));
1278 printk(" DST = %s (ignored)\n", in_ntoa(iph->daddr));
1279 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1280 skb->sk = NULL;
1281 kfree_skb(skb, FREE_WRITE);
1282 return(0);
1283 #endif
1284 }
1285
1286
1287
1288 if(brd==IS_INVBCAST)
1289 {
1290 /* printk("Invalid broadcast address from %x [target %x] (Probably they have a wrong netmask)\n",
1291 iph->saddr,iph->daddr);*/
1292 skb->sk=NULL;
1293 kfree_skb(skb,FREE_WRITE);
1294 return(0);
1295 }
1296
1297 /* Point into the IP datagram, just past the header. */
1298
1299 skb->ip_hdr = iph;
1300 skb->h.raw += iph->ihl*4;
1301 hash = iph->protocol & (MAX_INET_PROTOS -1);
1302 for (ipprot = (struct inet_protocol *)inet_protos[hash];
1303 ipprot != NULL;
1304 ipprot=(struct inet_protocol *)ipprot->next)
1305 {
1306 struct sk_buff *skb2;
1307
1308 if (ipprot->protocol != iph->protocol) continue;
1309 DPRINTF((DBG_IP, "Using protocol = %X:\n", ipprot));
1310 print_ipprot(ipprot);
1311
1312 /*
1313 * See if we need to make a copy of it. This will
1314 * only be set if more than one protocol wants it.
1315 * and then not for the last one.
1316 */
1317 if (ipprot->copy) {
1318 skb2 = alloc_skb(skb->mem_len, GFP_ATOMIC);
1319 if (skb2 == NULL)
1320 continue;
1321 memcpy(skb2, skb, skb->mem_len);
1322 skb2->mem_addr = skb2;
1323 skb2->ip_hdr = (struct iphdr *)(
1324 (unsigned long)skb2 +
1325 (unsigned long) skb->ip_hdr -
1326 (unsigned long)skb);
1327 skb2->h.raw = (unsigned char *)(
1328 (unsigned long)skb2 +
1329 (unsigned long) skb->h.raw -
1330 (unsigned long)skb);
1331 skb2->free=1;
1332 } else {
1333 skb2 = skb;
1334 }
1335 flag = 1;
1336
1337 /*
1338 * Pass on the datagram to each protocol that wants it,
1339 * based on the datagram protocol. We should really
1340 * check the protocol handler's return values here...
1341 */
1342 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1343 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1344 iph->saddr, 0, ipprot);
1345
1346 }
1347
1348 /*
1349 * All protocols checked.
1350 * If this packet was a broadcast, we may *not* reply to it, since that
1351 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
1352 * ICMP reply messages get queued up for transmission...)
1353 */
1354 if (!flag) {
1355 if (brd != IS_BROADCAST)
1356 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1357 skb->sk = NULL;
1358 kfree_skb(skb, FREE_WRITE);
1359 }
1360
1361 return(0);
1362 }
1363
1364
1365 /*
1366 * Queues a packet to be sent, and starts the transmitter
1367 * if necessary. if free = 1 then we free the block after
1368 * transmit, otherwise we don't.
1369 * This routine also needs to put in the total length, and
1370 * compute the checksum.
1371 */
1372 void
1373 ip_queue_xmit(struct sock *sk, struct device *dev,
1374 struct sk_buff *skb, int free)
1375 {
1376 struct iphdr *iph;
1377 unsigned char *ptr;
1378
1379 if (sk == NULL) free = 1;
1380 if (dev == NULL) {
1381 printk("IP: ip_queue_xmit dev = NULL\n");
1382 return;
1383 }
1384 IS_SKB(skb);
1385 skb->free = free;
1386 skb->dev = dev;
1387 skb->when = jiffies;
1388
1389 DPRINTF((DBG_IP, ">>\n"));
1390 ptr = skb->data;
1391 ptr += dev->hard_header_len;
1392 iph = (struct iphdr *)ptr;
1393 skb->ip_hdr = iph;
1394 iph->tot_len = ntohs(skb->len-dev->hard_header_len);
1395
1396 if(skb->len > dev->mtu)
1397 {
1398 /* printk("Fragment!\n");*/
1399 ip_fragment(sk,skb,dev,0);
1400 IS_SKB(skb);
1401 kfree_skb(skb,FREE_WRITE);
1402 return;
1403 }
1404
1405 ip_send_check(iph);
1406 ip_print(iph);
1407 skb->next = NULL;
1408
1409 /* See if this is the one trashing our queue. Ross? */
1410 skb->magic = 1;
1411 if (!free) {
1412 skb->link3 = NULL;
1413 sk->packets_out++;
1414 cli();
1415 if (sk->send_head == NULL) {
1416 sk->send_tail = skb;
1417 sk->send_head = skb;
1418 } else {
1419 /* See if we've got a problem. */
1420 if (sk->send_tail == NULL) {
1421 printk("IP: ***bug sk->send_tail == NULL != sk->send_head\n");
1422 sort_send(sk);
1423 } else {
1424 sk->send_tail->link3 = skb;
1425 sk->send_tail = skb;
1426 }
1427 }
1428 sti();
1429 reset_timer(sk, TIME_WRITE, sk->rto);
1430 } else {
1431 skb->sk = sk;
1432 }
1433
1434 /* If the indicated interface is up and running, kick it. */
1435 if (dev->flags & IFF_UP) {
1436 if (sk != NULL) {
1437 dev->queue_xmit(skb, dev, sk->priority);
1438 }
1439 else {
1440 dev->queue_xmit(skb, dev, SOPRI_NORMAL);
1441 }
1442 } else {
1443 if (free) kfree_skb(skb, FREE_WRITE);
1444 }
1445 }
1446
1447
1448 void
1449 ip_do_retransmit(struct sock *sk, int all)
1450 {
1451 struct sk_buff * skb;
1452 struct proto *prot;
1453 struct device *dev;
1454 int retransmits;
1455
1456 prot = sk->prot;
1457 skb = sk->send_head;
1458 retransmits = sk->retransmits;
1459 while (skb != NULL) {
1460 dev = skb->dev;
1461 /* I know this can't happen but as it does.. */
1462 if(dev==NULL)
1463 {
1464 printk("ip_retransmit: NULL device bug!\n");
1465 goto oops;
1466 }
1467
1468 IS_SKB(skb);
1469
1470 /*
1471 * The rebuild_header function sees if the ARP is done.
1472 * If not it sends a new ARP request, and if so it builds
1473 * the header.
1474 */
1475 cli(); /* We might get interrupted by an arp reply here and fill
1476 the frame in twice. Because of the technique used this
1477 would be a little sad */
1478 if (!skb->arp) {
1479 if (dev->rebuild_header(skb->data, dev)) {
1480 sti(); /* Failed to rebuild - next */
1481 if (!all) break;
1482 skb = (struct sk_buff *)skb->link3;
1483 continue;
1484 }
1485 }
1486 skb->arp = 1;
1487 sti();
1488 skb->when = jiffies;
1489
1490 /* If the interface is (still) up and running, kick it. */
1491 if (dev->flags & IFF_UP) {
1492 if (sk && !skb_device_locked(skb))
1493 dev->queue_xmit(skb, dev, sk->priority);
1494 /* else dev->queue_xmit(skb, dev, SOPRI_NORMAL ); CANNOT HAVE SK=NULL HERE */
1495 }
1496
1497 oops: retransmits++;
1498 sk->prot->retransmits ++;
1499 if (!all) break;
1500
1501 /* This should cut it off before we send too many packets. */
1502 if (sk->retransmits > sk->cong_window) break;
1503 skb = (struct sk_buff *)skb->link3;
1504 }
1505 }
1506
1507 /*
1508 * This is the normal code called for timeouts. It does the retransmission
1509 * and then does backoff. ip_do_retransmit is separated out because
1510 * tcp_ack needs to send stuff from the retransmit queue without
1511 * initiating a backoff.
1512 */
1513
1514 void
1515 ip_retransmit(struct sock *sk, int all)
1516 {
1517 ip_do_retransmit(sk, all);
1518
1519 /*
1520 * Increase the timeout each time we retransmit. Note that
1521 * we do not increase the rtt estimate. rto is initialized
1522 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
1523 * that doubling rto each time is the least we can get away with.
1524 * In KA9Q, Karns uses this for the first few times, and then
1525 * goes to quadratic. netBSD doubles, but only goes up to *64,
1526 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is
1527 * defined in the protocol as the maximum possible RTT. I guess
1528 * we'll have to use something other than TCP to talk to the
1529 * University of Mars.
1530 */
1531
1532 sk->retransmits++;
1533 sk->backoff++;
1534 sk->rto = min(sk->rto << 1, 120*HZ);
1535 reset_timer(sk, TIME_WRITE, sk->rto);
1536 }
1537
1538 /*
1539 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
1540 * an IP socket.
1541 */
1542
1543 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
1544 {
1545 int val,err;
1546
1547 if (optval == NULL)
1548 return(-EINVAL);
1549
1550 err=verify_area(VERIFY_READ, optval, sizeof(int));
1551 if(err)
1552 return err;
1553
1554 val = get_fs_long((unsigned long *)optval);
1555
1556 if(level!=SOL_IP)
1557 return -EOPNOTSUPP;
1558
1559 switch(optname)
1560 {
1561 case IP_TOS:
1562 if(val<0||val>255)
1563 return -EINVAL;
1564 sk->ip_tos=val;
1565 return 0;
1566 case IP_TTL:
1567 if(val<1||val>255)
1568 return -EINVAL;
1569 sk->ip_ttl=val;
1570 return 0;
1571 /* IP_OPTIONS and friends go here eventually */
1572 default:
1573 return(-ENOPROTOOPT);
1574 }
1575 }
1576
1577 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
1578 {
1579 int val,err;
1580
1581 if(level!=SOL_IP)
1582 return -EOPNOTSUPP;
1583
1584 switch(optname)
1585 {
1586 case IP_TOS:
1587 val=sk->ip_tos;
1588 break;
1589 case IP_TTL:
1590 val=sk->ip_ttl;
1591 break;
1592 default:
1593 return(-ENOPROTOOPT);
1594 }
1595 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1596 if(err)
1597 return err;
1598 put_fs_long(sizeof(int),(unsigned long *) optlen);
1599
1600 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1601 if(err)
1602 return err;
1603 put_fs_long(val,(unsigned long *)optval);
1604
1605 return(0);
1606 }
1607
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -