📄 ip_output.c
	/*
	 *	Begin outputting the bytes.
	 */

	id = sk->protinfo.af_inet.id++;

	do {
		char *data;
		struct sk_buff * skb;

		/*
		 *	Get the memory we require with some space left for alignment.
		 */

		skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, flags&MSG_DONTWAIT, &err);
		if (skb == NULL)
			goto error;

		/*
		 *	Fill in the control structures
		 */

		skb->priority = sk->priority;
		skb->dst = dst_clone(&rt->u.dst);
		skb_reserve(skb, hh_len);

		/*
		 *	Find where to start putting bytes.
		 */

		data = skb_put(skb, fraglen);
		skb->nh.iph = (struct iphdr *)data;

		/*
		 *	Only write IP header onto non-raw packets
		 */

		{
			struct iphdr *iph = (struct iphdr *)data;

			iph->version = 4;
			iph->ihl = 5;
			if (opt) {
				iph->ihl += opt->optlen>>2;
				ip_options_build(skb, opt, ipc->addr, rt, offset);
			}
			iph->tos = sk->protinfo.af_inet.tos;
			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
			iph->frag_off = htons(offset>>3)|mf|df;
			iph->id = id;
			if (!mf) {
				if (offset || !df) {
					/* Select an unpredictable ident only
					 * for packets without DF or having
					 * been fragmented.
					 */
					__ip_select_ident(iph, &rt->u.dst);
					id = iph->id;
				}

				/*
				 *	Any further fragments will have MF set.
				 */
				mf = htons(IP_MF);
			}
			if (rt->rt_type == RTN_MULTICAST)
				iph->ttl = sk->protinfo.af_inet.mc_ttl;
			else
				iph->ttl = sk->protinfo.af_inet.ttl;
			iph->protocol = sk->protocol;
			iph->check = 0;
			iph->saddr = rt->rt_src;
			iph->daddr = rt->rt_dst;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
			data += iph->ihl*4;
		}

		/*
		 *	User data callback
		 */

		if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
			err = -EFAULT;
			kfree_skb(skb);
			goto error;
		}

		offset -= (maxfraglen-fragheaderlen);
		fraglen = maxfraglen;

		nfrags++;

		err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
			      skb->dst->dev, output_maybe_reroute);
		if (err) {
			if (err > 0)
				err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
			if (err)
				goto error;
		}
	} while (offset >= 0);

	if (nfrags>1)
		ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
out:
	return 0;

error:
	IP_INC_STATS(IpOutDiscards);
	if (nfrags>1)
		ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
	return err;
}

/*
 *	Fast path for unfragmented packets.
 */
int ip_build_xmit(struct sock *sk,
		  int getfrag (const void *,
			       char *,
			       unsigned int,
			       unsigned int),
		  const void *frag,
		  unsigned length,
		  struct ipcm_cookie *ipc,
		  struct rtable *rt,
		  int flags)
{
	int err;
	struct sk_buff *skb;
	int df;
	struct iphdr *iph;

	/*
	 *	Try the simple case first. This leaves fragmented frames, and by
	 *	choice RAW frames within 20 bytes of maximum size(rare) to the long path
	 */

	if (!sk->protinfo.af_inet.hdrincl) {
		length += sizeof(struct iphdr);

		/*
		 *	Check for slow path.
		 */
		if (length > rt->u.dst.pmtu || ipc->opt != NULL)
			return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
	} else {
		if (length > rt->u.dst.dev->mtu) {
			ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport,
				       rt->u.dst.dev->mtu);
			return -EMSGSIZE;
		}
	}
	if (flags&MSG_PROBE)
		goto out;

	/*
	 *	Do path mtu discovery if needed.
	 */
	df = 0;
	if (ip_dont_fragment(sk, &rt->u.dst))
		df = htons(IP_DF);

	/*
	 *	Fast path for unfragmented frames without options.
	 */
	{
	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;

	skb = sock_alloc_send_skb(sk, length+hh_len+15,
				  flags&MSG_DONTWAIT, &err);
	if(skb==NULL)
		goto error;
	skb_reserve(skb, hh_len);
	}

	skb->priority = sk->priority;
	skb->dst = dst_clone(&rt->u.dst);

	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);

	if(!sk->protinfo.af_inet.hdrincl) {
		iph->version=4;
		iph->ihl=5;
		iph->tos=sk->protinfo.af_inet.tos;
		iph->tot_len = htons(length);
		iph->frag_off = df;
		iph->ttl=sk->protinfo.af_inet.mc_ttl;
		ip_select_ident(iph, &rt->u.dst, sk);
		if (rt->rt_type != RTN_MULTICAST)
			iph->ttl=sk->protinfo.af_inet.ttl;
		iph->protocol=sk->protocol;
		iph->saddr=rt->rt_src;
		iph->daddr=rt->rt_dst;
		iph->check=0;
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
		err = getfrag(frag, ((char *)iph)+iph->ihl*4, 0, length-iph->ihl*4);
	}
	else
		err = getfrag(frag, (void *)iph, 0, length);

	if (err)
		goto error_fault;

	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		      output_maybe_reroute);
	if (err > 0)
		err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
	if (err)
		goto error;
out:
	return 0;

error_fault:
	err = -EFAULT;
	kfree_skb(skb);
error:
	IP_INC_STATS(IpOutDiscards);
	return err;
}

/*
 *	This IP datagram is too large to be sent in one piece.  Break it up into
 *	smaller pieces (each of size equal to IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending.
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 */

int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
	struct iphdr *iph;
	int raw = 0;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len;
	int offset;
	int not_last_frag;
	struct rtable *rt = (struct rtable*)skb->dst;
	int err = 0;

	dev = rt->u.dst.dev;

	/*
	 *	Point into the IP datagram header.
	 */

	iph = skb->nh.iph;

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	left = skb->len - hlen;		/* Space per frame */
	mtu = rt->u.dst.pmtu - hlen;	/* Size of data space */
	ptr = raw + hlen;		/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		skb2->pkt_type = skb->pkt_type;
		skb2->priority = skb->priority;
		skb_reserve(skb2, (dev->hard_header_len+15)&~15);
		skb_put(skb2, len + hlen);
		skb2->nh.raw = skb2->data;
		skb2->h.raw = skb2->data + hlen;
		skb2->protocol = skb->protocol;
		skb2->security = skb->security;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		skb2->dst = dst_clone(skb->dst);
		skb2->dev = skb->dev;

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->nh.raw, skb->data, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
			BUG();
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = skb2->nh.iph;
		iph->frag_off = htons((offset >> 3));

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/* Copy the flags to each fragment. */
		IPCB(skb2)->flags = IPCB(skb)->flags;

		/*
		 *	Added AC : If we are fragmenting a fragment that's not the
		 *		   last fragment then keep MF on each bit
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

#ifdef CONFIG_NET_SCHED
		skb2->tc_index = skb->tc_index;
#endif
#ifdef CONFIG_NETFILTER
		skb2->nfmark = skb->nfmark;
		/* Connection association is same as pre-frag packet */
		skb2->nfct = skb->nfct;
		nf_conntrack_get(skb2->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
		skb2->nf_debug = skb->nf_debug;
#endif
#endif

		/*
		 *	Put this fragment into the sending queue.
		 */

		IP_INC_STATS(IpFragCreates);

		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		err = output(skb2);
		if (err)
			goto fail;
	}
	kfree_skb(skb);
	IP_INC_STATS(IpFragOKs);
	return err;

fail:
	kfree_skb(skb);
	IP_INC_STATS(IpFragFails);
	return err;
}

/*
 *	Fetch data from kernel space and fill in checksum if needed.
 */
static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
			      unsigned int fraglen)
{
	struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
	u16 *pktp = (u16 *)to;
	struct iovec *iov;
	int len;
	int hdrflag = 1;

	iov = &dp->iov[0];
	if (offset >= iov->iov_len) {
		offset -= iov->iov_len;
		iov++;
		hdrflag = 0;
	}
	len = iov->iov_len - offset;
	if (fraglen > len) { /* overlapping. */
		dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
						     dp->csum);
		offset = 0;
		fraglen -= len;
		to += len;
		iov++;
	}

	dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
					     dp->csum);

	if (hdrflag && dp->csumoffset)
		*(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
	return 0;
}

/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far. ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure to pass arguments.
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct {
		struct ip_options	opt;
		char			data[40];
	} replyopts;
	struct ipcm_cookie ipc;
	u32 daddr;
	struct rtable *rt = (struct rtable*)skb->dst;

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	daddr = ipc.addr = rt->rt_src;
	ipc.opt = NULL;

	if (replyopts.opt.optlen) {
		ipc.opt = &replyopts.opt;

		if (ipc.opt->srr)
			daddr = replyopts.opt.faddr;
	}

	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
		return;

	/* And let IP do all the hard work.

	   This chunk is not reenterable, hence spinlock.
	   Note that it uses the fact, that this function is called
	   with locally disabled BH and that sk cannot be already spinlocked.
	 */
	bh_lock_sock(sk);
	sk->protinfo.af_inet.tos = skb->nh.iph->tos;
	sk->priority = skb->priority;
	sk->protocol = skb->nh.iph->protocol;
	ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
	bh_unlock_sock(sk);

	ip_rt_put(rt);
}

/*
 *	IP protocol layer initialiser
 */

static struct packet_type ip_packet_type =
{
	__constant_htons(ETH_P_IP),
	NULL,	/* All devices */
	ip_rcv,
	(void*)1,
	NULL,
};

/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

void __init ip_init(void)
{
	dev_add_pack(&ip_packet_type);

	ip_rt_init();
	inet_initpeers();

#ifdef CONFIG_IP_MULTICAST
	proc_net_create("igmp", 0, ip_mc_procinfo);
#endif
}
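To make the arithmetic in ip_fragment() above easier to follow, here is a minimal userspace sketch (not kernel code; the helper name show_fragments is invented for illustration). It reproduces how each fragment's data length is chosen, at most the path MTU minus the header and rounded down to a multiple of 8 for every piece except the last, and how the byte offset is stored in 8-byte units in frag_off together with IP_MF. It assumes the datagram being split is not itself already a fragment (the not_last_frag case handled in the kernel loop), and it prints frag_off in host byte order, whereas the kernel stores it with htons().

#include <stdio.h>

#define IP_MF     0x2000   /* "more fragments" flag, same value as the kernel's */
#define IP_OFFSET 0x1FFF   /* mask for the fragment offset field */

/* Split 'total' payload bytes behind an 'hlen'-byte IP header across an
 * 'mtu'-byte link, printing the length and frag_off value (host order)
 * that each fragment would carry. */
static void show_fragments(unsigned int total, unsigned int hlen, unsigned int mtu)
{
	unsigned int left = total;          /* payload bytes still to send  */
	unsigned int space = mtu - hlen;    /* data space per fragment      */
	unsigned int offset = 0;            /* byte offset of next fragment */

	while (left > 0) {
		unsigned int len = left;
		unsigned int frag_off;

		if (len > space)
			len = space;
		if (len < left)             /* not the last piece: 8-byte align */
			len &= ~7u;

		frag_off = (offset >> 3) & IP_OFFSET;
		if (len < left)             /* more fragments follow */
			frag_off |= IP_MF;

		printf("offset=%5u len=%4u tot_len=%4u frag_off=0x%04x\n",
		       offset, len, len + hlen, frag_off);

		offset += len;
		left -= len;
	}
}

int main(void)
{
	/* e.g. a 4000-byte payload behind a 20-byte header on a 1500-byte MTU link */
	show_fragments(4000, 20, 1500);
	return 0;
}

Run with the example values, this yields three fragments carrying 1480, 1480 and 1040 data bytes, with IP_MF set on the first two and offsets of 0, 185 and 370 in 8-byte units, which matches what the kernel loop produces for the same sizes.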