📄 ip_output.c
字号:
if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) goto slow_path; /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path; BUG_ON(frag->sk); if (skb->sk) { sock_hold(skb->sk); frag->sk = skb->sk; frag->destructor = sock_wfree; skb->truesize -= frag->truesize; } } /* Everything is OK. Generate! */ err = 0; offset = 0; frag = skb_shinfo(skb)->frag_list; skb_shinfo(skb)->frag_list = NULL; skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; iph->tot_len = htons(first_len); iph->frag_off = htons(IP_MF); ip_send_check(iph); for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (frag) { frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), iph, hlen); iph = ip_hdr(frag); iph->tot_len = htons(frag->len); ip_copy_metadata(frag, skb); if (offset == 0) ip_options_fragment(frag); offset += skb->len - hlen; iph->frag_off = htons(offset>>3); if (frag->next != NULL) iph->frag_off |= htons(IP_MF); /* Ready, complete checksum */ ip_send_check(iph); } err = output(skb); if (!err) IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; skb = frag; frag = skb->next; skb->next = NULL; } if (err == 0) { IP_INC_STATS(IPSTATS_MIB_FRAGOKS); return 0; } while (frag) { skb = frag->next; kfree_skb(frag); frag = skb; } IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); return err; }slow_path: left = skb->len - hlen; /* Space per frame */ ptr = raw + hlen; /* Where to start from */ /* for bridged IP traffic encapsulated inside f.e. a vlan header, * we need to make room for the encapsulating header */ pad = nf_bridge_pad(skb); ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); mtu -= pad; /* * Fragment the datagram. */ offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; not_last_frag = iph->frag_off & htons(IP_MF); /* * Keep copying data until we run out. */ while (left > 0) { len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; /* IF: we are not sending upto and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; } /* * Allocate buffer. */ if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n"); err = -ENOMEM; goto fail; } /* * Set up data on packet */ ip_copy_metadata(skb2, skb); skb_reserve(skb2, ll_rs); skb_put(skb2, len + hlen); skb_reset_network_header(skb2); skb2->transport_header = skb2->network_header + hlen; /* * Charge the memory for the fragment to any owner * it might possess */ if (skb->sk) skb_set_owner_w(skb2, skb->sk); /* * Copy the packet header into the new buffer. */ skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); /* * Copy a block of the IP datagram. */ if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len)) BUG(); left -= len; /* * Fill in the new header fields. */ iph = ip_hdr(skb2); iph->frag_off = htons((offset >> 3)); /* ANK: dirty, but effective trick. Upgrade options only if * the segment to be fragmented was THE FIRST (otherwise, * options are already fixed) and make it ONCE * on the initial skb, so that all the following fragments * will inherit fixed options. */ if (offset == 0) ip_options_fragment(skb); /* * Added AC : If we are fragmenting a fragment that's not the * last fragment then keep MF on each bit */ if (left > 0 || not_last_frag) iph->frag_off |= htons(IP_MF); ptr += len; offset += len; /* * Put this fragment into the sending queue. */ iph->tot_len = htons(len + hlen); ip_send_check(iph); err = output(skb2); if (err) goto fail; IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP_INC_STATS(IPSTATS_MIB_FRAGOKS); return err;fail: kfree_skb(skb); IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); return err;}EXPORT_SYMBOL(ip_fragment);intip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb){ struct iovec *iov = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { if (memcpy_fromiovecend(to, iov, offset, len) < 0) return -EFAULT; } else { __wsum csum = 0; if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } return 0;}static inline __wsumcsum_page(struct page *page, int offset, int copy){ char *kaddr; __wsum csum; kaddr = kmap(page); csum = csum_partial(kaddr + offset, copy, 0); kunmap(page); return csum;}static inline int ip_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, int transhdrlen, int mtu,unsigned int flags){ struct sk_buff *skb; int err; /* There is support for UDP fragmentation offload by network * device, so create one single skb packet containing complete * udp datagram */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); if (skb == NULL) return err; /* reserve space for Hardware header */ skb_reserve(skb, hh_len); /* create space for UDP/IP header */ skb_put(skb,fragheaderlen + transhdrlen); /* initialize network header pointer */ skb_reset_network_header(skb); /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } err = skb_append_datato_frags(sk,skb, getfrag, from, (length - transhdrlen)); if (!err) { /* specify the length of each IP datagram fragment*/ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; __skb_queue_tail(&sk->sk_write_queue, skb); return 0; } /* There is not enough support do UFO , * so follow normal path */ kfree_skb(skb); return err;}/* * ip_append_data() and ip_append_page() can make one large IP datagram * from many pieces of data. Each pieces will be holded on the socket * until ip_push_pending_frames() is called. Each piece can be a page * or non-page data. * * Not only UDP, other transport protocols - e.g. raw sockets - can use * this interface potentially. * * LATER: length must be adjusted by pad at tail, when it is required. */int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable *rt, unsigned int flags){ struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; struct ip_options *opt = NULL; int hh_len; int exthdrlen; int mtu; int copy; int err; int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; if (flags&MSG_PROBE) return 0; if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking. */ opt = ipc->opt; if (opt) { if (inet->cork.opt == NULL) { inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); if (unlikely(inet->cork.opt == NULL)) return -ENOBUFS; } memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); inet->cork.flags |= IPCORK_OPT; inet->cork.addr = ipc->addr; } dst_hold(&rt->u.dst); inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); inet->cork.rt = rt; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; if ((exthdrlen = rt->u.dst.header_len) != 0) { length += exthdrlen; transhdrlen += exthdrlen; } } else { rt = inet->cork.rt; if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; transhdrlen = 0; exthdrlen = 0; mtu = inet->cork.fragsize; } hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + length > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); return -EMSGSIZE; } /* * transhdrlen > 0 means that this is the first fragment and we wish * it won't be fragmented in the future. */ if (transhdrlen && length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; inet->cork.length += length; if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && (rt->u.dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); if (err) goto error; return 0; } /* So, what's going on in the loop below? * * We use calculated fragment length to generate chained skb, * each of segments is IP fragment ready for sending to network after * adding appropriate IP header. */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ copy = mtu - skb->len; if (copy < length) copy = maxfraglen - skb->len; if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; struct sk_buff *skb_prev;alloc_new_skb: skb_prev = skb; if (skb_prev) fraggap = skb_prev->len - maxfraglen; else fraggap = 0; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; if (datalen > mtu - fragheaderlen) datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && !(rt->u.dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; /* The last fragment gets additional space at tail. * Note, with MSG_MORE we overallocate on fragments, * because we have no idea what fragment will be * the last. */ if (datalen == length + fraggap) alloclen += rt->u.dst.trailer_len; if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (atomic_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; } if (skb == NULL) goto error; /* * Fill in the control structures */ skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); /* * Find where to start putting bytes. */ data = skb_put(skb, fraglen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); data += fragheaderlen; if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -