📄 ip_fragment.c
字号:
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP fragmentation functionality.
 *
 * Version:	$Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox <Alan.Cox@linux.org>
 *
 * Fixes:
 *		Alan Cox	:	Split from ip.c , see ip_input.c for history.
 *		David S. Miller :	Begin massive cleanup...
 *		Andi Kleen	:	Add sysctls.
 *		xxxx		:	Overlapfrag bug.
 *		Ultima		:	ip_expire() kernel panic.
 *		Bill Hawes	:	Frag accounting and evictor fixes.
 *		John McDonald	:	0 length frag bug.
 *		Alexey Kuznetsov:	SMP races, threading, cleanup.
 *		Patrick McHardy :	LRU queue of frag heads for evictor.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>

/* NOTE. The logic of IP defragmentation now parallels the corresponding
 * IPv6 code. If you change something here, _PLEASE_ update
 * ipv6/reassembly.c as well. Or notify me, at least. --ANK
 */

/* Fragment cache limits. We will commit 256K at one time. Should we
 * cross that limit we will prune down to 192K. This should cope with
 * even the most extreme cases without allowing an attacker to measurably
 * harm machine performance.
 *
 * The low threshold is the target handed to __ip_evictor() by
 * ip_evictor(); the high threshold is not referenced in the part of the
 * file visible here.
 */
int sysctl_ipfrag_high_thresh = 256*1024;
int sysctl_ipfrag_low_thresh = 192*1024;

/* Important NOTE! A fragment queue must be destroyed before MSL expires.
 * RFC791 is wrong in proposing to prolong the timer by the TTL on each
 * fragment arrival.
 *
 * This value is added to jiffies when ip_frag_intern() arms a queue's
 * expiry timer.
 */
int sysctl_ipfrag_time = IP_FRAG_TIME;

/* Per-skb private data, overlaid on skb->cb through FRAG_CB().
 * NOTE(review): "offset" is not used in the visible part of the file;
 * presumably it is the fragment's offset within the datagram -- confirm
 * against the queueing code.
 */
struct ipfrag_skb_cb
{
	struct inet_skb_parm	h;
	int			offset;
};

/* View the control buffer of an skb as a struct ipfrag_skb_cb. */
#define FRAG_CB(skb)	((struct ipfrag_skb_cb*)((skb)->cb))

/* Describe an entry in the "incomplete datagrams" queue.
 *
 * (saddr, daddr, id, protocol) form the key used both for hashing
 * (ipqhashfn) and for the duplicate check in ip_frag_intern().
 * next/pprev link the entry into its ipq_hash[] chain; pprev points at
 * whatever pointer currently refers to this entry (the bucket head or
 * the previous entry's "next"), which allows unlinking without walking
 * the chain.
 */
struct ipq {
	struct ipq	*next;		/* linked list pointers			*/
	struct list_head lru_list;	/* lru list member			*/
	u32		saddr;
	u32		daddr;
	u16		id;
	u8		protocol;
	u8		last_in;	/* bitmask of the flags below		*/
#define COMPLETE		4
#define FIRST_IN		2
#define LAST_IN			1

	struct sk_buff	*fragments;	/* linked list of received fragments	*/
	int		len;		/* total length of original datagram	*/
	int		meat;		/* NOTE(review): not used in the visible code; confirm meaning */
	spinlock_t	lock;		/* taken by the evictor and the expiry timer */
	atomic_t	refcnt;		/* queue is destroyed when this drops to zero (ipq_put) */
	struct timer_list timer;	/* when will this queue expire?		*/
	struct ipq	**pprev;
	int		iif;		/* passed to dev_get_by_index() in ip_expire() */
	struct timeval	stamp;
};

/* Hash table. */

/* Number of buckets. ipqhashfn() masks with (IPQ_HASHSZ - 1), which only
 * yields a uniform bucket index while this stays a power of two.
 */
#define IPQ_HASHSZ	64

/* Per-bucket lock is easy to add now. */
static struct ipq *ipq_hash[IPQ_HASHSZ];
/* Guards ipq_hash[], ipq_lru_list and ip_frag_nqueues in the code below. */
static rwlock_t ipfrag_lock = RW_LOCK_UNLOCKED;
/* Random seed mixed into the hash; replaced by ipfrag_secret_rebuild(). */
static u32 ipfrag_hash_rnd;
/* Queues in insertion order: ip_frag_intern() appends at the tail and
 * __ip_evictor() removes from the head.
 */
static LIST_HEAD(ipq_lru_list);
/* Number of queues currently linked into the hash table. */
int ip_frag_nqueues = 0;

/* Remove qp from its hash chain and from the LRU list and decrement the
 * queue count. Takes no lock itself; the only caller visible here,
 * ipq_unlink(), holds ipfrag_lock for writing around the call.
 */
static __inline__ void __ipq_unlink(struct ipq *qp)
{
	if(qp->next)
		qp->next->pprev = qp->pprev;
	*qp->pprev = qp->next;
	list_del(&qp->lru_list);
	ip_frag_nqueues--;
}

/* Locked wrapper around __ipq_unlink(). */
static __inline__ void ipq_unlink(struct ipq *ipq)
{
	write_lock(&ipfrag_lock);
	__ipq_unlink(ipq);
	write_unlock(&ipfrag_lock);
}

/* Map a queue key to a bucket index in [0, IPQ_HASHSZ).
 * id and prot are packed into one word (id in the upper 16 bits), hashed
 * together with both addresses and seeded with ipfrag_hash_rnd.
 */
static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
{
	return jhash_3words((u32)id << 16 | prot, saddr, daddr,
			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
}

static struct timer_list ipfrag_secret_timer;
/* Delay, in jiffies, between two rebuilds of the hash seed (10 minutes). */
int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;

/* Pick a new random hash seed and move every queue to the bucket it
 * hashes to under the new seed, then re-arm ipfrag_secret_timer.
 *
 * "dummy" is unused. The signature matches a timer callback; the code
 * that installs it on ipfrag_secret_timer is not visible here.
 * The whole rehash runs with ipfrag_lock held for writing.
 */
static void ipfrag_secret_rebuild(unsigned long dummy)
{
	unsigned long now = jiffies;
	int i;

	write_lock(&ipfrag_lock);
	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
	for (i = 0; i < IPQ_HASHSZ; i++) {
		struct ipq *q;

		q = ipq_hash[i];
		while (q) {
			/* Remember the successor first: q->next is
			 * overwritten if q is moved to another chain.
			 */
			struct ipq *next = q->next;
			unsigned int hval = ipqhashfn(q->id, q->saddr,
						      q->daddr, q->protocol);

			if (hval != i) {
				/* Unlink. */
				if (q->next)
					q->next->pprev = q->pprev;
				*q->pprev = q->next;

				/* Relink at the head of the new hash chain. */
				if ((q->next = ipq_hash[hval]) != NULL)
					q->next->pprev = &q->next;
				ipq_hash[hval] = q;
				q->pprev = &ipq_hash[hval];
			}

			q = next;
		}
	}
	write_unlock(&ipfrag_lock);

	/* Next rebuild is scheduled relative to the time this run started. */
	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
}

atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */

/* Memory Tracking Functions.
 *
 * Each helper keeps ip_frag_mem in step with what is allocated or freed.
 * The optional "work" pointer is the evictor's count of bytes still to
 * be reclaimed; when non-NULL it is reduced by the amount freed.
 */

/* Free one fragment skb, accounting for skb->truesize. */
static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
	if (work)
		*work -= skb->truesize;
	atomic_sub(skb->truesize, &ip_frag_mem);
	kfree_skb(skb);
}

/* Free a queue descriptor, accounting for sizeof(struct ipq). */
static __inline__ void frag_free_queue(struct ipq *qp, int *work)
{
	if (work)
		*work -= sizeof(struct ipq);
	atomic_sub(sizeof(struct ipq), &ip_frag_mem);
	kfree(qp);
}

/* Allocate a queue descriptor with GFP_ATOMIC and charge it to
 * ip_frag_mem. Returns NULL, charging nothing, if the allocation fails.
 * The returned memory is not initialised here.
 */
static __inline__ struct ipq *frag_alloc_queue(void)
{
	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);

	if(!qp)
		return NULL;
	atomic_add(sizeof(struct ipq), &ip_frag_mem);
	return qp;
}


/* Destruction primitives. */

/* Complete destruction of ipq: free every queued fragment and then the
 * descriptor itself. Called from ipq_put() once the last reference is
 * gone. "work" is forwarded to the freeing helpers (may be NULL).
 */
static void ip_frag_destroy(struct ipq *qp, int *work)
{
	struct sk_buff *fp;

	/* Sanity checks: the queue must already be marked COMPLETE and
	 * its timer must not be pending any more.
	 */
	BUG_TRAP(qp->last_in&COMPLETE);
	BUG_TRAP(del_timer(&qp->timer) == 0);

	/* Release all fragment data. */
	fp = qp->fragments;
	while (fp) {
		/* Fetch the successor before fp is freed. */
		struct sk_buff *xp = fp->next;

		frag_kfree_skb(fp, work);
		fp = xp;
	}

	/* Finally, release the queue descriptor itself. */
	frag_free_queue(qp, work);
}

/* Drop one reference; destroy the queue when it was the last one. */
static __inline__ void ipq_put(struct ipq *ipq, int *work)
{
	if (atomic_dec_and_test(&ipq->refcnt))
		ip_frag_destroy(ipq, work);
}

/* Kill ipq entry: stop its timer and take it out of the hash table and
 * LRU list. It is not destroyed here, because the caller (and possibly
 * others) still hold references; destruction happens in ipq_put().
 *
 * The two decrements mirror the two increments in ip_frag_intern(): one
 * reference for a pending timer, one for being linked. Both callers
 * visible here hold ipq->lock around the call.
 */
static void ipq_kill(struct ipq *ipq)
{
	/* del_timer() is non-zero only if the timer was still pending. */
	if (del_timer(&ipq->timer))
		atomic_dec(&ipq->refcnt);

	if (!(ipq->last_in & COMPLETE)) {
		ipq_unlink(ipq);
		atomic_dec(&ipq->refcnt);
		ipq->last_in |= COMPLETE;
	}
}

/* Memory limiting on fragments. The evictor trashes the queue at the
 * head of the LRU list, repeatedly, until ip_frag_mem is back under
 * "threshold" or the list is empty.
 *
 * "work" starts as the number of bytes above the threshold and is
 * reduced by the freeing helpers, via ipq_put(), as memory is released.
 */
static void __ip_evictor(int threshold)
{
	struct ipq *qp;
	struct list_head *tmp;
	int work;

	work = atomic_read(&ip_frag_mem) - threshold;
	if (work <= 0)
		return;

	while (work > 0) {
		read_lock(&ipfrag_lock);
		if (list_empty(&ipq_lru_list)) {
			read_unlock(&ipfrag_lock);
			return;
		}
		tmp = ipq_lru_list.next;
		qp = list_entry(tmp, struct ipq, lru_list);
		/* Take a reference before dropping the table lock so the
		 * queue cannot be freed while it is handled below.
		 */
		atomic_inc(&qp->refcnt);
		read_unlock(&ipfrag_lock);

		spin_lock(&qp->lock);
		if (!(qp->last_in&COMPLETE))
			ipq_kill(qp);
		spin_unlock(&qp->lock);

		/* Drops the reference taken above; if it was the last
		 * one, the freed bytes are subtracted from work.
		 */
		ipq_put(qp, &work);
		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
	}
}

/* Evict down to the low threshold. */
static inline void ip_evictor(void)
{
	__ip_evictor(sysctl_ipfrag_low_thresh);
}

/*
 * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
 *
 * "arg" is the struct ipq pointer cast to unsigned long. The ICMP error
 * is only sent when the FIRST_IN flag is set and at least one fragment
 * is queued; the head of the fragment list is the packet passed to
 * icmp_send().
 */
static void ip_expire(unsigned long arg)
{
	struct ipq *qp = (struct ipq *) arg;

	spin_lock(&qp->lock);

	/* Already killed by someone else: only drop our reference. */
	if (qp->last_in & COMPLETE)
		goto out;

	ipq_kill(qp);

	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);

	if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
		struct sk_buff *head = qp->fragments;

		/* Send an ICMP "Fragment Reassembly Timeout" message.
		 * The device is looked up again from the saved interface
		 * index; nothing is sent if that lookup fails.
		 */
		if ((head->dev = dev_get_by_index(qp->iif)) != NULL) {
			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
			dev_put(head->dev);
		}
	}
out:
	spin_unlock(&qp->lock);
	/* Presumably the reference taken for the timer in ip_frag_intern(). */
	ipq_put(qp, NULL);
}

/* Creation primitives. */

/* Insert qp_in into bucket "hash" of the table, arming its expiry timer.
 * On SMP, if an entry with the same key is already present, that entry
 * is returned with an extra reference instead, and qp_in is released.
 *
 * NOTE(review): this copy of the file ends inside this function; only
 * the part up to the final write_unlock() is visible.
 */
static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
{
	struct ipq *qp;

	write_lock(&ipfrag_lock);
#ifdef CONFIG_SMP
	/* With SMP race we have to recheck hash table, because
	 * such entry could be created on other cpu, while we
	 * promoted read lock to write lock.
	 */
	for(qp = ipq_hash[hash]; qp; qp = qp->next) {
		if(qp->id == qp_in->id		&&
		   qp->saddr == qp_in->saddr	&&
		   qp->daddr == qp_in->daddr	&&
		   qp->protocol == qp_in->protocol) {
			atomic_inc(&qp->refcnt);
			write_unlock(&ipfrag_lock);
			/* Mark the unused queue COMPLETE before dropping
			 * it: ip_frag_destroy() checks for that flag.
			 */
			qp_in->last_in |= COMPLETE;
			ipq_put(qp_in, NULL);
			return qp;
		}
	}
#endif
	qp = qp_in;

	/* One reference for the timer, taken only if mod_timer() reports
	 * that the timer was not already pending ...
	 */
	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
		atomic_inc(&qp->refcnt);

	/* ... and one more, matching the decrement ipq_kill() performs
	 * when it unlinks the queue.
	 */
	atomic_inc(&qp->refcnt);
	if((qp->next = ipq_hash[hash]) != NULL)
		qp->next->pprev = &qp->next;
	ipq_hash[hash] = qp;
	qp->pprev = &ipq_hash[hash];
	INIT_LIST_HEAD(&qp->lru_list);
	list_add_tail(&qp->lru_list, &ipq_lru_list);
	ip_frag_nqueues++;
	write_unlock(&ipfrag_lock);
	/* NOTE(review): the remainder of ip_frag_intern() is missing from this copy of the file. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -