📄 ipmr.c
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@redhat.com>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Version: $Id: ipmr.c,v 1.55 2000/11/28 13:13:27 davem Exp $
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

static struct sock *mroute_socket;

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

int mroute_do_assert = 0;				/* Set in PIM assert	*/
int mroute_do_pim = 0;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
atomic_t cache_resolve_queue_len;			/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
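/* Illustrative sketch, not part of the original file: the locking
   discipline described above in miniature.  The forwarding (data) path
   only ever takes the read side of mrt_lock, configuration changes take
   the write side with bottom halves disabled, and only the queue of
   unresolved entries needs the stronger mfc_unres_lock spinlock.  The
   function name is hypothetical. */
#if 0
static int example_count_active_vifs(void)
{
	int vifi, n = 0;

	read_lock(&mrt_lock);			/* data-path style access */
	for (vifi = 0; vifi < maxvif; vifi++)
		if (VIF_EXISTS(vifi))
			n++;
	read_unlock(&mrt_lock);

	return n;
}
#endif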
kmem_cache_t *mrt_cachep;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

extern struct inet_protocol pim_protocol;

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name("tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t oldfs;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (void*)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get(dev);
			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
				goto failure;
			in_dev->cnf.rp_filter = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
	((struct net_device_stats*)dev->priv)->tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats*)dev->priv;
}

static
struct net_device *ipmr_reg_vif(struct vifctl *v)
{
	struct net_device *dev;
	struct in_device *in_dev;
	int size;

	size = sizeof(*dev) + sizeof(struct net_device_stats);
	dev = kmalloc(size, GFP_KERNEL);
	if (!dev)
		return NULL;

	memset(dev, 0, size);

	dev->priv = dev + 1;

	strcpy(dev->name, "pimreg");

	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->features		|= NETIF_F_DYNALLOC;

	if (register_netdevice(dev)) {
		kfree(dev);
		return NULL;
	}
	dev->iflink = 0;

	if ((in_dev = inetdev_init(dev)) == NULL)
		goto failure;

	in_dev->cnf.rp_filter = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get(dev)) != NULL) {
		in_dev->cnf.mc_forwarding--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}
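/* Illustrative sketch, not part of the original file: the teardown
   pattern used by vif_delete() above.  The vif is first unpublished
   under write_lock_bh(&mrt_lock) so the forwarding path can no longer
   find it, and only after the lock is dropped are the heavier device
   operations (allmulti accounting, unregister, dev_put) performed.
   The function name is hypothetical. */
#if 0
static void example_unpublish_then_teardown(struct vif_device *v)
{
	struct net_device *dev;

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;			/* readers under read_lock() now skip it */
	write_unlock_bh(&mrt_lock);

	if (dev) {
		dev_set_allmulti(dev, -1);	/* slow work outside the lock */
		dev_put(dev);
	}
}
#endif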
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
#ifdef CONFIG_RTNETLINK
		if (skb->nh.iph->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
#endif
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}

/* Single timer process for all the unresolved queue. */

void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		long interval = c->mfc_un.unres.expires - now;

		if (interval > 0) {
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
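/* Illustrative sketch, not part of the original file: how a resolved
   cache entry's per-vif TTL table is consumed.  ipmr_update_threshoulds()
   above records, for each vif, the minimum TTL a packet must exceed to be
   forwarded there (0 or 255 meaning "never"), and keeps [minvif, maxvif)
   as a window so the forwarding loop need not scan all MAXVIFS slots.
   The function name is hypothetical. */
#if 0
static void example_scan_oifs(struct mfc_cache *c, unsigned char pkt_ttl)
{
	int ct;

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (pkt_ttl > c->mfc_un.res.ttls[ct]) {
			/* packet would be queued for transmission on vif ct */
		}
	}
}
#endif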
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		__dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *c;

	for (c=mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if(c==NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if(c==NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
#ifdef CONFIG_RTNETLINK
		if (skb->nh.iph->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb->tail - (u8*)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
#endif
			ip_mr_forward(skb, c, 0);
	}
}
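/* Illustrative sketch, not part of the original file: the user-space
   side of this interface, roughly as a multicast routing daemon such as
   mrouted would use it from ordinary socket code.  MRT_INIT attaches the
   daemon's raw IGMP socket as mroute_socket, MRT_ADD_VIF ends up in
   vif_add() above, and MRT_ADD_MFC installs an (origin, group) forwarding
   entry; packets without a cache entry come back on the same socket as
   struct igmpmsg upcalls (see ipmr_cache_report() below).  Addresses and
   vif numbers are made up for the example. */
#if 0
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	int on = 1;
	struct vifctl vc;
	struct mfcctl mc;

	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));

	memset(&vc, 0, sizeof(vc));
	vc.vifc_vifi = 0;			/* vif index */
	vc.vifc_flags = 0;			/* plain physical interface */
	vc.vifc_threshold = 1;			/* TTL threshold */
	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));

	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin.s_addr = inet_addr("192.0.2.10");
	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.2.3");
	mc.mfcc_parent = 0;			/* incoming vif */
	mc.mfcc_ttls[1] = 1;			/* forward on vif 1 */
	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
#endif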
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	int ihl = pkt->nh.iph->ihl<<2;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if(!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
		skb->nh.raw = skb->h.raw = (u8*)msg;
		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
		memcpy(skb->data,pkt->data,ihl);
		skb->nh.iph->protocol = 0;		/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg*)skb->nh.iph;
		msg->im_vif = vifi;
		skb->dst = dst_clone(pkt->dst);

		/*
		 *	Add our header
		 */

		igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
		igmp->type =