varp.c

来自「xen虚拟机源代码安装包」· C语言 代码 · 共 1,537 行 · 第 1/3 页

C
1,537
字号
/* * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by the  * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. *  * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free software Foundation, Inc., * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA * */#ifdef __KERNEL__#include <linux/config.h>#include <linux/kernel.h>#include <linux/module.h>#include <linux/init.h>#include <linux/string.h>#include <linux/version.h>#include <linux/net.h>#include <linux/in.h>#include <linux/inet.h>#include <linux/netdevice.h>#include <linux/inetdevice.h>#include <linux/udp.h>#include <net/ip.h>#include <net/protocol.h>#include <net/route.h>#include <linux/skbuff.h>#include <linux/spinlock.h>#include <asm/semaphore.h>#else#include "sys_kernel.h"#include <netinet/in.h>#include <arpa/inet.h>#include <linux/ip.h>#include <linux/udp.h>#include "spinlock.h"#include "skbuff.h"#endif#include <tunnel.h>#include <vnet.h>#include <vif.h>#include <if_varp.h>#include <varp.h>#include <varp_util.h>#include <vnet.h>#include <etherip.h>#include <vnet_forward.h>#include "allocate.h"#include "iostream.h"#include "hash_table.h"#include "sys_net.h"#include "sys_string.h"#include "skb_util.h"#include "timer_util.h"#define MODULE_NAME "VARP"#define DEBUG 1#undef DEBUG#include "debug.h"/** @file VARP: Virtual ARP. * * Handles virtual ARP requests for vnet/vmac. *//*Varp uses UDP on port 1798.on domain up: ?  
send varp.announce { id, vmac, vnet, coa } for each vif
  that hasn't announced before, or has changed.
  install vif entries in local table.

on varp.announce{ id, vmac, vnet, coa }:
  update VARP entry for vmac x vnet if we have one, reset ttl.

on varp.request { id, vmac, vnet }:
  if we have a vif for the requested vmac/vnet,
  reply with varp.announce{ id, vmac, vnet, coa }

on timer:
  traverse VARP table, flush old entries.

on probe timer:
  probe again if not out of tries.
  if out of tries invalidate entry.

*/

/** Time-to-live of varp entries (in jiffies).*/
#define VARP_ENTRY_TTL      (60*HZ)

/** Maximum number of varp probes to make. */
#define VARP_PROBE_MAX      5

/** Interval between varp probes (in jiffies). */
#define VARP_PROBE_INTERVAL (3*HZ)

/** Maximum number of queued skbs for a varp entry. */
#define VARP_QUEUE_MAX      16

/** Number of buckets in the varp table (must be prime). */
#define VARP_TABLE_BUCKETS  3001

/** Varp entry states. */
enum {
    VARP_STATE_INCOMPLETE = 1,
    VARP_STATE_REACHABLE = 2,
    VARP_STATE_FAILED = 3,
};

/** Varp entry flags. */
enum {
    VARP_FLAG_PROBING = 1,
    VARP_FLAG_PERMANENT = 2,
};

/** Key for varp entries: a (vnet, vmac) pair. */
typedef struct VarpKey {
    /** Vnet id (network order). */
    VnetId vnet;
    /** Virtual MAC address. */
    Vmac vmac;
} VarpKey;

/** An entry in the varp cache. */
typedef struct VarpEntry {
    /** Key for the entry. */
    VarpKey key;
    /** Care-of address for the key. */
    VarpAddr addr;
    /** Last-updated timestamp. */
    unsigned long timestamp;
    /** State (one of the VARP_STATE_* values). */
    short state;
    /** Flags (VARP_FLAG_* values). */
    short flags;
    /** Reference count. */
    atomic_t refcount;
    /** Lock. Taken and released through VarpEntry_lock()/VarpEntry_unlock(). */
    rwlock_t lock;
    /** Irq flags saved by VarpEntry_lock() and read back by VarpEntry_unlock(). */
    unsigned long lflags;
    /** How many probes have been made. */
    atomic_t probes;
    /** Probe timer. */
    struct timer_list timer;
    /** Error callback taking the entry and an skb.
     * NOTE(review): when it is invoked is not visible in this part of the file.
     */
    void (*error)(struct VarpEntry *ventry, struct sk_buff *skb);
    /** Outbound skb queue. */
    struct sk_buff_head queue;
    /** Maximum size of the queue. */
    int queue_max;
    /** NOTE(review): presumably non-zero once the entry has been removed
     * from the table - confirm against the code that sets it.
     */
    atomic_t deleted;
} VarpEntry;

/** The varp cache. Varp entries indexed by VarpKey. */
typedef struct VarpTable {
    /** Hash table holding the entries. */
    HashTable *table;
    /** Sweep timer. */
    struct timer_list timer;
    /** Lock used by the VarpTable_read_lock()/VarpTable_write_lock() macros. */
    rwlock_t lock;
    /** NOTE(review): purpose not visible here; confirm what this serialises. */
    struct semaphore mutex;
    /* Per-table tunables. NOTE(review): presumably initialised from the
     * VARP_ENTRY_TTL, VARP_PROBE_MAX, VARP_PROBE_INTERVAL and VARP_QUEUE_MAX
     * defaults - confirm in the table constructor.
     */
    int entry_ttl;
    int probe_max;
    int probe_interval;
    int queue_max;
} VarpTable;

/** The varp cache. */
static VarpTable *varp_table = NULL;

/** Module parameter for the multicast address. */
static char *varp_mcaddr = NULL;

/** Multicast address (network order). */
u32 varp_mcast_addr = 0;

/** UDP port (network order). */
u16 varp_port = 0;

/** Name of the varp network device (defaults to "xen-br0"). */
char *varp_device = "xen-br0";

/* Table locking: irq-saving reader/writer locks on vtable->lock.
 * 'flags' is the caller's variable receiving the saved irq state.
 */
#define VarpTable_read_lock(vtable, flags)    \
  do{ read_lock_irqsave(&(vtable)->lock, (flags)); } while(0)

#define VarpTable_read_unlock(vtable, flags)  \
  do{ read_unlock_irqrestore(&(vtable)->lock, (flags)); } while(0)

#define VarpTable_write_lock(vtable, flags)    \
  do{ write_lock_irqsave(&(vtable)->lock, (flags)); } while(0)

#define VarpTable_write_unlock(vtable, flags)  \
  do{ write_unlock_irqrestore(&(vtable)->lock, (flags)); } while(0)

/* Entry locking: always takes the write side of ventry->lock.
 * The saved irq flags are also stashed in ventry->lflags, and unlock
 * reloads 'flags' from there before restoring, so the unlocking code
 * does not need the same 'flags' variable that was used to lock.
 */
#define VarpEntry_lock(ventry, flags)    \
  do{ write_lock_irqsave(&(ventry)->lock, (flags)); (ventry)->lflags = (flags); } while(0)

#define VarpEntry_unlock(ventry, flags)  \
  do{ (flags) = (ventry)->lflags; write_unlock_irqrestore(&(ventry)->lock, (flags)); } while(0)

/* Forward declarations for functions used before their definitions. */
void VarpTable_sweep(VarpTable *vtable);
void VarpTable_flush(VarpTable *vtable);
void VarpTable_print(VarpTable *vtable, IOStream *io);
int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb);

/* The utility code is textually included rather than linked. */
#include "./varp_util.c"

/** Print the varp cache to iostdout (only when DEBUG is defined;
 * otherwise this is a no-op).
 */
void varp_dprint(void){
#ifdef DEBUG
    VarpTable_print(varp_table, iostdout);
#endif
} 

/** Flush the varp cache. 
*/void varp_flush(void){    VarpTable_flush(varp_table);}#ifdef __KERNEL__static int device_ucast_addr(const char *device, uint32_t *addr){    int err;    struct net_device *dev = NULL;    err = vnet_get_device(device, &dev);    if(err) goto exit;    err = vnet_get_device_address(dev, addr);  exit:    if(err){        *addr = 0;    }    return err;}/** Get the unicast address of the varp device. */int varp_ucast_addr(uint32_t *addr){    int err = -ENODEV;    const char *devices[] = { varp_device, "eth0", "eth1", "eth2", NULL };    const char **p;    for(p = devices; err && *p; p++){        err = device_ucast_addr(*p, addr);    }    return err;}/** Lookup a network device by name. * * @param name device name * @param dev return parameter for the device * @return 0 on success, error code otherwise */int vnet_get_device(const char *name, struct net_device **dev){    int err = 0;    *dev = dev_get_by_name(name);    if(!*dev){        err = -ENETDOWN;    }    return err;}/** Get the source address from a device. * * @param dev device * @param addr return parameter for address * @return 0 on success, error code otherwise */int vnet_get_device_address(struct net_device *dev, u32 *addr){    int err = 0;    struct in_device *in_dev;    in_dev = in_dev_get(dev);    if(!in_dev){        err = -ENODEV;        goto exit;    }    *addr = in_dev->ifa_list->ifa_address;    in_dev_put(in_dev);  exit:    return err;}#elseint varp_ucast_addr(uint32_t *addr){    return 0;}#endif/** Print varp info and the varp cache. */void varp_print(IOStream *io){    uint32_t addr = 0;    varp_ucast_addr(&addr);    IOStream_print(io, "(varp \n");    IOStream_print(io, " (device %s)\n", varp_device);    IOStream_print(io, " (mcast_addr " IPFMT ")\n", NIPQUAD(varp_mcast_addr));    IOStream_print(io, " (ucast_addr " IPFMT ")\n", NIPQUAD(addr));    IOStream_print(io, " (port %d)\n", ntohs(varp_port));    IOStream_print(io, " (encapsulation %s)\n",                   (etherip_in_udp ? 
"etherip_in_udp" : "etherip"));    IOStream_print(io, " (entry_ttl %lu)\n", varp_table->entry_ttl);    IOStream_print(io, ")\n");    VarpTable_print(varp_table, io);}#ifdef __KERNEL__#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)static inline int addr_route(u32 daddr, struct rtable **prt){    int err = 0;    struct flowi fl = {        .nl_u = {            .ip4_u = {                .daddr = daddr,            }        }    };        err = ip_route_output_key(prt, &fl);    return err;}#elsestatic inline int addr_route(u32 daddr, struct rtable **prt){    int err = 0;    struct rt_key key = { .dst = daddr };    err = ip_route_output_key(prt, &key);    return err;}#endif // LINUX_VERSION_CODE#ifndef LL_RESERVED_SPACE#define HH_DATA_MOD	16#define LL_RESERVED_SPACE(dev) \        ((dev->hard_header_len & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)#endif // LL_RESERVED_SPACE#else // __KERNEL__#define ip_eth_mc_map(daddr, dmac) do{ }while(0)#endif // __KERNEL__/** Send a varp protocol message. * * @param opcode varp opcode (host order) * @param dev device (may be null) * @param skb skb being replied to (may be null) * @param vnet vnet id (in network order) * @param vmac vmac (in network order) * @return 0 on success, error code otherwise */int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin,              VnetId *vnet, Vmac *vmac){    int err = 0;    int link_n = 0;    int ip_n = sizeof(struct iphdr);    int udp_n = sizeof(struct udphdr);    int varp_n = sizeof(VarpHdr);    struct sk_buff *skbout = NULL;    VarpHdr *varph = NULL;    u8 smacbuf[6] = {}, dmacbuf[6] = {};    u8 *smac = smacbuf, *dmac = dmacbuf;    u32 saddr = 0, daddr = 0;    u16 sport = 0, dport = 0;#if defined(DEBUG)    char vnetbuf[VNET_ID_BUF];#endif    dprintf("> opcode=%d vnet= %s vmac=" MACFMT "\n",            opcode, VnetId_ntoa(vnet, vnetbuf), MAC6TUPLE(vmac->mac));    dport = varp_port;    if(skbin){        daddr = skbin->nh.iph->saddr;        dmac = eth_hdr(skbin)->h_source;        sport = 
skbin->h.uh->dest;    } else {        if(MULTICAST(varp_mcast_addr)){            daddr = varp_mcast_addr;            ip_eth_mc_map(daddr, dmac);        } else {            daddr = INADDR_BROADCAST;        }        sport = varp_port;    }#ifdef __KERNEL__    {        struct in_device *in_dev = NULL;        if(!dev){            struct rtable *rt = NULL;            err = addr_route(daddr, &rt);            if(err) goto exit;            dev = rt->u.dst.dev;        }                in_dev = in_dev_get(dev);        if(!in_dev){            err = -ENODEV;            goto exit;        }        link_n = LL_RESERVED_SPACE(dev);        saddr = in_dev->ifa_list->ifa_address;        smac = dev->dev_addr;        if(daddr == INADDR_BROADCAST){            daddr = in_dev->ifa_list->ifa_broadcast;            dmac = dev->broadcast;        }        in_dev_put(in_dev);    }#else    {        extern uint32_t vnetd_addr(void);         saddr = vnetd_addr();    }#endif // __KERNEL__    dprintf("> dev=%s\n", (dev ? dev->name : "<none>"));    dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac), MAC6TUPLE(dmac));    dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr), NIPQUAD(daddr));    dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport));    skbout = alloc_skb(link_n + ip_n + udp_n + varp_n, GFP_ATOMIC);    if (!skbout){        err = -ENOMEM;        goto exit;    }    skbout->dev = dev;    skb_reserve(skbout, link_n);    skbout->protocol = htons(ETH_P_IP);#ifdef __KERNEL__    // Device header. Pushes device header on front of skb.    if (dev->hard_header){        err = dev->hard_header(skbout, dev, ETH_P_IP, dmac, smac, skbout->len);        if(err < 0) goto exit;        skbout->mac.raw = skbout->data;    }#else    smac = smac; // Defeat unused variable warning.#endif // __KERNEL__    // IP header.    
skbout->nh.raw = skb_put(skbout, ip_n);    skbout->nh.iph->version  = 4;    skbout->nh.iph->ihl      = ip_n / 4;    skbout->nh.iph->tos      = 0;    skbout->nh.iph->tot_len  = htons(ip_n + udp_n + varp_n);    skbout->nh.iph->id       = 0;    skbout->nh.iph->frag_off = 0;    skbout->nh.iph->ttl      = 64;    skbout->nh.iph->protocol = IPPROTO_UDP;    skbout->nh.iph->saddr    = saddr;    skbout->nh.iph->daddr    = daddr;      skbout->nh.iph->check    = 0;    // UDP header.    skbout->h.raw = skb_put(skbout, udp_n);    skbout->h.uh->source     = sport;    skbout->h.uh->dest       = dport;    skbout->h.uh->len        = htons(udp_n + varp_n);    skbout->h.uh->check      = 0;    // Varp header.    varph = (void*)skb_put(skbout, varp_n);    *varph = (VarpHdr){};    varph->hdr.id            = htons(VARP_ID);    varph->hdr.opcode        = htons(opcode);    varph->vnet              = *vnet;    varph->vmac              = *vmac;    varph->addr.family       = AF_INET;    varph->addr.u.ip4.s_addr = saddr;    err = skb_xmit(skbout);  exit:    if(err && skbout) kfree_skb(skbout);    dprintf("< err=%d\n", err);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?