📄 af_netlink.c
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@redhat.com>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/selinux.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)      (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)   (NLGRPSZ(x)/sizeof(unsigned long))

struct netlink_sock {
        /* struct sock has to be the first member of netlink_sock */
        struct sock             sk;
        u32                     pid;
        u32                     dst_pid;
        u32                     dst_group;
        u32                     flags;
        u32                     subscriptions;
        u32                     ngroups;
        unsigned long           *groups;
        unsigned long           state;
        wait_queue_head_t       wait;
        struct netlink_callback *cb;
        struct mutex            *cb_mutex;
        struct mutex            cb_def_mutex;
        void                    (*netlink_rcv)(struct sk_buff *skb);
        struct module           *module;
};

#define NETLINK_KERNEL_SOCKET   0x1
#define NETLINK_RECV_PKTINFO    0x2

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
        return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
        struct hlist_head *table;
        unsigned long rehash_time;

        unsigned int mask;
        unsigned int shift;

        unsigned int entries;
        unsigned int max_shift;

        u32 rnd;
};

struct netlink_table {
        struct nl_pid_hash hash;
        struct hlist_head mc_list;
        unsigned long *listeners;
        unsigned int nl_nonroot;
        unsigned int groups;
        struct mutex *cb_mutex;
        struct module *module;
        int registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);
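/*
 * The netlink table is protected at two levels: lightweight readers
 * pin it with netlink_lock_table(), which registers them in
 * nl_table_users under a brief read_lock, while writers use
 * netlink_table_grab(), which takes nl_table_lock for writing and
 * sleeps until all readers have drained, after which the table can
 * be modified freely until netlink_table_ungrab().
 */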
static u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}

static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
        return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb) {
                if (nlk->cb->done)
                        nlk->cb->done(nlk->cb);
                netlink_destroy_callback(nlk->cb);
        }

        skb_queue_purge(&sk->sk_receive_queue);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Freeing alive netlink socket %p\n", sk);
                return;
        }
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
        BUG_TRAP(!nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

static void netlink_table_grab(void)
{
        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

static __inline__ void netlink_table_ungrab(void)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static __inline__ void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us to netlink_table_grab */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static __inline__ void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}

static __inline__ struct sock *netlink_lookup(struct net *net, int protocol,
                                              u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[protocol].hash;
        struct hlist_head *head;
        struct sock *sk;
        struct hlist_node *node;

        read_lock(&nl_table_lock);
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(sk, node, head) {
                if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) {
                        sock_hold(sk);
                        goto found;
                }
        }
        sk = NULL;
found:
        read_unlock(&nl_table_lock);
        return sk;
}

static inline struct hlist_head *nl_pid_hash_alloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kmalloc(size, GFP_ATOMIC);
        else
                return (struct hlist_head *)
                        __get_free_pages(GFP_ATOMIC, get_order(size));
}

static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
        if (size <= PAGE_SIZE)
                kfree(table);
        else
                free_pages((unsigned long)table, get_order(size));
}

static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
        unsigned int omask, mask, shift;
        size_t osize, size;
        struct hlist_head *otable, *table;
        int i;

        omask = mask = hash->mask;
        osize = size = (mask + 1) * sizeof(*table);
        shift = hash->shift;

        if (grow) {
                if (++shift > hash->max_shift)
                        return 0;
                mask = mask * 2 + 1;
                size *= 2;
        }

        table = nl_pid_hash_alloc(size);
        if (!table)
                return 0;

        memset(table, 0, size);
        otable = hash->table;
        hash->table = table;
        hash->mask = mask;
        hash->shift = shift;
        get_random_bytes(&hash->rnd, sizeof(hash->rnd));

        for (i = 0; i <= omask; i++) {
                struct sock *sk;
                struct hlist_node *node, *tmp;

                sk_for_each_safe(sk, node, tmp, &otable[i])
                        __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
        }

        nl_pid_hash_free(otable, osize);
        hash->rehash_time = jiffies + 10 * 60 * HZ;
        return 1;
}
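/*
 * Resizing policy for the pid hash: nl_pid_hash_dilute() below grows
 * the table once the average chain length (entries >> shift) exceeds 1.
 * For example, with shift = 4 (16 buckets) the average first reaches 2
 * at 32 entries, which doubles the table to 32 buckets, up to
 * max_shift.  A rehash with grow == 0 keeps the size but picks a new
 * random seed (rnd) to break up an unluckily long chain, and is
 * rate-limited by rehash_time to once per ten minutes.
 */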
static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
        int avg = hash->entries >> hash->shift;

        if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
                return 1;

        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
                nl_pid_hash_rehash(hash, 0);
                return 1;
        }

        return 0;
}

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        struct hlist_node *node;
        unsigned long mask;
        unsigned int i;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, node, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                tbl->listeners[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        int err = -EADDRINUSE;
        struct sock *osk;
        struct hlist_node *node;
        int len;

        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        len = 0;
        sk_for_each(osk, node, head) {
                if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid))
                        break;
                len++;
        }
        if (node)
                goto err;

        err = -EBUSY;
        if (nlk_sk(sk)->pid)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
                goto err;

        if (len && nl_pid_hash_dilute(hash, len))
                head = nl_pid_hashfn(hash, pid);
        hash->entries++;
        nlk_sk(sk)->pid = pid;
        sk_add_node(sk, head);
        err = 0;

err:
        netlink_table_ungrab();
        return err;
}

static void netlink_remove(struct sock *sk)
{
        netlink_table_grab();
        if (sk_del_node_init(sk))
                nl_table[sk->sk_protocol].hash.entries--;
        if (nlk_sk(sk)->subscriptions)
                __sk_del_bind_node(sk);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex)
                nlk->cb_mutex = cb_mutex;
        else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
        init_waitqueue_head(&nlk->wait);

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_KMOD
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        cb_mutex = nl_table[protocol].cb_mutex;
        netlink_unlock_table();

        if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0)
                goto out_module;

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}
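/*
 * Protocol modules are loaded on demand: when a process opens a socket
 * for an unregistered netlink protocol, netlink_create() above drops
 * the table lock, asks kmod for "net-pf-16-proto-<protocol>"
 * (PF_NETLINK is 16), and rechecks registration afterwards.  The
 * reference taken on the owning module here is dropped again in
 * netlink_release().
 */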
static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->pid && !nlk->subscriptions) {
                struct netlink_notify n = {
                        .net = sk->sk_net,
                        .protocol = sk->sk_protocol,
                        .pid = nlk->pid,
                };
                atomic_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        netlink_table_grab();
        if (netlink_is_kernel(sk)) {
                kfree(nl_table[sk->sk_protocol].listeners);
                nl_table[sk->sk_protocol].module = NULL;
                nl_table[sk->sk_protocol].registered = 0;
        } else if (nlk->subscriptions)
                netlink_update_listeners(sk);
        netlink_table_ungrab();

        kfree(nlk->groups);
        nlk->groups = NULL;

        sock_put(sk);
        return 0;
}

static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sk->sk_net;
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        struct sock *osk;
        struct hlist_node *node;
        s32 pid = current->tgid;
        int err;
        static s32 rover = -4097;

retry:
        cond_resched();
        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(osk, node, head) {
                if ((osk->sk_net != net))
                        continue;
                if (nlk_sk(osk)->pid == pid) {
                        /* Bind collision, search negative pid values. */
                        pid = rover--;
                        if (rover > -4097)
                                rover = -4097;
                        netlink_table_ungrab();
                        goto retry;
                }
        }
        netlink_table_ungrab();

        err = netlink_insert(sk, net, pid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine. */
        if (err == -EBUSY)
                err = 0;

        return err;
}

static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
               capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
out_unlock:
        netlink_table_ungrab();
        return err;
}
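/*
 * Autobinding (netlink_autobind() above) picks an address for sockets
 * that bind without an explicit pid: the first attempt uses the
 * caller's thread group id, and on collision a static rover hands out
 * negative values starting at -4097, below the range of real tgids.
 * Losing the race in netlink_insert() restarts the search
 * (-EADDRINUSE), while -EBUSY means another thread of the same process
 * already bound this socket, which is treated as success.
 */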
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sk->sk_net;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only superuser is allowed to listen multicasts */
        if (nladdr->nl_groups) {
                if (!netlink_capable(sock, NL_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->pid) {
                if (nladdr->nl_pid != nlk->pid)
                        return -EINVAL;
        } else {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, net, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err)
                        return err;
        }

        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(nladdr->nl_groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        return 0;
}

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
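/*
 * Illustrative userspace counterpart (a sketch, not part of this
 * file): the bind() below lands in netlink_bind() above, with
 * nl_pid = 0 requesting an autobound address and nl_groups selecting
 * multicast groups 1-32 as a bitmask.
 *
 *      #include <sys/socket.h>
 *      #include <linux/netlink.h>
 *      #include <linux/rtnetlink.h>
 *
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *      struct sockaddr_nl sa = {
 *              .nl_family = AF_NETLINK,
 *              .nl_pid    = 0,            // let the kernel autobind
 *              .nl_groups = RTMGRP_LINK,  // subscribe to link events
 *      };
 *      bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 */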