📄 sock.c
字号:
/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Generic socket support routines. Memory allocators, socket lock/release * handler for protocols to use and generic option handler. * * * Version: $Id: sock.c,v 1.102 2000/12/11 23:00:24 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> * Alan Cox, <A.Cox@swansea.ac.uk> * * Fixes: * Alan Cox : Numerous verify_area() problems * Alan Cox : Connecting on a connecting socket * now returns an error for tcp. * Alan Cox : sock->protocol is set correctly. * and is not sometimes left as 0. * Alan Cox : connect handles icmp errors on a * connect properly. Unfortunately there * is a restart syscall nasty there. I * can't match BSD without hacking the C * library. Ideas urgently sought! * Alan Cox : Disallow bind() to addresses that are * not ours - especially broadcast ones!! * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, * instead they leave that for the DESTROY timer. * Alan Cox : Clean up error flag in accept * Alan Cox : TCP ack handling is buggy, the DESTROY timer * was buggy. Put a remove_sock() in the handler * for memory when we hit 0. Also altered the timer * code. The ACK stuff can wait and needs major * TCP layer surgery. * Alan Cox : Fixed TCP ack bug, removed remove sock * and fixed timer/inet_bh race. * Alan Cox : Added zapped flag for TCP * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... * Rick Sladkey : Relaxed UDP rules for matching packets. * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support * Pauline Middelink : identd support * Alan Cox : Fixed connect() taking signals I think. * Alan Cox : SO_LINGER supported * Alan Cox : Error reporting fixes * Anonymous : inet_create tidied up (sk->reuse setting) * Alan Cox : inet sockets don't set sk->type! * Alan Cox : Split socket option code * Alan Cox : Callbacks * Alan Cox : Nagle flag for Charles & Johannes stuff * Alex : Removed restriction on inet fioctl * Alan Cox : Splitting INET from NET core * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code * Alan Cox : Split IP from generic code * Alan Cox : New kfree_skbmem() * Alan Cox : Make SO_DEBUG superuser only. * Alan Cox : Allow anyone to clear SO_DEBUG * (compatibility fix) * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. * Alan Cox : Allocator for a socket is settable. * Alan Cox : SO_ERROR includes soft errors. * Alan Cox : Allow NULL arguments on some SO_ opts * Alan Cox : Generic socket allocation to make hooks * easier (suggested by Craig Metz). * Michael Pall : SO_ERROR returns positive errno again * Steve Whitehouse: Added default destructor to free * protocol private data. * Steve Whitehouse: Added various other default routines * common to several socket families. * Chris Evans : Call suser() check last on F_SETOWN * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() * Andi Kleen : Fix write_space callback * * To Fix: * * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */#include <linux/config.h>#include <linux/errno.h>#include <linux/types.h>#include <linux/socket.h>#include <linux/in.h>#include <linux/kernel.h>#include <linux/major.h>#include <linux/sched.h>#include <linux/timer.h>#include <linux/string.h>#include <linux/sockios.h>#include <linux/net.h>#include <linux/fcntl.h>#include <linux/mm.h>#include <linux/slab.h>#include <linux/interrupt.h>#include <linux/poll.h>#include <linux/init.h>#include <asm/uaccess.h>#include <asm/system.h>#include <linux/inet.h>#include <linux/netdevice.h>#include <net/ip.h>#include <net/protocol.h>#include <net/arp.h>#include <net/route.h>#include <net/tcp.h>#include <net/udp.h>#include <linux/skbuff.h>#include <net/sock.h>#include <net/raw.h>#include <net/icmp.h>#include <linux/ipsec.h>#ifdef CONFIG_FILTER#include <linux/filter.h>#endif#define min(a,b) ((a)<(b)?(a):(b))/* Run time adjustable parameters. */__u32 sysctl_wmem_max = SK_WMEM_MAX;__u32 sysctl_rmem_max = SK_RMEM_MAX;__u32 sysctl_wmem_default = SK_WMEM_MAX;__u32 sysctl_rmem_default = SK_RMEM_MAX;/* Maximal space eaten by iovec or ancilliary data plus some space */int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);static int sock_set_timeout(long *timeo_p, char *optval, int optlen){ struct timeval tv; if (optlen < sizeof(tv)) return -EINVAL; if (copy_from_user(&tv, optval, sizeof(tv))) return -EFAULT; *timeo_p = MAX_SCHEDULE_TIMEOUT; if (tv.tv_sec == 0 && tv.tv_usec == 0) return 0; if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ); return 0;}/* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. */int sock_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen){ struct sock *sk=sock->sk;#ifdef CONFIG_FILTER struct sk_filter *filter;#endif int val; int valbool; int err; struct linger ling; int ret = 0; /* * Options without arguments */#ifdef SO_DONTLINGER /* Compatibility item... */ switch(optname) { case SO_DONTLINGER: sk->linger=0; return 0; }#endif if(optlen<sizeof(int)) return(-EINVAL); err = get_user(val, (int *)optval); if (err) return err; valbool = val?1:0; lock_sock(sk); switch(optname) { case SO_DEBUG: if(val && !capable(CAP_NET_ADMIN)) { ret = -EACCES; } else sk->debug=valbool; break; case SO_REUSEADDR: sk->reuse = valbool; break; case SO_TYPE: case SO_ERROR: ret = -ENOPROTOOPT; break; case SO_DONTROUTE: sk->localroute=valbool; break; case SO_BROADCAST: sk->broadcast=valbool; break; case SO_SNDBUF: /* Don't error on this BSD doesn't and if you think about it this is right. Otherwise apps have to play 'guess the biggest size' games. RCVBUF/SNDBUF are treated in BSD as hints */ if (val > sysctl_wmem_max) val = sysctl_wmem_max; sk->userlocks |= SOCK_SNDBUF_LOCK; sk->sndbuf = max(val*2,SOCK_MIN_SNDBUF); /* * Wake up sending tasks if we * upped the value. */ sk->write_space(sk); break; case SO_RCVBUF: /* Don't error on this BSD doesn't and if you think about it this is right. Otherwise apps have to play 'guess the biggest size' games. RCVBUF/SNDBUF are treated in BSD as hints */ if (val > sysctl_rmem_max) val = sysctl_rmem_max; sk->userlocks |= SOCK_RCVBUF_LOCK; /* FIXME: is this lower bound the right one? */ sk->rcvbuf = max(val*2,SOCK_MIN_RCVBUF); break; case SO_KEEPALIVE:#ifdef CONFIG_INET if (sk->protocol == IPPROTO_TCP) { tcp_set_keepalive(sk, valbool); }#endif sk->keepopen = valbool; break; case SO_OOBINLINE: sk->urginline = valbool; break; case SO_NO_CHECK: sk->no_check = valbool; break; case SO_PRIORITY: if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) sk->priority = val; else ret = -EPERM; break; case SO_LINGER: if(optlen<sizeof(ling)) { ret = -EINVAL; /* 1003.1g */ break; } if (copy_from_user(&ling,optval,sizeof(ling))) { ret = -EFAULT; break; } if(ling.l_onoff==0) { sk->linger=0; } else {#if (BITS_PER_LONG == 32) if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) sk->lingertime=MAX_SCHEDULE_TIMEOUT; else#endif sk->lingertime=ling.l_linger*HZ; sk->linger=1; } break; case SO_BSDCOMPAT: sk->bsdism = valbool; break; case SO_PASSCRED: sock->passcred = valbool; break; case SO_TIMESTAMP: sk->rcvtstamp = valbool; break; case SO_RCVLOWAT: if (val < 0) val = INT_MAX; sk->rcvlowat = val ? : 1; break; case SO_RCVTIMEO: ret = sock_set_timeout(&sk->rcvtimeo, optval, optlen); break; case SO_SNDTIMEO: ret = sock_set_timeout(&sk->sndtimeo, optval, optlen); break;#ifdef CONFIG_NETDEVICES case SO_BINDTODEVICE: { char devname[IFNAMSIZ]; /* Sorry... */ if (!capable(CAP_NET_RAW)) { ret = -EPERM; break; } /* Bind this socket to a particular device like "eth0", * as specified in the passed interface name. If the * name is "" or the option length is zero the socket * is not bound. */ if (!valbool) { sk->bound_dev_if = 0; } else { if (optlen > IFNAMSIZ) optlen = IFNAMSIZ; if (copy_from_user(devname, optval, optlen)) { ret = -EFAULT; break; } /* Remove any cached route for this socket. */ sk_dst_reset(sk); if (devname[0] == '\0') { sk->bound_dev_if = 0; } else { struct net_device *dev = dev_get_by_name(devname); if (!dev) { ret = -ENODEV; break; } sk->bound_dev_if = dev->ifindex; dev_put(dev); } } break; }#endif#ifdef CONFIG_FILTER case SO_ATTACH_FILTER: ret = -EINVAL; if (optlen == sizeof(struct sock_fprog)) { struct sock_fprog fprog; ret = -EFAULT; if (copy_from_user(&fprog, optval, sizeof(fprog))) break; ret = sk_attach_filter(&fprog, sk); } break; case SO_DETACH_FILTER: spin_lock_bh(&sk->lock.slock); filter = sk->filter; if (filter) { sk->filter = NULL; spin_unlock_bh(&sk->lock.slock); sk_filter_release(sk, filter); break; } spin_unlock_bh(&sk->lock.slock); ret = -ENONET; break;#endif /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ default: ret = -ENOPROTOOPT; break; } release_sock(sk); return ret;}int sock_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen){ struct sock *sk = sock->sk; union { int val; struct linger ling; struct timeval tm; } v; int lv=sizeof(int),len; if(get_user(len,optlen)) return -EFAULT; switch(optname) { case SO_DEBUG: v.val = sk->debug; break; case SO_DONTROUTE: v.val = sk->localroute; break; case SO_BROADCAST: v.val= sk->broadcast; break; case SO_SNDBUF: v.val=sk->sndbuf; break; case SO_RCVBUF: v.val =sk->rcvbuf; break; case SO_REUSEADDR: v.val = sk->reuse; break; case SO_KEEPALIVE: v.val = sk->keepopen; break; case SO_TYPE: v.val = sk->type; break; case SO_ERROR: v.val = -sock_error(sk); if(v.val==0) v.val=xchg(&sk->err_soft,0); break; case SO_OOBINLINE: v.val = sk->urginline; break; case SO_NO_CHECK: v.val = sk->no_check; break; case SO_PRIORITY: v.val = sk->priority; break; case SO_LINGER: lv=sizeof(v.ling); v.ling.l_onoff=sk->linger; v.ling.l_linger=sk->lingertime/HZ; break; case SO_BSDCOMPAT: v.val = sk->bsdism; break; case SO_TIMESTAMP: v.val = sk->rcvtstamp; break; case SO_RCVTIMEO: lv=sizeof(struct timeval); if (sk->rcvtimeo == MAX_SCHEDULE_TIMEOUT) { v.tm.tv_sec = 0; v.tm.tv_usec = 0; } else { v.tm.tv_sec = sk->rcvtimeo/HZ; v.tm.tv_usec = ((sk->rcvtimeo%HZ)*1000)/HZ; } break; case SO_SNDTIMEO: lv=sizeof(struct timeval); if (sk->sndtimeo == MAX_SCHEDULE_TIMEOUT) { v.tm.tv_sec = 0; v.tm.tv_usec = 0; } else { v.tm.tv_sec = sk->sndtimeo/HZ; v.tm.tv_usec = ((sk->sndtimeo%HZ)*1000)/HZ; } break; case SO_RCVLOWAT: v.val = sk->rcvlowat; break; case SO_SNDLOWAT: v.val=1; break; case SO_PASSCRED: v.val = sock->passcred; break; case SO_PEERCRED: lv=sizeof(sk->peercred); len=min(len, lv); if(copy_to_user((void*)optval, &sk->peercred, len)) return -EFAULT; goto lenout; case SO_PEERNAME: { char address[128]; if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) return -ENOTCONN; if (lv < len) return -EINVAL; if(copy_to_user((void*)optval, address, len)) return -EFAULT; goto lenout; } default: return(-ENOPROTOOPT); } len=min(len,lv); if(copy_to_user(optval,&v,len)) return -EFAULT;lenout: if(put_user(len, optlen)) return -EFAULT; return 0;}static kmem_cache_t *sk_cachep;/* * All socket objects are allocated here. This is for future * usage. */ struct sock *sk_alloc(int family, int priority, int zero_it){ struct sock *sk = kmem_cache_alloc(sk_cachep, priority); if(sk && zero_it) { memset(sk, 0, sizeof(struct sock)); sk->family = family; sock_lock_init(sk);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -