📄 svcsock.c

📁 Linux Kernel 2.6.9 for OMAP1710
💻 C
📖 第 1 页 / 共 3 页
字号:
 * INET callback when space is newly available on the socket. */static voidsvc_write_space(struct sock *sk){	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);	if (svsk) {		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",			svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags));		svc_sock_enqueue(svsk);	}	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {		printk(KERN_WARNING "RPC svc_write_space: some sleeping on %p\n",		       svsk);		wake_up_interruptible(sk->sk_sleep);	}}/* * Receive a datagram from a UDP socket. */extern intcsum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);static intsvc_udp_recvfrom(struct svc_rqst *rqstp){	struct svc_sock	*svsk = rqstp->rq_sock;	struct svc_serv	*serv = svsk->sk_server;	struct sk_buff	*skb;	int		err, len;	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))	    /* udp sockets need large rcvbuf as all pending	     * requests are still in that buffer.  sndbuf must	     * also be large enough that there is enough space	     * for one reply per thread.	     */	    svc_sock_setbufsize(svsk->sk_sock,				(serv->sv_nrthreads+3) * serv->sv_bufsz,				(serv->sv_nrthreads+3) * serv->sv_bufsz);	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {		svc_sock_received(svsk);		return svc_deferred_recv(rqstp);	}	clear_bit(SK_DATA, &svsk->sk_flags);	while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {		if (err == -EAGAIN) {			svc_sock_received(svsk);			return err;		}		/* possibly an icmp error */		dprintk("svc: recvfrom returned error %d\n", -err);	}	if (skb->stamp.tv_sec == 0) {		skb->stamp.tv_sec = xtime.tv_sec; 		skb->stamp.tv_usec = xtime.tv_nsec * 1000; 		/* Don't enable netstamp, sunrpc doesn't 		   need that much accuracy */	}	svsk->sk_sk->sk_stamp = skb->stamp;	set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */	/*	 * Maybe more packets - kick another thread ASAP.	 */	svc_sock_received(svsk);	len  = skb->len - sizeof(struct udphdr);	rqstp->rq_arg.len = len;	rqstp->rq_prot        = IPPROTO_UDP;	/* Get sender address */	rqstp->rq_addr.sin_family = AF_INET;	rqstp->rq_addr.sin_port = skb->h.uh->source;	rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;	rqstp->rq_daddr = skb->nh.iph->daddr;	if (skb_is_nonlinear(skb)) {		/* we have to copy */		local_bh_disable();		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {			local_bh_enable();			/* checksum error */			skb_free_datagram(svsk->sk_sk, skb);			return 0;		}		local_bh_enable();		skb_free_datagram(svsk->sk_sk, skb); 	} else {		/* we can use it in-place */		rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);		rqstp->rq_arg.head[0].iov_len = len;		if (skb->ip_summed != CHECKSUM_UNNECESSARY) {			if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {				skb_free_datagram(svsk->sk_sk, skb);				return 0;			}			skb->ip_summed = CHECKSUM_UNNECESSARY;		}		rqstp->rq_skbuff = skb;	}	rqstp->rq_arg.page_base = 0;	if (len <= rqstp->rq_arg.head[0].iov_len) {		rqstp->rq_arg.head[0].iov_len = len;		rqstp->rq_arg.page_len = 0;	} else {		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;		rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;	}	if (serv->sv_stats)		serv->sv_stats->netudpcnt++;	return len;}static intsvc_udp_sendto(struct svc_rqst *rqstp){	int		error;	error = svc_sendto(rqstp, &rqstp->rq_res);	if (error == -ECONNREFUSED)		/* ICMP error on earlier request. */		error = svc_sendto(rqstp, &rqstp->rq_res);	return error;}static voidsvc_udp_init(struct svc_sock *svsk){	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;	svsk->sk_sk->sk_write_space = svc_write_space;	svsk->sk_recvfrom = svc_udp_recvfrom;	svsk->sk_sendto = svc_udp_sendto;	/* initialise setting must have enough space to	 * receive and respond to one request.  	 * svc_udp_recvfrom will re-adjust if necessary	 */	svc_sock_setbufsize(svsk->sk_sock,			    3 * svsk->sk_server->sv_bufsz,			    3 * svsk->sk_server->sv_bufsz);	set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */	set_bit(SK_CHNGBUF, &svsk->sk_flags);}/* * A data_ready event on a listening socket means there's a connection * pending. Do not use state_change as a substitute for it. */static voidsvc_tcp_listen_data_ready(struct sock *sk, int count_unused){	struct svc_sock	*svsk;	dprintk("svc: socket %p TCP (listen) state change %d\n",			sk, sk->sk_state);	if  (sk->sk_state != TCP_LISTEN) {		/*		 * This callback may called twice when a new connection		 * is established as a child socket inherits everything		 * from a parent LISTEN socket.		 * 1) data_ready method of the parent socket will be called		 *    when one of child sockets become ESTABLISHED.		 * 2) data_ready method of the child socket may be called		 *    when it receives data before the socket is accepted.		 * In case of 2, we should ignore it silently.		 */		goto out;	}	if (!(svsk = (struct svc_sock *) sk->sk_user_data)) {		printk("svc: socket %p: no user data\n", sk);		goto out;	}	set_bit(SK_CONN, &svsk->sk_flags);	svc_sock_enqueue(svsk); out:	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))		wake_up_interruptible_all(sk->sk_sleep);}/* * A state change on a connected socket means it's dying or dead. */static voidsvc_tcp_state_change(struct sock *sk){	struct svc_sock	*svsk;	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",			sk, sk->sk_state, sk->sk_user_data);	if (!(svsk = (struct svc_sock *) sk->sk_user_data)) {		printk("svc: socket %p: no user data\n", sk);		goto out;	}	set_bit(SK_CLOSE, &svsk->sk_flags);	svc_sock_enqueue(svsk); out:	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))		wake_up_interruptible_all(sk->sk_sleep);}static voidsvc_tcp_data_ready(struct sock *sk, int count){	struct svc_sock *	svsk;	dprintk("svc: socket %p TCP data ready (svsk %p)\n",			sk, sk->sk_user_data);	if (!(svsk = (struct svc_sock *)(sk->sk_user_data)))		goto out;	set_bit(SK_DATA, &svsk->sk_flags);	svc_sock_enqueue(svsk); out:	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))		wake_up_interruptible(sk->sk_sleep);}/* * Accept a TCP connection */static voidsvc_tcp_accept(struct svc_sock *svsk){	struct sockaddr_in sin;	struct svc_serv	*serv = svsk->sk_server;	struct socket	*sock = svsk->sk_sock;	struct socket	*newsock;	struct proto_ops *ops;	struct svc_sock	*newsvsk;	int		err, slen;	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);	if (!sock)		return;	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);	if (err) {		if (err == -ENOMEM)			printk(KERN_WARNING "%s: no more sockets!\n",			       serv->sv_name);		return;	}	dprintk("svc: tcp_accept %p allocated\n", newsock);	newsock->ops = ops = sock->ops;	clear_bit(SK_CONN, &svsk->sk_flags);	if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {		if (err != -EAGAIN && net_ratelimit())			printk(KERN_WARNING "%s: accept failed (err %d)!\n",				   serv->sv_name, -err);		goto failed;		/* aborted connection or whatever */	}	set_bit(SK_CONN, &svsk->sk_flags);	svc_sock_enqueue(svsk);	slen = sizeof(sin);	err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);	if (err < 0) {		if (net_ratelimit())			printk(KERN_WARNING "%s: peername failed (err %d)!\n",				   serv->sv_name, -err);		goto failed;		/* aborted connection or whatever */	}	/* Ideally, we would want to reject connections from unauthorized	 * hosts here, but when we get encription, the IP of the host won't	 * tell us anything. For now just warn about unpriv connections.	 */	if (ntohs(sin.sin_port) >= 1024) {		dprintk(KERN_WARNING			"%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",			serv->sv_name, 			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));	}	dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));	/* make sure that a write doesn't block forever when	 * low on memory	 */	newsock->sk->sk_sndtimeo = HZ*30;	if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))		goto failed;	/* make sure that we don't have too many active connections.	 * If we have, something must be dropped.	 *	 * There's no point in trying to do random drop here for	 * DoS prevention. The NFS clients does 1 reconnect in 15	 * seconds. An attacker can easily beat that.	 *	 * The only somewhat efficient mechanism would be if drop	 * old connections from the same IP first. But right now	 * we don't even record the client IP in svc_sock.	 */	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {		struct svc_sock *svsk = NULL;		spin_lock_bh(&serv->sv_lock);		if (!list_empty(&serv->sv_tempsocks)) {			if (net_ratelimit()) {				/* Try to help the admin */				printk(KERN_NOTICE "%s: too many open TCP "					"sockets, consider increasing the "					"number of nfsd threads\n",						   serv->sv_name);				printk(KERN_NOTICE "%s: last TCP connect from "					"%u.%u.%u.%u:%d\n",					serv->sv_name,					NIPQUAD(sin.sin_addr.s_addr),					ntohs(sin.sin_port));			}			/*			 * Always select the oldest socket. It's not fair,			 * but so is life			 */			svsk = list_entry(serv->sv_tempsocks.prev,					  struct svc_sock,					  sk_list);			set_bit(SK_CLOSE, &svsk->sk_flags);			svsk->sk_inuse ++;		}		spin_unlock_bh(&serv->sv_lock);		if (svsk) {			svc_sock_enqueue(svsk);			svc_sock_put(svsk);		}	}	if (serv->sv_stats)		serv->sv_stats->nettcpconn++;	return;failed:	sock_release(newsock);	return;}/* * Receive data from a TCP socket. */static intsvc_tcp_recvfrom(struct svc_rqst *rqstp){	struct svc_sock	*svsk = rqstp->rq_sock;	struct svc_serv	*serv = svsk->sk_server;	int		len;	struct kvec vec[RPCSVC_MAXPAGES];	int pnum, vlen;	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",		svsk, test_bit(SK_DATA, &svsk->sk_flags),		test_bit(SK_CONN, &svsk->sk_flags),		test_bit(SK_CLOSE, &svsk->sk_flags));	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {		svc_sock_received(svsk);		return svc_deferred_recv(rqstp);	}	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {		svc_delete_socket(svsk);		return 0;	}	if (test_bit(SK_CONN, &svsk->sk_flags)) {		svc_tcp_accept(svsk);		svc_sock_received(svsk);		return 0;	}	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))		/* sndbuf needs to have room for one request		 * per thread, otherwise we can stall even when the		 * network isn't a bottleneck.		 * rcvbuf just needs to be able to hold a few requests.		 * Normally they will be removed from the queue 		 * as soon a a complete request arrives.		 */		svc_sock_setbufsize(svsk->sk_sock,				    (serv->sv_nrthreads+3) * serv->sv_bufsz,				    3 * serv->sv_bufsz);	clear_bit(SK_DATA, &svsk->sk_flags);	/* Receive data. If we haven't got the record length yet, get	 * the next four bytes. Otherwise try to gobble up as much as	 * possible up to the complete record length.	 */	if (svsk->sk_tcplen < 4) {		unsigned long	want = 4 - svsk->sk_tcplen;		struct kvec	iov;		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;		iov.iov_len  = want;		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)			goto error;		svsk->sk_tcplen += len;		if (len < want) {			dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",			        len, want);			svc_sock_received(svsk);			return -EAGAIN; /* record header not complete */		}		svsk->sk_reclen = ntohl(svsk->sk_reclen);		if (!(svsk->sk_reclen & 0x80000000)) {			/* FIXME: technically, a record can be fragmented,			 *  and non-terminal fragments will not have the top			 *  bit set in the fragment length header.			 *  But apparently no known nfs clients send fragmented			 *  records. */			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",			       (unsigned long) svsk->sk_reclen);			goto err_delete;		}		svsk->sk_reclen &= 0x7fffffff;		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);		if (svsk->sk_reclen > serv->sv_bufsz) {			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",			       (unsigned long) svsk->sk_reclen);			goto err_delete;		}	}	/* Check whether enough data is available */	len = svc_recv_available(svsk);	if (len < 0)		goto error;	if (len < svsk->sk_reclen) {		dprintk("svc: incomplete TCP record (%d of %d)\n",			len, svsk->sk_reclen);		svc_sock_received(svsk);		return -EAGAIN;	/* record not complete */	}	len = svsk->sk_reclen;	set_bit(SK_DATA, &svsk->sk_flags);	vec[0] = rqstp->rq_arg.head[0];	vlen = PAGE_SIZE;	pnum = 1;	while (vlen < len) {		vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]);		vec[pnum].iov_len = PAGE_SIZE;		pnum++;		vlen += PAGE_SIZE;	}	/* Now receive data */	len = svc_recvfrom(rqstp, vec, pnum, len);	if (len < 0)		goto error;	dprintk("svc: TCP complete record (%d bytes)\n", len);	rqstp->rq_arg.len = len;	rqstp->rq_arg.page_base = 0;	if (len <= rqstp->rq_arg.head[0].iov_len) {		rqstp->rq_arg.head[0].iov_len = len;		rqstp->rq_arg.page_len = 0;	} else {		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;	}	rqstp->rq_skbuff      = NULL;	rqstp->rq_prot	      = IPPROTO_TCP;	/* Reset TCP read info */	svsk->sk_reclen = 0;	svsk->sk_tcplen = 0;	svc_sock_received(svsk);	if (serv->sv_stats)		serv->sv_stats->nettcpcnt++;	return len; err_delete:	svc_delete_socket(svsk);	return -EAGAIN; error:	if (len == -EAGAIN) {		dprintk("RPC: TCP recvfrom got EAGAIN\n");		svc_sock_received(svsk);	} else {		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",					svsk->sk_server->sv_name, -len);		svc_sock_received(svsk);	}	return len;}/* * Send out data on TCP socket. */static intsvc_tcp_sendto(struct svc_rqst *rqstp){	struct xdr_buf	*xbufp = &rqstp->rq_res;	int sent;	u32 reclen;	/* Set up the first element of the reply kvec.	 * Any other kvecs that may be in use have been taken	 * care of by the server implementation itself.	 */	reclen = htonl(0x80000000|((xbufp->len ) - 4));
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -