📄 svcsock.c
字号:
* INET callback when space is newly available on the socket. */static voidsvc_write_space(struct sock *sk){ struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); svc_sock_enqueue(svsk); } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { printk(KERN_WARNING "RPC svc_write_space: some sleeping on %p\n", svsk); wake_up_interruptible(sk->sk_sleep); }}/* * Receive a datagram from a UDP socket. */extern intcsum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);static intsvc_udp_recvfrom(struct svc_rqst *rqstp){ struct svc_sock *svsk = rqstp->rq_sock; struct svc_serv *serv = svsk->sk_server; struct sk_buff *skb; int err, len; if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) /* udp sockets need large rcvbuf as all pending * requests are still in that buffer. sndbuf must * also be large enough that there is enough space * for one reply per thread. */ svc_sock_setbufsize(svsk->sk_sock, (serv->sv_nrthreads+3) * serv->sv_bufsz, (serv->sv_nrthreads+3) * serv->sv_bufsz); if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { svc_sock_received(svsk); return svc_deferred_recv(rqstp); } clear_bit(SK_DATA, &svsk->sk_flags); while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { svc_sock_received(svsk); return err; } /* possibly an icmp error */ dprintk("svc: recvfrom returned error %d\n", -err); } if (skb->stamp.tv_sec == 0) { skb->stamp.tv_sec = xtime.tv_sec; skb->stamp.tv_usec = xtime.tv_nsec * 1000; /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } svsk->sk_sk->sk_stamp = skb->stamp; set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ /* * Maybe more packets - kick another thread ASAP. */ svc_sock_received(svsk); len = skb->len - sizeof(struct udphdr); rqstp->rq_arg.len = len; rqstp->rq_prot = IPPROTO_UDP; /* Get sender address */ rqstp->rq_addr.sin_family = AF_INET; rqstp->rq_addr.sin_port = skb->h.uh->source; rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; rqstp->rq_daddr = skb->nh.iph->daddr; if (skb_is_nonlinear(skb)) { /* we have to copy */ local_bh_disable(); if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { local_bh_enable(); /* checksum error */ skb_free_datagram(svsk->sk_sk, skb); return 0; } local_bh_enable(); skb_free_datagram(svsk->sk_sk, skb); } else { /* we can use it in-place */ rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); rqstp->rq_arg.head[0].iov_len = len; if (skb->ip_summed != CHECKSUM_UNNECESSARY) { if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { skb_free_datagram(svsk->sk_sk, skb); return 0; } skb->ip_summed = CHECKSUM_UNNECESSARY; } rqstp->rq_skbuff = skb; } rqstp->rq_arg.page_base = 0; if (len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = len; rqstp->rq_arg.page_len = 0; } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; } if (serv->sv_stats) serv->sv_stats->netudpcnt++; return len;}static intsvc_udp_sendto(struct svc_rqst *rqstp){ int error; error = svc_sendto(rqstp, &rqstp->rq_res); if (error == -ECONNREFUSED) /* ICMP error on earlier request. */ error = svc_sendto(rqstp, &rqstp->rq_res); return error;}static voidsvc_udp_init(struct svc_sock *svsk){ svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_write_space = svc_write_space; svsk->sk_recvfrom = svc_udp_recvfrom; svsk->sk_sendto = svc_udp_sendto; /* initialise setting must have enough space to * receive and respond to one request. * svc_udp_recvfrom will re-adjust if necessary */ svc_sock_setbufsize(svsk->sk_sock, 3 * svsk->sk_server->sv_bufsz, 3 * svsk->sk_server->sv_bufsz); set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ set_bit(SK_CHNGBUF, &svsk->sk_flags);}/* * A data_ready event on a listening socket means there's a connection * pending. Do not use state_change as a substitute for it. */static voidsvc_tcp_listen_data_ready(struct sock *sk, int count_unused){ struct svc_sock *svsk; dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); if (sk->sk_state != TCP_LISTEN) { /* * This callback may called twice when a new connection * is established as a child socket inherits everything * from a parent LISTEN socket. * 1) data_ready method of the parent socket will be called * when one of child sockets become ESTABLISHED. * 2) data_ready method of the child socket may be called * when it receives data before the socket is accepted. * In case of 2, we should ignore it silently. */ goto out; } if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { printk("svc: socket %p: no user data\n", sk); goto out; } set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); out: if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep);}/* * A state change on a connected socket means it's dying or dead. */static voidsvc_tcp_state_change(struct sock *sk){ struct svc_sock *svsk; dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", sk, sk->sk_state, sk->sk_user_data); if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { printk("svc: socket %p: no user data\n", sk); goto out; } set_bit(SK_CLOSE, &svsk->sk_flags); svc_sock_enqueue(svsk); out: if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep);}static voidsvc_tcp_data_ready(struct sock *sk, int count){ struct svc_sock * svsk; dprintk("svc: socket %p TCP data ready (svsk %p)\n", sk, sk->sk_user_data); if (!(svsk = (struct svc_sock *)(sk->sk_user_data))) goto out; set_bit(SK_DATA, &svsk->sk_flags); svc_sock_enqueue(svsk); out: if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep);}/* * Accept a TCP connection */static voidsvc_tcp_accept(struct svc_sock *svsk){ struct sockaddr_in sin; struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) return; err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); if (err) { if (err == -ENOMEM) printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name); return; } dprintk("svc: tcp_accept %p allocated\n", newsock); newsock->ops = ops = sock->ops; clear_bit(SK_CONN, &svsk->sk_flags); if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { if (err != -EAGAIN && net_ratelimit()) printk(KERN_WARNING "%s: accept failed (err %d)!\n", serv->sv_name, -err); goto failed; /* aborted connection or whatever */ } set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); slen = sizeof(sin); err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); if (err < 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: peername failed (err %d)!\n", serv->sv_name, -err); goto failed; /* aborted connection or whatever */ } /* Ideally, we would want to reject connections from unauthorized * hosts here, but when we get encription, the IP of the host won't * tell us anything. For now just warn about unpriv connections. */ if (ntohs(sin.sin_port) >= 1024) { dprintk(KERN_WARNING "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n", serv->sv_name, NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); } dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name, NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); /* make sure that a write doesn't block forever when * low on memory */ newsock->sk->sk_sndtimeo = HZ*30; if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0))) goto failed; /* make sure that we don't have too many active connections. * If we have, something must be dropped. * * There's no point in trying to do random drop here for * DoS prevention. The NFS clients does 1 reconnect in 15 * seconds. An attacker can easily beat that. * * The only somewhat efficient mechanism would be if drop * old connections from the same IP first. But right now * we don't even record the client IP in svc_sock. */ if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { struct svc_sock *svsk = NULL; spin_lock_bh(&serv->sv_lock); if (!list_empty(&serv->sv_tempsocks)) { if (net_ratelimit()) { /* Try to help the admin */ printk(KERN_NOTICE "%s: too many open TCP " "sockets, consider increasing the " "number of nfsd threads\n", serv->sv_name); printk(KERN_NOTICE "%s: last TCP connect from " "%u.%u.%u.%u:%d\n", serv->sv_name, NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); } /* * Always select the oldest socket. It's not fair, * but so is life */ svsk = list_entry(serv->sv_tempsocks.prev, struct svc_sock, sk_list); set_bit(SK_CLOSE, &svsk->sk_flags); svsk->sk_inuse ++; } spin_unlock_bh(&serv->sv_lock); if (svsk) { svc_sock_enqueue(svsk); svc_sock_put(svsk); } } if (serv->sv_stats) serv->sv_stats->nettcpconn++; return;failed: sock_release(newsock); return;}/* * Receive data from a TCP socket. */static intsvc_tcp_recvfrom(struct svc_rqst *rqstp){ struct svc_sock *svsk = rqstp->rq_sock; struct svc_serv *serv = svsk->sk_server; int len; struct kvec vec[RPCSVC_MAXPAGES]; int pnum, vlen; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(SK_DATA, &svsk->sk_flags), test_bit(SK_CONN, &svsk->sk_flags), test_bit(SK_CLOSE, &svsk->sk_flags)); if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { svc_sock_received(svsk); return svc_deferred_recv(rqstp); } if (test_bit(SK_CLOSE, &svsk->sk_flags)) { svc_delete_socket(svsk); return 0; } if (test_bit(SK_CONN, &svsk->sk_flags)) { svc_tcp_accept(svsk); svc_sock_received(svsk); return 0; } if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) /* sndbuf needs to have room for one request * per thread, otherwise we can stall even when the * network isn't a bottleneck. * rcvbuf just needs to be able to hold a few requests. * Normally they will be removed from the queue * as soon a a complete request arrives. */ svc_sock_setbufsize(svsk->sk_sock, (serv->sv_nrthreads+3) * serv->sv_bufsz, 3 * serv->sv_bufsz); clear_bit(SK_DATA, &svsk->sk_flags); /* Receive data. If we haven't got the record length yet, get * the next four bytes. Otherwise try to gobble up as much as * possible up to the complete record length. */ if (svsk->sk_tcplen < 4) { unsigned long want = 4 - svsk->sk_tcplen; struct kvec iov; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) goto error; svsk->sk_tcplen += len; if (len < want) { dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", len, want); svc_sock_received(svsk); return -EAGAIN; /* record header not complete */ } svsk->sk_reclen = ntohl(svsk->sk_reclen); if (!(svsk->sk_reclen & 0x80000000)) { /* FIXME: technically, a record can be fragmented, * and non-terminal fragments will not have the top * bit set in the fragment length header. * But apparently no known nfs clients send fragmented * records. */ printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n", (unsigned long) svsk->sk_reclen); goto err_delete; } svsk->sk_reclen &= 0x7fffffff; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); if (svsk->sk_reclen > serv->sv_bufsz) { printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n", (unsigned long) svsk->sk_reclen); goto err_delete; } } /* Check whether enough data is available */ len = svc_recv_available(svsk); if (len < 0) goto error; if (len < svsk->sk_reclen) { dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); svc_sock_received(svsk); return -EAGAIN; /* record not complete */ } len = svsk->sk_reclen; set_bit(SK_DATA, &svsk->sk_flags); vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; pnum = 1; while (vlen < len) { vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]); vec[pnum].iov_len = PAGE_SIZE; pnum++; vlen += PAGE_SIZE; } /* Now receive data */ len = svc_recvfrom(rqstp, vec, pnum, len); if (len < 0) goto error; dprintk("svc: TCP complete record (%d bytes)\n", len); rqstp->rq_arg.len = len; rqstp->rq_arg.page_base = 0; if (len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = len; rqstp->rq_arg.page_len = 0; } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; } rqstp->rq_skbuff = NULL; rqstp->rq_prot = IPPROTO_TCP; /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; svc_sock_received(svsk); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; return len; err_delete: svc_delete_socket(svsk); return -EAGAIN; error: if (len == -EAGAIN) { dprintk("RPC: TCP recvfrom got EAGAIN\n"); svc_sock_received(svsk); } else { printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_server->sv_name, -len); svc_sock_received(svsk); } return len;}/* * Send out data on TCP socket. */static intsvc_tcp_sendto(struct svc_rqst *rqstp){ struct xdr_buf *xbufp = &rqstp->rq_res; int sent; u32 reclen; /* Set up the first element of the reply kvec. * Any other kvecs that may be in use have been taken * care of by the server implementation itself. */ reclen = htonl(0x80000000|((xbufp->len ) - 4));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -