📄 svcsock.c
字号:
wake_up_interruptible_all(sk->sk_sleep);}static voidsvc_tcp_data_ready(struct sock *sk, int count){ struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; dprintk("svc: socket %p TCP data ready (svsk %p)\n", sk, sk->sk_user_data); if (svsk) { set_bit(SK_DATA, &svsk->sk_flags); svc_sock_enqueue(svsk); } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep);}static inline int svc_port_is_privileged(struct sockaddr *sin){ switch (sin->sa_family) { case AF_INET: return ntohs(((struct sockaddr_in *)sin)->sin_port) < PROT_SOCK; case AF_INET6: return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) < PROT_SOCK; default: return 0; }}/* * Accept a TCP connection */static voidsvc_tcp_accept(struct svc_sock *svsk){ struct sockaddr_storage addr; struct sockaddr *sin = (struct sockaddr *) &addr; struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; struct svc_sock *newsvsk; int err, slen; char buf[RPC_MAX_ADDRBUFLEN]; dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) return; clear_bit(SK_CONN, &svsk->sk_flags); err = kernel_accept(sock, &newsock, O_NONBLOCK); if (err < 0) { if (err == -ENOMEM) printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name); else if (err != -EAGAIN && net_ratelimit()) printk(KERN_WARNING "%s: accept failed (err %d)!\n", serv->sv_name, -err); return; } set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); err = kernel_getpeername(newsock, sin, &slen); if (err < 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: peername failed (err %d)!\n", serv->sv_name, -err); goto failed; /* aborted connection or whatever */ } /* Ideally, we would want to reject connections from unauthorized * hosts here, but when we get encryption, the IP of the host won't * tell us anything. For now just warn about unpriv connections. */ if (!svc_port_is_privileged(sin)) { dprintk(KERN_WARNING "%s: connect from unprivileged port: %s\n", serv->sv_name, __svc_print_addr(sin, buf, sizeof(buf))); } dprintk("%s: connect from %s\n", serv->sv_name, __svc_print_addr(sin, buf, sizeof(buf))); /* make sure that a write doesn't block forever when * low on memory */ newsock->sk->sk_sndtimeo = HZ*30; if (!(newsvsk = svc_setup_socket(serv, newsock, &err, (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) goto failed; memcpy(&newsvsk->sk_remote, sin, slen); newsvsk->sk_remotelen = slen; err = kernel_getsockname(newsock, sin, &slen); if (unlikely(err < 0)) { dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); slen = offsetof(struct sockaddr, sa_data); } memcpy(&newsvsk->sk_local, sin, slen); svc_sock_received(newsvsk); /* make sure that we don't have too many active connections. * If we have, something must be dropped. * * There's no point in trying to do random drop here for * DoS prevention. The NFS clients does 1 reconnect in 15 * seconds. An attacker can easily beat that. * * The only somewhat efficient mechanism would be if drop * old connections from the same IP first. But right now * we don't even record the client IP in svc_sock. */ if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { struct svc_sock *svsk = NULL; spin_lock_bh(&serv->sv_lock); if (!list_empty(&serv->sv_tempsocks)) { if (net_ratelimit()) { /* Try to help the admin */ printk(KERN_NOTICE "%s: too many open TCP " "sockets, consider increasing the " "number of nfsd threads\n", serv->sv_name); printk(KERN_NOTICE "%s: last TCP connect from %s\n", serv->sv_name, __svc_print_addr(sin, buf, sizeof(buf))); } /* * Always select the oldest socket. It's not fair, * but so is life */ svsk = list_entry(serv->sv_tempsocks.prev, struct svc_sock, sk_list); set_bit(SK_CLOSE, &svsk->sk_flags); atomic_inc(&svsk->sk_inuse); } spin_unlock_bh(&serv->sv_lock); if (svsk) { svc_sock_enqueue(svsk); svc_sock_put(svsk); } } if (serv->sv_stats) serv->sv_stats->nettcpconn++; return;failed: sock_release(newsock); return;}/* * Receive data from a TCP socket. */static intsvc_tcp_recvfrom(struct svc_rqst *rqstp){ struct svc_sock *svsk = rqstp->rq_sock; struct svc_serv *serv = svsk->sk_server; int len; struct kvec *vec; int pnum, vlen; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(SK_DATA, &svsk->sk_flags), test_bit(SK_CONN, &svsk->sk_flags), test_bit(SK_CLOSE, &svsk->sk_flags)); if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { svc_sock_received(svsk); return svc_deferred_recv(rqstp); } if (test_bit(SK_CLOSE, &svsk->sk_flags)) { svc_delete_socket(svsk); return 0; } if (svsk->sk_sk->sk_state == TCP_LISTEN) { svc_tcp_accept(svsk); svc_sock_received(svsk); return 0; } if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) /* sndbuf needs to have room for one request * per thread, otherwise we can stall even when the * network isn't a bottleneck. * * We count all threads rather than threads in a * particular pool, which provides an upper bound * on the number of threads which will access the socket. * * rcvbuf just needs to be able to hold a few requests. * Normally they will be removed from the queue * as soon a a complete request arrives. */ svc_sock_setbufsize(svsk->sk_sock, (serv->sv_nrthreads+3) * serv->sv_max_mesg, 3 * serv->sv_max_mesg); clear_bit(SK_DATA, &svsk->sk_flags); /* Receive data. If we haven't got the record length yet, get * the next four bytes. Otherwise try to gobble up as much as * possible up to the complete record length. */ if (svsk->sk_tcplen < 4) { unsigned long want = 4 - svsk->sk_tcplen; struct kvec iov; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) goto error; svsk->sk_tcplen += len; if (len < want) { dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", len, want); svc_sock_received(svsk); return -EAGAIN; /* record header not complete */ } svsk->sk_reclen = ntohl(svsk->sk_reclen); if (!(svsk->sk_reclen & 0x80000000)) { /* FIXME: technically, a record can be fragmented, * and non-terminal fragments will not have the top * bit set in the fragment length header. * But apparently no known nfs clients send fragmented * records. */ if (net_ratelimit()) printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" " (non-terminal)\n", (unsigned long) svsk->sk_reclen); goto err_delete; } svsk->sk_reclen &= 0x7fffffff; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); if (svsk->sk_reclen > serv->sv_max_mesg) { if (net_ratelimit()) printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" " (large)\n", (unsigned long) svsk->sk_reclen); goto err_delete; } } /* Check whether enough data is available */ len = svc_recv_available(svsk); if (len < 0) goto error; if (len < svsk->sk_reclen) { dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); svc_sock_received(svsk); return -EAGAIN; /* record not complete */ } len = svsk->sk_reclen; set_bit(SK_DATA, &svsk->sk_flags); vec = rqstp->rq_vec; vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; pnum = 1; while (vlen < len) { vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); vec[pnum].iov_len = PAGE_SIZE; pnum++; vlen += PAGE_SIZE; } rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ len = svc_recvfrom(rqstp, vec, pnum, len); if (len < 0) goto error; dprintk("svc: TCP complete record (%d bytes)\n", len); rqstp->rq_arg.len = len; rqstp->rq_arg.page_base = 0; if (len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = len; rqstp->rq_arg.page_len = 0; } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; } rqstp->rq_skbuff = NULL; rqstp->rq_prot = IPPROTO_TCP; /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; svc_sock_received(svsk); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; return len; err_delete: svc_delete_socket(svsk); return -EAGAIN; error: if (len == -EAGAIN) { dprintk("RPC: TCP recvfrom got EAGAIN\n"); svc_sock_received(svsk); } else { printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_server->sv_name, -len); goto err_delete; } return len;}/* * Send out data on TCP socket. */static intsvc_tcp_sendto(struct svc_rqst *rqstp){ struct xdr_buf *xbufp = &rqstp->rq_res; int sent; __be32 reclen; /* Set up the first element of the reply kvec. * Any other kvecs that may be in use have been taken * care of by the server implementation itself. */ reclen = htonl(0x80000000|((xbufp->len ) - 4)); memcpy(xbufp->head[0].iov_base, &reclen, 4); if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) return -ENOTCONN; sent = svc_sendto(rqstp, &rqstp->rq_res); if (sent != xbufp->len) { printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", rqstp->rq_sock->sk_server->sv_name, (sent<0)?"got error":"sent only", sent, xbufp->len); set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); svc_sock_enqueue(rqstp->rq_sock); sent = -EAGAIN; } return sent;}static voidsvc_tcp_init(struct svc_sock *svsk){ struct sock *sk = svsk->sk_sk; struct tcp_sock *tp = tcp_sk(sk); svsk->sk_recvfrom = svc_tcp_recvfrom; svsk->sk_sendto = svc_tcp_sendto; if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(SK_CONN, &svsk->sk_flags); } else { dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_tcp_data_ready; sk->sk_write_space = svc_write_space; svsk->sk_reclen = 0; svsk->sk_tcplen = 0; tp->nonagle = 1; /* disable Nagle's algorithm */ /* initialise setting must have enough space to * receive and respond to one request. * svc_tcp_recvfrom will re-adjust if necessary */ svc_sock_setbufsize(svsk->sk_sock, 3 * svsk->sk_server->sv_max_mesg, 3 * svsk->sk_server->sv_max_mesg); set_bit(SK_CHNGBUF, &svsk->sk_flags); set_bit(SK_DATA, &svsk->sk_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(SK_CLOSE, &svsk->sk_flags); }}voidsvc_sock_update_bufs(struct svc_serv *serv){ /* * The number of server threads has changed. Update * rcvbuf and sndbuf accordingly on all sockets */ struct list_head *le; spin_lock_bh(&serv->sv_lock); list_for_each(le, &serv->sv_permsocks) { struct svc_sock *svsk = list_entry(le, struct svc_sock, sk_list); set_bit(SK_CHNGBUF, &svsk->sk_flags); } list_for_each(le, &serv->sv_tempsocks) { struct svc_sock *svsk = list_entry(le, struct svc_sock, sk_list); set_bit(SK_CHNGBUF, &svsk->sk_flags); } spin_unlock_bh(&serv->sv_lock);}/* * Receive the next request on any socket. This code is carefully * organised not to touch any cachelines in the shared svc_serv * structure, only cachelines in the local svc_pool. */intsvc_recv(struct svc_rqst *rqstp, long timeout){ struct svc_sock *svsk = NULL; struct svc_serv *serv = rqstp->rq_server; struct svc_pool *pool = rqstp->rq_pool; int len, i; int pages; struct xdr_buf *arg; DECLARE_WAITQUEUE(wait, current); dprintk("svc: server %p waiting for data (to = %ld)\n", rqstp, timeout); if (rqstp->rq_sock) printk(KERN_ERR "svc_recv: service %p, socket not NULL!\n", rqstp); if (waitqueue_active(&rqstp->rq_wait)) printk(KERN_ERR "svc_recv: service %p, wait queue active!\n", rqstp); /* now allocate needed pages. If we get a failure, sleep briefly */ pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; for (i=0; i < pages ; i++) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); if (!p) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); rqstp->rq_pages[i] = p; } rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ BUG_ON(pages >= RPCSVC_MAXPAGES); /* Make arg->head point to first page and arg->pages point to rest */ arg = &rqstp->rq_arg; arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); arg->head[0].iov_len = PAGE_SIZE; arg->pages = rqstp->rq_pages + 1; arg->page_base = 0; /* save at least one page for response */ arg->page_len = (pages-2)*PAGE_SIZE; arg->len = (pages-1)*PAGE_SIZE; arg->tail[0].iov_len = 0; try_to_freeze(); cond_resched(); if (signalled()) return -EINTR; spin_lock_bh(&pool->sp_lock); if ((svsk = svc_sock_dequeue(pool)) != NULL) { rqstp->rq_sock = svsk; atomic_inc(&svsk->sk_inuse); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); } else { /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -