📄 viblnd_cb.c
        LASSERT (!tx->tx_md.md_active);
        LASSERT (tx->tx_md.md_fmrcount > 0);
        LASSERT (page_offset < PAGE_SIZE);
        LASSERT (npages >= (1 + ((page_offset + nob - 1) >> PAGE_SHIFT)));
        LASSERT (npages <= LNET_MAX_IOV);

        memset(&map_props, 0, sizeof(map_props));

        map_props.start = (void *)page_offset;
        map_props.size = nob;
        map_props.page_array_len = npages;
        map_props.page_array = tx->tx_pages;

        vvrc = vv_map_fmr(kibnal_data.kib_hca, tx->tx_md.md_fmrhandle,
                          &map_props, &tx->tx_md.md_lkey, &tx->tx_md.md_rkey);
        if (vvrc != vv_return_ok) {
                CERROR ("Can't map vaddr %p for %d in %d pages: %d\n",
                        map_props.start, nob, npages, vvrc);
                return -EFAULT;
        }

        tx->tx_md.md_addr = (unsigned long)map_props.start;
        tx->tx_md.md_active = 1;
        tx->tx_md.md_fmrcount--;

        rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey;
        rd->rd_nob = nob;
        rd->rd_addr = tx->tx_md.md_addr;

        /* Compensate for adaptor-tavor's munging of gatherlist addresses */
        if (active)
                rd->rd_addr += PAGE_OFFSET;

        return 0;
}

int
kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                     vv_access_con_bit_mask_t access,
                     unsigned int niov, struct iovec *iov,
                     int offset, int nob)
{
        /* active if I'm sending */
        int           active = ((access & vv_acc_r_mem_write) == 0);
        int           resid;
        int           fragnob;
        struct page  *page;
        int           npages;
        unsigned long page_offset;
        unsigned long vaddr;

        LASSERT (nob > 0);
        LASSERT (niov > 0);

        while (offset >= iov->iov_len) {
                offset -= iov->iov_len;
                niov--;
                iov++;
                LASSERT (niov > 0);
        }

        if (nob > iov->iov_len - offset) {
                CERROR ("Can't map multiple vaddr fragments\n");
                return (-EMSGSIZE);
        }

        vaddr = ((unsigned long)iov->iov_base) + offset;

        page_offset = vaddr & (PAGE_SIZE - 1);
        resid = nob;
        npages = 0;

        do {
                LASSERT (npages < LNET_MAX_IOV);

                page = kibnal_kvaddr_to_page(vaddr);
                if (page == NULL) {
                        CERROR("Can't find page for %lu\n", vaddr);
                        return -EFAULT;
                }

                tx->tx_pages[npages++] = lnet_page2phys(page);

                fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
                vaddr += fragnob;
                resid -= fragnob;

        } while (resid > 0);

        return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
}

int
kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                      vv_access_con_bit_mask_t access,
                      int nkiov, lnet_kiov_t *kiov,
                      int offset, int nob)
{
        /* active if I'm sending */
        int            active = ((access & vv_acc_r_mem_write) == 0);
        int            resid;
        int            npages;
        unsigned long  page_offset;

        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);

        LASSERT (nob > 0);
        LASSERT (nkiov > 0);
        LASSERT (nkiov <= LNET_MAX_IOV);
        LASSERT (!tx->tx_md.md_active);
        LASSERT ((rd != tx->tx_rd) == !active);

        while (offset >= kiov->kiov_len) {
                offset -= kiov->kiov_len;
                nkiov--;
                kiov++;
                LASSERT (nkiov > 0);
        }

        page_offset = kiov->kiov_offset + offset;

        resid = offset + nob;
        npages = 0;

        do {
                LASSERT (npages < LNET_MAX_IOV);
                LASSERT (nkiov > 0);

                if ((npages > 0 && kiov->kiov_offset != 0) ||
                    (resid > kiov->kiov_len &&
                     (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) {
                        /* Can't have gaps */
                        CERROR ("Can't make payload contiguous in I/O VM: "
                                "page %d, offset %d, len %d\n",
                                npages, kiov->kiov_offset, kiov->kiov_len);
                        return -EINVAL;
                }

                tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
                resid -= kiov->kiov_len;
                kiov++;
                nkiov--;
        } while (resid > 0);

        return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
}
#endif
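
/* NB: both kibnal_setup_rd_iov() and kibnal_setup_rd_kiov() funnel into
 * kibnal_map_tx() above with the same invariant: tx_pages[] holds the
 * physical address of every page the payload touches, page_offset is the
 * payload's offset into the first page, and npages covers the whole
 * payload, i.e. npages >= 1 + ((page_offset + nob - 1) >> PAGE_SHIFT).
 * For example, with 4K pages a payload of nob = 6000 starting at
 * page_offset = 3000 occupies bytes 3000..8999 of the mapping and so
 * spans 1 + (8999 >> 12) = 3 pages. */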

kib_conn_t *
kibnal_find_conn_locked (kib_peer_t *peer)
{
        struct list_head *tmp;

        /* just return the first connection */
        list_for_each (tmp, &peer->ibp_conns) {
                return (list_entry(tmp, kib_conn_t, ibc_list));
        }

        return (NULL);
}

void
kibnal_check_sends (kib_conn_t *conn)
{
        kib_tx_t    *tx;
        vv_return_t  vvrc;
        int          rc;
        int          consume_cred;
        int          done;

        /* Don't send anything until after the connection is established */
        if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
                CDEBUG(D_NET, "%s too soon\n",
                       libcfs_nid2str(conn->ibc_peer->ibp_nid));
                return;
        }

        spin_lock(&conn->ibc_lock);

        LASSERT (conn->ibc_nsends_posted <=
                 *kibnal_tunables.kib_concurrent_sends);
        LASSERT (conn->ibc_reserved_credits >= 0);

        while (conn->ibc_reserved_credits > 0 &&
               !list_empty(&conn->ibc_tx_queue_rsrvd)) {
                LASSERT (conn->ibc_version !=
                         IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
                tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
                                kib_tx_t, tx_list);
                list_del(&tx->tx_list);
                list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
                conn->ibc_reserved_credits--;
        }

        if (list_empty(&conn->ibc_tx_queue) &&
            list_empty(&conn->ibc_tx_queue_nocred) &&
            (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER ||
             kibnal_send_keepalive(conn))) {
                spin_unlock(&conn->ibc_lock);

                tx = kibnal_get_idle_tx();
                if (tx != NULL)
                        kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);

                spin_lock(&conn->ibc_lock);

                if (tx != NULL)
                        kibnal_queue_tx_locked(tx, conn);
        }

        for (;;) {
                if (!list_empty(&conn->ibc_tx_queue_nocred)) {
                        LASSERT (conn->ibc_version !=
                                 IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
                        tx = list_entry (conn->ibc_tx_queue_nocred.next,
                                         kib_tx_t, tx_list);
                        consume_cred = 0;
                } else if (!list_empty (&conn->ibc_tx_queue)) {
                        tx = list_entry (conn->ibc_tx_queue.next,
                                         kib_tx_t, tx_list);
                        consume_cred = 1;
                } else {
                        /* nothing waiting */
                        break;
                }

                LASSERT (tx->tx_queued);
                /* We rely on this for QP sizing */
                LASSERT (tx->tx_nwrq > 0 &&
                         tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS);

                LASSERT (conn->ibc_outstanding_credits >= 0);
                LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
                LASSERT (conn->ibc_credits >= 0);
                LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);

                if (conn->ibc_nsends_posted ==
                    *kibnal_tunables.kib_concurrent_sends) {
                        /* We've got some tx completions outstanding... */
                        CDEBUG(D_NET, "%s: posted enough\n",
                               libcfs_nid2str(conn->ibc_peer->ibp_nid));
                        break;
                }

                if (consume_cred) {
                        if (conn->ibc_credits == 0) {   /* no credits */
                                CDEBUG(D_NET, "%s: no credits\n",
                                       libcfs_nid2str(conn->ibc_peer->ibp_nid));
                                break;
                        }

                        if (conn->ibc_credits == 1 &&   /* last credit reserved for */
                            conn->ibc_outstanding_credits == 0) { /* giving back credits */
                                CDEBUG(D_NET, "%s: not using last credit\n",
                                       libcfs_nid2str(conn->ibc_peer->ibp_nid));
                                break;
                        }
                }

                list_del (&tx->tx_list);
                tx->tx_queued = 0;

                /* NB don't drop ibc_lock before bumping tx_sending */

                if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
                    (!list_empty(&conn->ibc_tx_queue) ||
                     !list_empty(&conn->ibc_tx_queue_nocred) ||
                     (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER &&
                      !kibnal_send_keepalive(conn)))) {
                        /* redundant NOOP */
                        spin_unlock(&conn->ibc_lock);
                        kibnal_tx_done(tx);
                        spin_lock(&conn->ibc_lock);
                        CDEBUG(D_NET, "%s: redundant noop\n",
                               libcfs_nid2str(conn->ibc_peer->ibp_nid));
                        continue;
                }

                kibnal_pack_msg(tx->tx_msg, conn->ibc_version,
                                conn->ibc_outstanding_credits,
                                conn->ibc_peer->ibp_nid, conn->ibc_incarnation,
                                conn->ibc_txseq);

                conn->ibc_txseq++;
                conn->ibc_outstanding_credits = 0;
                conn->ibc_nsends_posted++;
                if (consume_cred)
                        conn->ibc_credits--;
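
                /* NB: the return credits for this peer ride in the message
                 * header: kibnal_pack_msg() above folded
                 * ibc_outstanding_credits into tx_msg->ibm_credits, so the
                 * counter is zeroed here.  Should the post fail below, they
                 * are recovered from ibm_credits. */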

                /* CAVEAT EMPTOR!  This tx could be the PUT_DONE of an RDMA
                 * PUT.  If so, it was first queued here as a PUT_REQ, sent and
                 * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
                 * and then re-queued here.  It's (just) possible that
                 * tx_sending is non-zero if we've not done the tx_complete()
                 * from the first send; hence the ++ rather than = below. */
                tx->tx_sending++;

                list_add (&tx->tx_list, &conn->ibc_active_txs);

                /* Keep holding ibc_lock while posting sends on this
                 * connection; vv_post_send() isn't re-entrant on the same
                 * QP!! */

                LASSERT (tx->tx_nwrq > 0);
#if 0
                if (tx->tx_wrq[0].wr_type == vv_wr_rdma_write)
                        CDEBUG(D_NET, "WORK[0]: RDMA gl %p for %d k %x -> "
                               LPX64" k %x\n",
                               tx->tx_wrq[0].scatgat_list->v_address,
                               tx->tx_wrq[0].scatgat_list->length,
                               tx->tx_wrq[0].scatgat_list->l_key,
                               tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_addr,
                               tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_r_key);
                else
                        CDEBUG(D_NET, "WORK[0]: %s gl %p for %d k %x\n",
                               tx->tx_wrq[0].wr_type == vv_wr_send ?
                               "SEND" : "????",
                               tx->tx_wrq[0].scatgat_list->v_address,
                               tx->tx_wrq[0].scatgat_list->length,
                               tx->tx_wrq[0].scatgat_list->l_key);

                if (tx->tx_nwrq > 1) {
                        if (tx->tx_wrq[1].wr_type == vv_wr_rdma_write)
                                CDEBUG(D_NET, "WORK[1]: RDMA gl %p for %d k %x -> "
                                       LPX64" k %x\n",
                                       tx->tx_wrq[1].scatgat_list->v_address,
                                       tx->tx_wrq[1].scatgat_list->length,
                                       tx->tx_wrq[1].scatgat_list->l_key,
                                       tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_addr,
                                       tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_r_key);
                        else
                                CDEBUG(D_NET, "WORK[1]: %s gl %p for %d k %x\n",
                                       tx->tx_wrq[1].wr_type == vv_wr_send ?
                                       "SEND" : "????",
                                       tx->tx_wrq[1].scatgat_list->v_address,
                                       tx->tx_wrq[1].scatgat_list->length,
                                       tx->tx_wrq[1].scatgat_list->l_key);
                }
#endif
                rc = -ECONNABORTED;
                vvrc = vv_return_ok;

                if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
                        tx->tx_status = 0;
                        vvrc = vv_post_send_list(kibnal_data.kib_hca,
                                                 conn->ibc_qp,
                                                 tx->tx_nwrq,
                                                 tx->tx_wrq,
                                                 vv_operation_type_send_rc);
                        rc = (vvrc == vv_return_ok) ? 0 : -EIO;
                }

                conn->ibc_last_send = jiffies;

                if (rc != 0) {
                        /* NB credits are transferred in the actual
                         * message, which can only be the last work item */
                        conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
                        if (consume_cred)
                                conn->ibc_credits++;
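                        /* NB: this undoes the accounting done before the
                         * post: the return credits this message was carrying
                         * go back to ibc_outstanding_credits, and any send
                         * credit it consumed is refunded, since nothing was
                         * posted to the peer. */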