⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 viblnd_cb.c

📁 非常经典的一个分布式系统
💻 C
📖 第 1 页 / 共 5 页
字号:
        /* NOTE(review): this chunk begins inside kibnal_map_tx() — the
         * function's signature and local declarations (map_props, vvrc,
         * tx, rd, active, npages, page_offset, nob) are above this view.
         * This tail maps the physical pages already collected in
         * tx->tx_pages through the HCA's FMR handle and fills in the
         * RDMA descriptor 'rd'. */
        LASSERT (!tx->tx_md.md_active);
        LASSERT (tx->tx_md.md_fmrcount > 0);
        LASSERT (page_offset < PAGE_SIZE);
        LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT)));
        LASSERT (npages <= LNET_MAX_IOV);

        memset(&map_props, 0, sizeof(map_props));

        /* 'start' carries only the intra-page offset; the page frames
         * themselves come from the page_array */
        map_props.start          = (void *)page_offset;
        map_props.size           = nob;
        map_props.page_array_len = npages;
        map_props.page_array     = tx->tx_pages;

        vvrc = vv_map_fmr(kibnal_data.kib_hca, tx->tx_md.md_fmrhandle,
                          &map_props, &tx->tx_md.md_lkey, &tx->tx_md.md_rkey);
        if (vvrc != vv_return_ok) {
                CERROR ("Can't map vaddr %p for %d in %d pages: %d\n",
                         map_props.start, nob, npages, vvrc);
                return -EFAULT;
        }

        tx->tx_md.md_addr = (unsigned long)map_props.start;
        tx->tx_md.md_active = 1;        /* remember to unmap on tx completion */
        tx->tx_md.md_fmrcount--;        /* one fewer remap before FMR flush needed */

        /* local key for sending (gather list), remote key for the peer's RDMA */
        rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey;
        rd->rd_nob = nob;
        rd->rd_addr = tx->tx_md.md_addr;

        /* Compensate for adaptor-tavor's munging of gatherlist addresses */
        if (active)
                rd->rd_addr += PAGE_OFFSET;

        return 0;
}

/* Set up the RDMA descriptor 'rd' for the kernel-virtual iovec fragment
 * selected by 'offset'/'nob'.  The fragment must lie within a single iovec
 * entry (multiple vaddr fragments can't be made virtually contiguous for
 * the FMR); its pages are resolved to physical addresses and handed to
 * kibnal_map_tx() above.
 * Returns 0 on success, -EMSGSIZE if the payload spans iovec entries,
 * -EFAULT if a virtual address has no backing page. */
int
kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                     vv_access_con_bit_mask_t access,
                     unsigned int niov, struct iovec *iov, int offset, int nob)
{
        /* active if I'm sending */
        int           active = ((access & vv_acc_r_mem_write) == 0);
        int           resid;
        int           fragnob;
        struct page  *page;
        int           npages;
        unsigned long page_offset;
        unsigned long vaddr;

        LASSERT (nob > 0);
        LASSERT (niov > 0);

        /* skip over whole iovec entries until 'offset' lands inside one */
        while (offset >= iov->iov_len) {
                offset -= iov->iov_len;
                niov--;
                iov++;
                LASSERT (niov > 0);
        }

        if (nob > iov->iov_len - offset) {
                CERROR ("Can't map multiple vaddr fragments\n");
                return (-EMSGSIZE);
        }

        vaddr = ((unsigned long)iov->iov_base) + offset;

        page_offset = vaddr & (PAGE_SIZE - 1);
        resid = nob;
        npages = 0;

        /* collect the physical page of every page the fragment touches */
        do {
                LASSERT (npages < LNET_MAX_IOV);

                page = kibnal_kvaddr_to_page(vaddr);
                if (page == NULL) {
                        CERROR("Can't find page for %lu\n", vaddr);
                        return -EFAULT;
                }

                tx->tx_pages[npages++] = lnet_page2phys(page);

                /* advance to the next page boundary; the first/last
                 * fragments may be partial pages */
                fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
                vaddr += fragnob;
                resid -= fragnob;

        } while (resid > 0);

        return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
}

/* Set up the RDMA descriptor 'rd' for the page-based (kiov) fragment
 * selected by 'offset'/'nob'.  Interior kiov entries must cover whole
 * pages (no gaps) so the payload is virtually contiguous after FMR
 * mapping.  Returns 0 on success, -EINVAL on a gap, or kibnal_map_tx()'s
 * error. */
int
kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                      vv_access_con_bit_mask_t access,
                      int nkiov, lnet_kiov_t *kiov, int offset, int nob)
{
        /* active if I'm sending */
        int            active = ((access & vv_acc_r_mem_write) == 0);
        int            resid;
        int            npages;
        unsigned long  page_offset;

        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);

        LASSERT (nob > 0);
        LASSERT (nkiov > 0);
        LASSERT (nkiov <= LNET_MAX_IOV);
        LASSERT (!tx->tx_md.md_active);
        LASSERT ((rd != tx->tx_rd) == !active);

        /* skip over whole kiov entries until 'offset' lands inside one */
        while (offset >= kiov->kiov_len) {
                offset -= kiov->kiov_len;
                nkiov--;
                kiov++;
                LASSERT (nkiov > 0);
        }

        page_offset = kiov->kiov_offset + offset;

        /* NOTE(review): resid counts from the start of the first kiov
         * entry (offset + nob), so each whole kiov_len is subtracted
         * below; the loop ends when the fragment's last byte is covered */
        resid = offset + nob;
        npages = 0;

        do {
                LASSERT (npages < LNET_MAX_IOV);
                LASSERT (nkiov > 0);

                /* every entry after the first must start at page offset 0,
                 * and every entry before the last must end on a page
                 * boundary, otherwise the payload can't be contiguous */
                if ((npages > 0 && kiov->kiov_offset != 0) ||
                    (resid > kiov->kiov_len &&
                     (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) {
                        /* Can't have gaps */
                        CERROR ("Can't make payload contiguous in I/O VM:"
                                "page %d, offset %d, len %d \n",
                                npages, kiov->kiov_offset, kiov->kiov_len);

                        return -EINVAL;
                }

                tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
                resid -= kiov->kiov_len;
                kiov++;
                nkiov--;
        } while (resid > 0);

        return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
}
#endif

/* Return a connection for 'peer', or NULL if it has none.
 * Caller must hold the lock protecting peer->ibp_conns. */
kib_conn_t *
kibnal_find_conn_locked (kib_peer_t *peer)
{
        struct list_head *tmp;

        /* just return the first connection */
        list_for_each (tmp, &peer->ibp_conns) {
                return (list_entry(tmp, kib_conn_t, ibc_list));
        }

        return (NULL);
}

/* Service 'conn's send queues: move reserved-credit txs onto the main
 * queue, queue a NOOP if credits need returning, then post as many queued
 * sends as flow-control credits and the QP allow.
 * NOTE(review): this chunk is truncated — the function continues past the
 * end of this view. */
void
kibnal_check_sends (kib_conn_t *conn)
{
        kib_tx_t       *tx;
        vv_return_t     vvrc;
        int             rc;
        int             consume_cred;
        int             done;   /* NOTE(review): used beyond this view, presumably */

        /* Don't send anything until after the connection is established */
        if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
                CDEBUG(D_NET, "%s too soon\n",
                       libcfs_nid2str(conn->ibc_peer->ibp_nid));
                return;
        }

        spin_lock(&conn->ibc_lock);

        LASSERT (conn->ibc_nsends_posted <=
                 *kibnal_tunables.kib_concurrent_sends);
        LASSERT (conn->ibc_reserved_credits >= 0);

        /* promote txs waiting on reserved credits to the normal send queue,
         * consuming one reserved credit each */
        while (conn->ibc_reserved_credits > 0 &&
               !list_empty(&conn->ibc_tx_queue_rsrvd)) {
                LASSERT (conn->ibc_version !=
                         IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
                tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
                                kib_tx_t, tx_list);
                list_del(&tx->tx_list);
                list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
                conn->ibc_reserved_credits--;
        }

        /* nothing queued but credits piling up (or keepalive due):
         * queue a NOOP to return credits to the peer */
        if (list_empty(&conn->ibc_tx_queue) &&
            list_empty(&conn->ibc_tx_queue_nocred) &&
            (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER ||
             kibnal_send_keepalive(conn))) {
                /* must drop the lock: kibnal_get_idle_tx() may block */
                spin_unlock(&conn->ibc_lock);

                tx = kibnal_get_idle_tx();
                if (tx != NULL)
                        kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);

                spin_lock(&conn->ibc_lock);

                if (tx != NULL)
                        kibnal_queue_tx_locked(tx, conn);
        }

        for (;;) {
                /* no-credit queue takes priority over the normal queue */
                if (!list_empty(&conn->ibc_tx_queue_nocred)) {
                        LASSERT (conn->ibc_version !=
                                 IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
                        tx = list_entry (conn->ibc_tx_queue_nocred.next,
                                         kib_tx_t, tx_list);
                        consume_cred = 0;
                } else if (!list_empty (&conn->ibc_tx_queue)) {
                        tx = list_entry (conn->ibc_tx_queue.next,
                                         kib_tx_t, tx_list);
                        consume_cred = 1;
                } else {
                        /* nothing waiting */
                        break;
                }

                LASSERT (tx->tx_queued);
                /* We rely on this for QP sizing */
                LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS);

                LASSERT (conn->ibc_outstanding_credits >= 0);
                LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
                LASSERT (conn->ibc_credits >= 0);
                LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);

                if (conn->ibc_nsends_posted ==
                    *kibnal_tunables.kib_concurrent_sends) {
                        /* We've got some tx completions outstanding... */
                        CDEBUG(D_NET, "%s: posted enough\n",
                               libcfs_nid2str(conn->ibc_peer->ibp_nid));
                        break;
                }

                if (consume_cred) {
                        if (conn->ibc_credits == 0) {   /* no credits */
                                CDEBUG(D_NET, "%s: no credits\n",
                                       libcfs_nid2str(conn->ibc_peer->ibp_nid));
                                break;
                        }

                        if (conn->ibc_credits == 1 &&   /* last credit reserved for */
                            conn->ibc_outstanding_credits == 0) { /* giving back credits */
                                CDEBUG(D_NET, "%s: not using last credit\n",
                                       libcfs_nid2str(conn->ibc_peer->ibp_nid));
                                break;
                        }
                }

                list_del (&tx->tx_list);
                tx->tx_queued = 0;

                /* NB don't drop ibc_lock before bumping tx_sending */

                /* a queued NOOP is pointless if real traffic is now behind
                 * it, or the credit-return pressure has gone away */
                if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
                    (!list_empty(&conn->ibc_tx_queue) ||
                     !list_empty(&conn->ibc_tx_queue_nocred) ||
                     (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER &&
                      !kibnal_send_keepalive(conn)))) {
                        /* redundant NOOP */
                        spin_unlock(&conn->ibc_lock);
                        kibnal_tx_done(tx);
                        spin_lock(&conn->ibc_lock);
                        CDEBUG(D_NET, "%s: redundant noop\n",
                               libcfs_nid2str(conn->ibc_peer->ibp_nid));
                        continue;
                }

                /* piggy-back all outstanding credits on this message */
                kibnal_pack_msg(tx->tx_msg, conn->ibc_version,
                                conn->ibc_outstanding_credits,
                                conn->ibc_peer->ibp_nid, conn->ibc_incarnation,
                                conn->ibc_txseq);

                conn->ibc_txseq++;
                conn->ibc_outstanding_credits = 0;
                conn->ibc_nsends_posted++;
                if (consume_cred)
                        conn->ibc_credits--;

                /* CAVEAT EMPTOR!  This tx could be the PUT_DONE of an RDMA
                 * PUT.  If so, it was first queued here as a PUT_REQ, sent and
                 * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
                 * and then re-queued here.  It's (just) possible that
                 * tx_sending is non-zero if we've not done the tx_complete() from
                 * the first send; hence the ++ rather than = below. */
                tx->tx_sending++;

                list_add (&tx->tx_list, &conn->ibc_active_txs);

                /* Keep holding ibc_lock while posting sends on this
                 * connection; vv_post_send() isn't re-entrant on the same
                 * QP!! */

                LASSERT (tx->tx_nwrq > 0);
#if 0
                if (tx->tx_wrq[0].wr_type == vv_wr_rdma_write) 
                        CDEBUG(D_NET, "WORK[0]: RDMA gl %p for %d k %x -> "LPX64" k %x\n",
                               tx->tx_wrq[0].scatgat_list->v_address,
                               tx->tx_wrq[0].scatgat_list->length,
                               tx->tx_wrq[0].scatgat_list->l_key,
                               tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_addr,
                               tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_r_key);
                else
                        CDEBUG(D_NET, "WORK[0]: %s gl %p for %d k %x\n",
                               tx->tx_wrq[0].wr_type == vv_wr_send ? "SEND" : "????",
                               tx->tx_wrq[0].scatgat_list->v_address,
                               tx->tx_wrq[0].scatgat_list->length,
                               tx->tx_wrq[0].scatgat_list->l_key);

                if (tx->tx_nwrq > 1) {
                        if (tx->tx_wrq[1].wr_type == vv_wr_rdma_write) 
                                CDEBUG(D_NET, "WORK[1]: RDMA gl %p for %d k %x -> "LPX64" k %x\n",
                                       tx->tx_wrq[1].scatgat_list->v_address,
                                       tx->tx_wrq[1].scatgat_list->length,
                                       tx->tx_wrq[1].scatgat_list->l_key,
                                       tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_addr,
                                       tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_r_key);
                        else
                                CDEBUG(D_NET, "WORK[1]: %s gl %p for %d k %x\n",
                                       tx->tx_wrq[1].wr_type == vv_wr_send ? "SEND" : "????",
                                       tx->tx_wrq[1].scatgat_list->v_address,
                                       tx->tx_wrq[1].scatgat_list->length,
                                       tx->tx_wrq[1].scatgat_list->l_key);
                }
#endif

                rc = -ECONNABORTED;
                vvrc = vv_return_ok;

                if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
                        tx->tx_status = 0;
                        vvrc = vv_post_send_list(kibnal_data.kib_hca,
                                                 conn->ibc_qp,
                                                 tx->tx_nwrq,
                                                 tx->tx_wrq,
                                                 vv_operation_type_send_rc);
                        rc = (vvrc == vv_return_ok) ? 0 : -EIO;
                }

                conn->ibc_last_send = jiffies;

                if (rc != 0) {
                        /* post failed: undo the accounting done above */
                        /* NB credits are transferred in the actual
                         * message, which can only be the last work item */
                        conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
                        if (consume_cred)
                                conn->ibc_credits++;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -