📄 ptllnd_peer.c
字号:
tx->tx_msg->ptlm_credits); list_add_tail(&tx->tx_list, &peer->peer_activeq); kptllnd_tx_addref(tx); /* 1 ref for me... */ spin_unlock_irqrestore(&peer->peer_lock, flags); if (tx->tx_type == TX_TYPE_PUT_REQUEST || tx->tx_type == TX_TYPE_GET_REQUEST) { /* Post bulk now we have safe matchbits */ rc = PtlMEAttach(kptllnd_data.kptl_nih, *kptllnd_tunables.kptl_portal, peer->peer_ptlid, tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits, 0, /* ignore bits */ PTL_UNLINK, PTL_INS_BEFORE, &meh); if (rc != PTL_OK) { CERROR("PtlMEAttach(%s) failed: %s(%d)\n", libcfs_id2str(peer->peer_id), kptllnd_errtype2str(rc), rc); goto failed; } rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK, &tx->tx_rdma_mdh); if (rc != PTL_OK) { CERROR("PtlMDAttach(%s) failed: %s(%d)\n", libcfs_id2str(tx->tx_peer->peer_id), kptllnd_errtype2str(rc), rc); rc = PtlMEUnlink(meh); LASSERT(rc == PTL_OK); tx->tx_rdma_mdh = PTL_INVALID_HANDLE; goto failed; } /* I'm not racing with the event callback here. It's a * bug if there's an event on the MD I just attached * before I actually send the RDMA request message - * probably matchbits re-used in error. */ } tx->tx_tposted = jiffies; /* going on the wire */ rc = PtlPut (tx->tx_msg_mdh, tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ, peer->peer_ptlid, *kptllnd_tunables.kptl_portal, 0, /* acl cookie */ LNET_MSG_MATCHBITS, 0, /* offset */ 0); /* header data */ if (rc != PTL_OK) { CERROR("PtlPut %s error %s(%d)\n", libcfs_id2str(peer->peer_id), kptllnd_errtype2str(rc), rc); goto failed; } kptllnd_tx_decref(tx); /* drop my ref */ spin_lock_irqsave(&peer->peer_lock, flags); } spin_unlock_irqrestore(&peer->peer_lock, flags); return; failed: /* Nuke everything (including tx we were trying) */ kptllnd_peer_close(peer, -EIO); kptllnd_tx_decref(tx);}kptl_tx_t *kptllnd_find_timed_out_tx(kptl_peer_t *peer){ kptl_tx_t *tx; struct list_head *tmp; list_for_each(tmp, &peer->peer_sendq) { tx = list_entry(peer->peer_sendq.next, kptl_tx_t, tx_list); if (time_after_eq(jiffies, tx->tx_deadline)) { kptllnd_tx_addref(tx); return tx; } } list_for_each(tmp, &peer->peer_activeq) { tx = list_entry(peer->peer_activeq.next, kptl_tx_t, tx_list); if (time_after_eq(jiffies, tx->tx_deadline)) { kptllnd_tx_addref(tx); return tx; } } return NULL;}voidkptllnd_peer_check_bucket (int idx, int stamp){ struct list_head *peers = &kptllnd_data.kptl_peers[idx]; struct list_head *ptmp; kptl_peer_t *peer; kptl_tx_t *tx; unsigned long flags; int nsend; int nactive; int check_sends; CDEBUG(D_NET, "Bucket=%d, stamp=%d\n", idx, stamp); again: /* NB. Shared lock while I just look */ read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); list_for_each (ptmp, peers) { peer = list_entry (ptmp, kptl_peer_t, peer_list); CDEBUG(D_NET, "Peer=%s Credits=%d Outstanding=%d Send=%d\n", libcfs_id2str(peer->peer_id), peer->peer_credits, peer->peer_outstanding_credits, peer->peer_sent_credits); spin_lock(&peer->peer_lock); if (peer->peer_check_stamp == stamp) { /* checked already this pass */ spin_unlock(&peer->peer_lock); continue; } peer->peer_check_stamp = stamp; tx = kptllnd_find_timed_out_tx(peer); check_sends = peer->peer_retry_noop; spin_unlock(&peer->peer_lock); if (tx == NULL && !check_sends) continue; kptllnd_peer_addref(peer); /* 1 ref for me... */ read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); if (tx == NULL) { /* nothing timed out */ kptllnd_peer_check_sends(peer); kptllnd_peer_decref(peer); /* ...until here or... */ /* rescan after dropping the lock */ goto again; } spin_lock_irqsave(&peer->peer_lock, flags); nsend = kptllnd_count_queue(&peer->peer_sendq); nactive = kptllnd_count_queue(&peer->peer_activeq); spin_unlock_irqrestore(&peer->peer_lock, flags); LCONSOLE_ERROR_MSG(0x126, "Timing out %s: %s\n", libcfs_id2str(peer->peer_id), (tx->tx_tposted == 0) ? "no free peer buffers" : "please check Portals"); CERROR("%s timed out: cred %d outstanding %d, sent %d, " "sendq %d, activeq %d Tx %p %s (%s%s%s) status %d " "%sposted %lu T/O %ds\n", libcfs_id2str(peer->peer_id), peer->peer_credits, peer->peer_outstanding_credits, peer->peer_sent_credits, nsend, nactive, tx, kptllnd_tx_typestr(tx->tx_type), tx->tx_active ? "A" : "", PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ? "" : "M", PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ? "" : "D", tx->tx_status, (tx->tx_tposted == 0) ? "not " : "", (tx->tx_tposted == 0) ? 0UL : (jiffies - tx->tx_tposted), *kptllnd_tunables.kptl_timeout); kptllnd_dump_ptltrace(); kptllnd_tx_decref(tx); kptllnd_peer_close(peer, -ETIMEDOUT); kptllnd_peer_decref(peer); /* ...until here */ /* start again now I've dropped the lock */ goto again; } read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);}kptl_peer_t *kptllnd_id2peer_locked (lnet_process_id_t id){ struct list_head *peers = kptllnd_nid2peerlist(id.nid); struct list_head *tmp; kptl_peer_t *peer; list_for_each (tmp, peers) { peer = list_entry (tmp, kptl_peer_t, peer_list); LASSERT(peer->peer_state == PEER_STATE_WAITING_HELLO || peer->peer_state == PEER_STATE_ACTIVE); if (peer->peer_id.nid != id.nid || peer->peer_id.pid != id.pid) continue; kptllnd_peer_addref(peer); CDEBUG(D_NET, "%s -> %s (%d)\n", libcfs_id2str(id), kptllnd_ptlid2str(peer->peer_ptlid), atomic_read (&peer->peer_refcount)); return peer; } return NULL;}voidkptllnd_peertable_overflow_msg(char *str, lnet_process_id_t id){ LCONSOLE_ERROR_MSG(0x127, "%s %s overflows the peer table[%d]: " "messages may be dropped\n", str, libcfs_id2str(id), kptllnd_data.kptl_n_active_peers); LCONSOLE_ERROR_MSG(0x128, "Please correct by increasing " "'max_nodes' or 'max_procs_per_node'\n");}__u64kptllnd_get_last_seen_matchbits_locked(lnet_process_id_t lpid){ kptl_peer_t *peer; struct list_head *tmp; /* Find the last matchbits I saw this new peer using. Note.. A. This peer cannot be in the peer table - she's new! B. If I can't find the peer in the closing/zombie peers, all matchbits are safe because all refs to the (old) peer have gone so all txs have completed so there's no risk of matchbit collision! */ LASSERT(kptllnd_id2peer_locked(lpid) == NULL); /* peer's last matchbits can't change after it comes out of the peer * table, so first match is fine */ list_for_each (tmp, &kptllnd_data.kptl_closing_peers) { peer = list_entry (tmp, kptl_peer_t, peer_list); if (peer->peer_id.nid == lpid.nid && peer->peer_id.pid == lpid.pid) return peer->peer_last_matchbits_seen; } list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) { peer = list_entry (tmp, kptl_peer_t, peer_list); if (peer->peer_id.nid == lpid.nid && peer->peer_id.pid == lpid.pid) return peer->peer_last_matchbits_seen; } return PTL_RESERVED_MATCHBITS;}kptl_peer_t *kptllnd_peer_handle_hello (ptl_process_id_t initiator, kptl_msg_t *msg){ rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; kptl_peer_t *peer; kptl_peer_t *new_peer; lnet_process_id_t lpid; unsigned long flags; kptl_tx_t *hello_tx; int rc; __u64 safe_matchbits; __u64 last_matchbits_seen; lpid.nid = msg->ptlm_srcnid; lpid.pid = msg->ptlm_srcpid; CDEBUG(D_NET, "hello from %s(%s)\n", libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); if (initiator.pid != kptllnd_data.kptl_portals_id.pid && (msg->ptlm_srcpid & LNET_PID_USERFLAG) == 0) { /* If the peer's PID isn't _the_ ptllnd kernel pid, she must be * userspace. Refuse the connection if she hasn't set the * correct flag in her PID... */ CERROR("Userflag not set in hello from %s (%s)\n", libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); return NULL; } /* kptlhm_matchbits are the highest matchbits my peer may have used to * RDMA to me. I ensure I never register buffers for RDMA that could * match any she used */ safe_matchbits = msg->ptlm_u.hello.kptlhm_matchbits + 1; if (safe_matchbits < PTL_RESERVED_MATCHBITS) { CERROR("Illegal matchbits "LPX64" in HELLO from %s\n", safe_matchbits, libcfs_id2str(lpid)); return NULL; } if (msg->ptlm_u.hello.kptlhm_max_msg_size < PTLLND_MIN_BUFFER_SIZE) { CERROR("%s: max message size %d < MIN %d", libcfs_id2str(lpid), msg->ptlm_u.hello.kptlhm_max_msg_size, PTLLND_MIN_BUFFER_SIZE); return NULL; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -