ipath_rc.c
From the Linux 2.6.17.4 source tree · C code · 1,857 lines total · page 1 of 4
/*
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ipath_verbs.h"
#include "ips_common.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x

/**
 * ipath_init_restart - initialize the qp->s_sge after a restart
 * @qp: the QP whose SGE we're restarting
 * @wqe: the work queue to initialize the QP's SGE from
 *
 * The QP s_lock should be held.
 */
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
        struct ipath_ibdev *dev;
        u32 len;

        len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) *
                ib_mtu_enum_to_int(qp->path_mtu);
        qp->s_sge.sge = wqe->sg_list[0];
        qp->s_sge.sg_list = wqe->sg_list + 1;
        qp->s_sge.num_sge = wqe->wr.num_sge;
        ipath_skip_sge(&qp->s_sge, len);
        qp->s_len = wqe->length - len;
        dev = to_idev(qp->ibqp.device);
        spin_lock(&dev->pending_lock);
        if (list_empty(&qp->timerwait))
                list_add_tail(&qp->timerwait,
                              &dev->pending[dev->pending_index]);
        spin_unlock(&dev->pending_lock);
}

/**
 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 *
 * Return bth0 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held.
 */
static inline u32 ipath_make_rc_ack(struct ipath_qp *qp,
                                    struct ipath_other_headers *ohdr,
                                    u32 pmtu)
{
        struct ipath_sge_state *ss;
        u32 hwords;
        u32 len;
        u32 bth0;

        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;

        /*
         * Send a response.  Note that we are in the responder's
         * side of the QP context.
         */
        switch (qp->s_ack_state) {
        case OP(RDMA_READ_REQUEST):
                ss = &qp->s_rdma_sge;
                len = qp->s_rdma_len;
                if (len > pmtu) {
                        len = pmtu;
                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
                } else
                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
                qp->s_rdma_len -= len;
                bth0 = qp->s_ack_state << 24;
                ohdr->u.aeth = ipath_compute_aeth(qp);
                hwords++;
                break;

        case OP(RDMA_READ_RESPONSE_FIRST):
                qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
                /* FALLTHROUGH */
        case OP(RDMA_READ_RESPONSE_MIDDLE):
                ss = &qp->s_rdma_sge;
                len = qp->s_rdma_len;
                if (len > pmtu)
                        len = pmtu;
                else {
                        ohdr->u.aeth = ipath_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
                }
                qp->s_rdma_len -= len;
                bth0 = qp->s_ack_state << 24;
                break;

        case OP(RDMA_READ_RESPONSE_LAST):
        case OP(RDMA_READ_RESPONSE_ONLY):
                /*
                 * We have to prevent new requests from changing
                 * the r_sge state while an ipath_verbs_send()
                 * is in progress.
                 * Changing r_state allows the receiver
                 * to continue processing new packets.
                 * We do it here now instead of above so
                 * that we are sure the packet was sent before
                 * changing the state.
                 */
                qp->r_state = OP(RDMA_READ_RESPONSE_LAST);
                qp->s_ack_state = OP(ACKNOWLEDGE);
                return 0;

        case OP(COMPARE_SWAP):
        case OP(FETCH_ADD):
                ss = NULL;
                len = 0;
                qp->r_state = OP(SEND_LAST);
                qp->s_ack_state = OP(ACKNOWLEDGE);
                bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
                ohdr->u.at.aeth = ipath_compute_aeth(qp);
                ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
                hwords += sizeof(ohdr->u.at) / 4;
                break;

        default:
                /* Send a regular ACK. */
                ss = NULL;
                len = 0;
                qp->s_ack_state = OP(ACKNOWLEDGE);
                bth0 = qp->s_ack_state << 24;
                ohdr->u.aeth = ipath_compute_aeth(qp);
                hwords++;
        }
        qp->s_hdrwords = hwords;
        qp->s_cur_sge = ss;
        qp->s_cur_size = len;

        return bth0;
}

/**
 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 * @bth0p: pointer to the BTH opcode word
 * @bth2p: pointer to the BTH PSN word
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held.
 */
static inline int ipath_make_rc_req(struct ipath_qp *qp,
                                    struct ipath_other_headers *ohdr,
                                    u32 pmtu, u32 *bth0p, u32 *bth2p)
{
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ipath_sge_state *ss;
        struct ipath_swqe *wqe;
        u32 hwords;
        u32 len;
        u32 bth0;
        u32 bth2;
        char newreq;

        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
            qp->s_rnr_timeout)
                goto done;

        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;
        bth0 = 0;

        /* Send a request. */
        wqe = get_swqe_ptr(qp, qp->s_cur);
        switch (qp->s_state) {
        default:
                /*
                 * Resend an old request or start a new one.
                 *
                 * We keep track of the current SWQE so that
                 * we don't reset the "furthest progress" state
                 * if we need to back up.
                 */
                newreq = 0;
                if (qp->s_cur == qp->s_tail) {
                        /* Check if send work queue is empty. */
                        if (qp->s_tail == qp->s_head)
                                goto done;
                        qp->s_psn = wqe->psn = qp->s_next_psn;
                        newreq = 1;
                }
                /*
                 * Note that we have to be careful not to modify the
                 * original work request since we may need to resend
                 * it.
                 */
                qp->s_sge.sge = wqe->sg_list[0];
                qp->s_sge.sg_list = wqe->sg_list + 1;
                qp->s_sge.num_sge = wqe->wr.num_sge;
                qp->s_len = len = wqe->length;
                ss = &qp->s_sge;
                bth2 = 0;
                switch (wqe->wr.opcode) {
                case IB_WR_SEND:
                case IB_WR_SEND_WITH_IMM:
                        /* If no credit, return. */
                        if (qp->s_lsn != (u32) -1 &&
                            ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
                                goto done;
                        wqe->lpsn = wqe->psn;
                        if (len > pmtu) {
                                wqe->lpsn += (len - 1) / pmtu;
                                qp->s_state = OP(SEND_FIRST);
                                len = pmtu;
                                break;
                        }
                        if (wqe->wr.opcode == IB_WR_SEND)
                                qp->s_state = OP(SEND_ONLY);
                        else {
                                qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the BTH */
                                ohdr->u.imm_data = wqe->wr.imm_data;
                                hwords += 1;
                        }
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
                        bth2 = 1 << 31;        /* Request ACK. */
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        break;

                case IB_WR_RDMA_WRITE:
                        if (newreq)
                                qp->s_lsn++;
                        /* FALLTHROUGH */
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        /* If no credit, return. */
                        if (qp->s_lsn != (u32) -1 &&
                            ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
                                goto done;
                        ohdr->u.rc.reth.vaddr =
                                cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
                        ohdr->u.rc.reth.rkey =
                                cpu_to_be32(wqe->wr.wr.rdma.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / 4;
                        wqe->lpsn = wqe->psn;
                        if (len > pmtu) {
                                wqe->lpsn += (len - 1) / pmtu;
                                qp->s_state = OP(RDMA_WRITE_FIRST);
                                len = pmtu;
                                break;
                        }
                        if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
                                qp->s_state = OP(RDMA_WRITE_ONLY);
                        else {
                                qp->s_state =
                                        OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after RETH */
                                ohdr->u.rc.imm_data = wqe->wr.imm_data;
                                hwords += 1;
                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= 1 << 23;
                        }
                        bth2 = 1 << 31;        /* Request ACK. */
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        break;

                case IB_WR_RDMA_READ:
                        ohdr->u.rc.reth.vaddr =
                                cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
                        ohdr->u.rc.reth.rkey =
                                cpu_to_be32(wqe->wr.wr.rdma.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        qp->s_state = OP(RDMA_READ_REQUEST);
                        hwords += sizeof(ohdr->u.rc.reth) / 4;
                        if (newreq) {
                                qp->s_lsn++;
                                /*
                                 * Adjust s_next_psn to count the
                                 * expected number of responses.
                                 */
                                if (len > pmtu)
                                        qp->s_next_psn += (len - 1) / pmtu;
                                wqe->lpsn = qp->s_next_psn++;
                        }
                        ss = NULL;
                        len = 0;
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        break;

                case IB_WR_ATOMIC_CMP_AND_SWP:
                case IB_WR_ATOMIC_FETCH_AND_ADD:
                        if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
                                qp->s_state = OP(COMPARE_SWAP);
                        else
                                qp->s_state = OP(FETCH_ADD);
                        ohdr->u.atomic_eth.vaddr = cpu_to_be64(
                                wqe->wr.wr.atomic.remote_addr);
                        ohdr->u.atomic_eth.rkey = cpu_to_be32(
                                wqe->wr.wr.atomic.rkey);
                        ohdr->u.atomic_eth.swap_data = cpu_to_be64(
                                wqe->wr.wr.atomic.swap);
                        ohdr->u.atomic_eth.compare_data = cpu_to_be64(
                                wqe->wr.wr.atomic.compare_add);
                        hwords += sizeof(struct ib_atomic_eth) / 4;
                        if (newreq) {
                                qp->s_lsn++;
                                wqe->lpsn = wqe->psn;
                        }
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        ss = NULL;
                        len = 0;
                        break;

                default:
                        goto done;
                }
                if (newreq) {
                        qp->s_tail++;
                        if (qp->s_tail >= qp->s_size)
                                qp->s_tail = 0;
                }
                bth2 |= qp->s_psn++ & IPS_PSN_MASK;
                if ((int)(qp->s_psn - qp->s_next_psn) > 0)
                        qp->s_next_psn = qp->s_psn;
                spin_lock(&dev->pending_lock);
                if (list_empty(&qp->timerwait))
                        list_add_tail(&qp->timerwait,
                                      &dev->pending[dev->pending_index]);
                spin_unlock(&dev->pending_lock);
                break;

        case OP(RDMA_READ_RESPONSE_FIRST):
                /*
                 * This case can only happen if a send is restarted.  See
                 * ipath_restart_rc().
                 */
                ipath_init_restart(qp, wqe);
                /* FALLTHROUGH */
        case OP(SEND_FIRST):
                qp->s_state = OP(SEND_MIDDLE);
                /* FALLTHROUGH */
        case OP(SEND_MIDDLE):
                bth2 = qp->s_psn++ & IPS_PSN_MASK;
                if ((int)(qp->s_psn - qp->s_next_psn) > 0)
                        qp->s_next_psn = qp->s_psn;
                ss = &qp->s_sge;
                len = qp->s_len;
                if (len > pmtu) {
                        /*
                         * Request an ACK every 1/2 MB to avoid retransmit
                         * timeouts.
                         */
                        if (((wqe->length - len) % (512 * 1024)) == 0)
                                bth2 |= 1 << 31;
                        len = pmtu;
                        break;
                }
                if (wqe->wr.opcode == IB_WR_SEND)
                        qp->s_state = OP(SEND_LAST);
                else {
                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
                        ohdr->u.imm_data = wqe->wr.imm_data;
                        hwords += 1;
                }
                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                        bth0 |= 1 << 23;
                bth2 |= 1 << 31;        /* Request ACK. */
                qp->s_cur++;
                if (qp->s_cur >= qp->s_size)
                        qp->s_cur = 0;
                break;

        case OP(RDMA_READ_RESPONSE_LAST):
                /*
                 * This case can only happen if an RDMA write is restarted.
                 * See ipath_restart_rc().
                 */
                ipath_init_restart(qp, wqe);
                /* FALLTHROUGH */
        case OP(RDMA_WRITE_FIRST):
                qp->s_state = OP(RDMA_WRITE_MIDDLE);
                /* FALLTHROUGH */
        case OP(RDMA_WRITE_MIDDLE):
                bth2 = qp->s_psn++ & IPS_PSN_MASK;
                if ((int)(qp->s_psn - qp->s_next_psn) > 0)
                        qp->s_next_psn = qp->s_psn;
                ss = &qp->s_sge;
                len = qp->s_len;
                if (len > pmtu) {
                        /*
                         * Request an ACK every 1/2 MB to avoid retransmit
                         * timeouts.
                         */
                        if (((wqe->length - len) % (512 * 1024)) == 0)
                                bth2 |= 1 << 31;
                        len = pmtu;
                        break;
                }
                if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
                        qp->s_state = OP(RDMA_WRITE_LAST);
                else {
                        qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
                        ohdr->u.imm_data = wqe->wr.imm_data;
                        hwords += 1;
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
                }
                bth2 |= 1 << 31;        /* Request ACK. */
                qp->s_cur++;
                if (qp->s_cur >= qp->s_size)
                        qp->s_cur = 0;
                break;

        case OP(RDMA_READ_RESPONSE_MIDDLE):
                /*
                 * This case can only happen if an RDMA read is restarted.
                 * See ipath_restart_rc().
                 */
                ipath_init_restart(qp, wqe);
                len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) * pmtu;
                ohdr->u.rc.reth.vaddr =
                        cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
                ohdr->u.rc.reth.rkey =
                        cpu_to_be32(wqe->wr.wr.rdma.rkey);
                ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
                qp->s_state = OP(RDMA_READ_REQUEST);
                hwords += sizeof(ohdr->u.rc.reth) / 4;
                bth2 = qp->s_psn++ & IPS_PSN_MASK;
                if ((int)(qp->s_psn - qp->s_next_psn) > 0)
                        qp->s_next_psn = qp->s_psn;