ipath_rc.c
来自「LINUX 2.6.17.4的源码」· C语言 代码 · 共 1,857 行 · 第 1/4 页
C
1,857 行
qp->s_ack_state = opcode; qp->s_nak_state = 0; qp->s_ack_psn = psn;resched: return 0;done: return 1;}/** * ipath_rc_rcv - process an incoming RC packet * @dev: the device this packet came in on * @hdr: the header of this packet * @has_grh: true if the header has a GRH * @data: the packet data * @tlen: the packet length * @qp: the QP for this packet * * This is called from ipath_qp_rcv() to process an incoming RC packet * for the given QP. * Called at interrupt level. */void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int has_grh, void *data, u32 tlen, struct ipath_qp *qp){ struct ipath_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; u32 pad; unsigned long flags; struct ib_wc wc; u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); int diff; struct ib_reth *reth; int header_in_data; /* Check for GRH */ if (!has_grh) { ohdr = &hdr->u.oth; hdrsize = 8 + 12; /* LRH + BTH */ psn = be32_to_cpu(ohdr->bth[2]); header_in_data = 0; } else { ohdr = &hdr->u.l.oth; hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ /* * The header with GRH is 60 bytes and the core driver sets * the eager header buffer size to 56 bytes so the last 4 * bytes of the BTH header (PSN) is in the data buffer. */ header_in_data = ipath_layer_get_rcvhdrentsize(dev->dd) == 16; if (header_in_data) { psn = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } else psn = be32_to_cpu(ohdr->bth[2]); } /* * The opcode is in the low byte when its in network order * (top byte when in host order). */ opcode = be32_to_cpu(ohdr->bth[0]) >> 24; /* * Process responses (ACKs) before anything else. Note that the * packet sequence number will be for something in the send work * queue rather than the expected receive packet sequence number. * In other words, this QP is the requester. */ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, hdrsize, pmtu, header_in_data); goto bail; } spin_lock_irqsave(&qp->r_rq.lock, flags); /* Compute 24 bits worth of difference. */ diff = ipath_cmp24(psn, qp->r_psn); if (unlikely(diff)) { if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, psn, diff, header_in_data)) goto done; goto resched; } /* Check for opcode sequence errors. */ switch (qp->r_state) { case OP(SEND_FIRST): case OP(SEND_MIDDLE): if (opcode == OP(SEND_MIDDLE) || opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST_WITH_IMMEDIATE)) break; nack_inv: /* * A NAK will ACK earlier sends and RDMA writes. Don't queue the * NAK if a RDMA read, atomic, or NAK is pending though. */ spin_lock(&qp->s_lock); if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) && qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { spin_unlock(&qp->s_lock); goto done; } /* XXX Flush WQEs */ qp->state = IB_QPS_ERR; qp->s_ack_state = OP(SEND_ONLY); qp->s_nak_state = IB_NAK_INVALID_REQUEST; qp->s_ack_psn = qp->r_psn; goto resched; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_MIDDLE): if (opcode == OP(RDMA_WRITE_MIDDLE) || opcode == OP(RDMA_WRITE_LAST) || opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) break; goto nack_inv; case OP(RDMA_READ_REQUEST): case OP(COMPARE_SWAP): case OP(FETCH_ADD): /* * Drop all new requests until a response has been sent. A * new request then ACKs the RDMA response we sent. Relaxed * ordering would allow new requests to be processed but we * would need to keep a queue of rwqe's for all that are in * progress. Note that we can't RNR NAK this request since * the RDMA READ or atomic response is already queued to be * sent (unless we implement a response send queue). */ goto done; default: if (opcode == OP(SEND_MIDDLE) || opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST_WITH_IMMEDIATE) || opcode == OP(RDMA_WRITE_MIDDLE) || opcode == OP(RDMA_WRITE_LAST) || opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) goto nack_inv; break; } wc.imm_data = 0; wc.wc_flags = 0; /* OK, process the packet. */ switch (opcode) { case OP(SEND_FIRST): if (!ipath_get_rwqe(qp, 0)) { rnr_nak: /* * A RNR NAK will ACK earlier sends and RDMA writes. * Don't queue the NAK if a RDMA read or atomic * is pending though. */ spin_lock(&qp->s_lock); if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) && qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { spin_unlock(&qp->s_lock); goto done; } qp->s_ack_state = OP(SEND_ONLY); qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer; qp->s_ack_psn = qp->r_psn; goto resched; } qp->r_rcv_len = 0; /* FALLTHROUGH */ case OP(SEND_MIDDLE): case OP(RDMA_WRITE_MIDDLE): send_middle: /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) goto nack_inv; qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) goto nack_inv; ipath_copy_sge(&qp->r_sge, data, pmtu); break; case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; goto send_last_imm; case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): if (!ipath_get_rwqe(qp, 0)) goto rnr_nak; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto send_last; /* FALLTHROUGH */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { wc.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ wc.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; /* FALLTHROUGH */ case OP(SEND_LAST): case OP(RDMA_WRITE_LAST): send_last: /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; /* Check for invalid length. */ /* XXX LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) goto nack_inv; /* Don't count the CRC. */ tlen -= (hdrsize + pad + 4); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); atomic_inc(&qp->msn); if (opcode == OP(RDMA_WRITE_LAST) || opcode == OP(RDMA_WRITE_ONLY)) break; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.vendor_err = 0; wc.qp_num = qp->ibqp.qp_num; wc.src_qp = qp->remote_qpn; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & __constant_cpu_to_be32(1 << 23)) != 0); break; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; else { reth = (struct ib_reth *)data; data += sizeof(*reth); } hdrsize += sizeof(*reth); qp->r_len = be32_to_cpu(reth->length); qp->r_rcv_len = 0; if (qp->r_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; /* Check rkey & NAK */ ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) { nack_acc: /* * A NAK will ACK earlier sends and RDMA * writes. Don't queue the NAK if a RDMA * read, atomic, or NAK is pending though. */ spin_lock(&qp->s_lock); if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) && qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { spin_unlock(&qp->s_lock); goto done; } /* XXX Flush WQEs */ qp->state = IB_QPS_ERR; qp->s_ack_state = OP(RDMA_WRITE_ONLY); qp->s_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->s_ack_psn = qp->r_psn; goto resched; } } else { qp->r_sge.sg_list = NULL; qp->r_sge.sge.mr = NULL; qp->r_sge.sge.vaddr = NULL; qp->r_sge.sge.length = 0; qp->r_sge.sge.sge_length = 0; } if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto nack_acc; if (opcode == OP(RDMA_WRITE_FIRST)) goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto send_last; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; goto send_last_imm; case OP(RDMA_READ_REQUEST): /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; else { reth = (struct ib_reth *)data; data += sizeof(*reth); } spin_lock(&qp->s_lock); if (qp->s_ack_state != OP(ACKNOWLEDGE) && qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) { spin_unlock(&qp->s_lock); goto done; } qp->s_rdma_len = be32_to_cpu(reth->length); if (qp->s_rdma_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; /* Check rkey & NAK */ ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, qp->s_rdma_len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) { spin_unlock(&qp->s_lock); goto nack_acc; } /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ if (qp->s_rdma_len > pmtu) qp->r_psn += (qp->s_rdma_len - 1) / pmtu; } else { qp->s_rdma_sge.sg_list = NULL; qp->s_rdma_sge.num_sge = 0; qp->s_rdma_sge.sge.mr = NULL; qp->s_rdma_sge.sge.vaddr = NULL; qp->s_rdma_sge.sge.length = 0; qp->s_rdma_sge.sge.sge_length = 0; } if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto nack_acc; /* * We need to increment the MSN here instead of when we * finish sending the result since a duplicate request would * increment it more than once. */ atomic_inc(&qp->msn); qp->s_ack_state = opcode; qp->s_nak_state = 0; qp->s_ack_psn = psn; qp->r_psn++; qp->r_state = opcode; goto rdmadone; case OP(COMPARE_SWAP): case OP(FETCH_ADD): { struct ib_atomic_eth *ateth; u64 vaddr; u64 sdata; u32 rkey; if (!header_in_data) ateth = &ohdr->u.atomic_eth; else { ateth = (struct ib_atomic_eth *)data; data += sizeof(*ateth); } vaddr = be64_to_cpu(ateth->vaddr); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; /* Perform atomic OP and save result. */ sdata = be64_to_cpu(ateth->swap_data); spin_lock(&dev->pending_lock); qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; if (opcode == OP(FETCH_ADD)) *(u64 *) qp->r_sge.sge.vaddr = qp->r_atomic_data + sdata; else if (qp->r_atomic_data == be64_to_cpu(ateth->compare_data)) *(u64 *) qp->r_sge.sge.vaddr = sdata; spin_unlock(&dev->pending_lock); atomic_inc(&qp->msn); qp->r_atomic_psn = psn & IPS_PSN_MASK; psn |= 1 << 31; break; } default: /* Drop packet for unknown opcodes. */ goto done; } qp->r_psn++; qp->r_state = opcode; /* Send an ACK if requested or required. */ if (psn & (1 << 31)) { /* * Coalesce ACKs unless there is a RDMA READ or * ATOMIC pending. */ spin_lock(&qp->s_lock); if (qp->s_ack_state == OP(ACKNOWLEDGE) || qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) { qp->s_ack_state = opcode; qp->s_nak_state = 0; qp->s_ack_psn = psn; qp->s_ack_atomic = qp->r_atomic_data; goto resched; } spin_unlock(&qp->s_lock); }done: spin_unlock_irqrestore(&qp->r_rq.lock, flags); goto bail;resched: /* * Try to send ACK right away but not if ipath_do_rc_send() is * active. */ if (qp->s_hdrwords == 0 && (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST || qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP)) send_rc_ack(qp);rdmadone: spin_unlock(&qp->s_lock); spin_unlock_irqrestore(&qp->r_rq.lock, flags); /* Call ipath_do_rc_send() in another thread. */ tasklet_hi_schedule(&qp->s_task);bail: return;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?