lsq_unit.hh
来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作」· HH 代码 · 共 724 行 · 第 1/2 页
HH
724 行
/// @todo Consider moving to a more advanced model with write vs read ports /** The number of cache ports available each cycle. */ int cachePorts; /** The number of used cache ports in this cycle. */ int usedPorts; /** Is the LSQ switched out. */ bool switchedOut; //list<InstSeqNum> mshrSeqNums; /** Wire to read information from the issue stage time queue. */ typename TimeBuffer<IssueStruct>::wire fromIssue; /** Whether or not the LSQ is stalled. */ bool stalled; /** The store that causes the stall due to partial store to load * forwarding. */ InstSeqNum stallingStoreIsn; /** The index of the above store. */ int stallingLoadIdx; /** The packet that needs to be retried. */ PacketPtr retryPkt; /** Whehter or not a store is blocked due to the memory system. */ bool isStoreBlocked; /** Whether or not a load is blocked due to the memory system. */ bool isLoadBlocked; /** Has the blocked load been handled. */ bool loadBlockedHandled; /** The sequence number of the blocked load. */ InstSeqNum blockedLoadSeqNum; /** The oldest load that caused a memory ordering violation. */ DynInstPtr memDepViolator; // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. /** Total number of loads forwaded from LSQ stores. */ Stats::Scalar<> lsqForwLoads; /** Total number of loads ignored due to invalid addresses. */ Stats::Scalar<> invAddrLoads; /** Total number of squashed loads. */ Stats::Scalar<> lsqSquashedLoads; /** Total number of responses from the memory system that are * ignored due to the instruction already being squashed. */ Stats::Scalar<> lsqIgnoredResponses; /** Tota number of memory ordering violations. */ Stats::Scalar<> lsqMemOrderViolation; /** Total number of squashed stores. */ Stats::Scalar<> lsqSquashedStores; /** Total number of software prefetches ignored due to invalid addresses. */ Stats::Scalar<> invAddrSwpfs; /** Ready loads blocked due to partial store-forwarding. */ Stats::Scalar<> lsqBlockedLoads; /** Number of loads that were rescheduled. */ Stats::Scalar<> lsqRescheduledLoads; /** Number of times the LSQ is blocked due to the cache. */ Stats::Scalar<> lsqCacheBlocked; public: /** Executes the load at the given index. */ template <class T> Fault read(Request *req, T &data, int load_idx); /** Executes the store at the given index. */ template <class T> Fault write(Request *req, T &data, int store_idx); /** Returns the index of the head load instruction. */ int getLoadHead() { return loadHead; } /** Returns the sequence number of the head load instruction. */ InstSeqNum getLoadHeadSeqNum() { if (loadQueue[loadHead]) { return loadQueue[loadHead]->seqNum; } else { return 0; } } /** Returns the index of the head store instruction. */ int getStoreHead() { return storeHead; } /** Returns the sequence number of the head store instruction. */ InstSeqNum getStoreHeadSeqNum() { if (storeQueue[storeHead].inst) { return storeQueue[storeHead].inst->seqNum; } else { return 0; } } /** Returns whether or not the LSQ unit is stalled. */ bool isStalled() { return stalled; }};template <class Impl>template <class T>FaultLSQUnit<Impl>::read(Request *req, T &data, int load_idx){ DynInstPtr load_inst = loadQueue[load_idx]; assert(load_inst); assert(!load_inst->isExecuted()); // Make sure this isn't an uncacheable access // A bit of a hackish way to get uncached accesses to work only if they're // at the head of the LSQ and are ready to commit (at the head of the ROB // too). if (req->isUncacheable() && (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; // Must delete request now that it wasn't handed off to // memory. This is quite ugly. @todo: Figure out the proper // place to really handle request deletes. delete req; return TheISA::genMachineCheckFault(); } // Check the SQ for any previous stores that might lead to forwarding int store_idx = load_inst->sqIdx; int store_size = 0; DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " "storeHead: %i addr: %#x\n", load_idx, store_idx, storeHead, req->getPaddr()); if (req->isLocked()) { // Disable recording the result temporarily. Writing to misc // regs normally updates the result, but this is not the // desired behavior when handling store conditionals. load_inst->recordResult = false; TheISA::handleLockedRead(load_inst.get(), req); load_inst->recordResult = true; } while (store_idx != -1) { // End once we've reached the top of the LSQ if (store_idx == storeWBIdx) { break; } // Move the index to one younger if (--store_idx < 0) store_idx += SQEntries; assert(storeQueue[store_idx].inst); store_size = storeQueue[store_idx].size; if (store_size == 0) continue; else if (storeQueue[store_idx].inst->uncacheable()) continue; assert(storeQueue[store_idx].inst->effAddrValid); // Check if the store data is within the lower and upper bounds of // addresses that the request needs. bool store_has_lower_limit = req->getVaddr() >= storeQueue[store_idx].inst->effAddr; bool store_has_upper_limit = (req->getVaddr() + req->getSize()) <= (storeQueue[store_idx].inst->effAddr + store_size); bool lower_load_has_store_part = req->getVaddr() < (storeQueue[store_idx].inst->effAddr + store_size); bool upper_load_has_store_part = (req->getVaddr() + req->getSize()) > storeQueue[store_idx].inst->effAddr; // If the store's data has all of the data needed, we can forward. if ((store_has_lower_limit && store_has_upper_limit)) { // Get shift amount for offset into the store's data. int shift_amt = req->getVaddr() & (store_size - 1); memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T)); assert(!load_inst->memData); load_inst->memData = new uint8_t[64]; memcpy(load_inst->memData, storeQueue[store_idx].data + shift_amt, req->getSize()); DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " "addr %#x, data %#x\n", store_idx, req->getVaddr(), data); PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); // We'll say this has a 1 cycle load-store forwarding latency // for now. // @todo: Need to make this a parameter. wb->schedule(curTick); ++lsqForwLoads; return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || (store_has_upper_limit && upper_load_has_store_part) || (lower_load_has_store_part && upper_load_has_store_part)) { // This is the partial store-load forwarding case where a store // has only part of the load's data. // If it's already been written back, then don't worry about // stalling on it. if (storeQueue[store_idx].completed) { panic("Should not check one of these"); continue; } // Must stall load and force it to retry, so long as it's the oldest // load that needs to do so. if (!stalled || (stalled && load_inst->seqNum < loadQueue[stallingLoadIdx]->seqNum)) { stalled = true; stallingStoreIsn = storeQueue[store_idx].inst->seqNum; stallingLoadIdx = load_idx; } // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); iewStage->decrWb(load_inst->seqNum); load_inst->clearIssued(); ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. DPRINTF(LSQUnit, "Load-store forwarding mis-match. " "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); // Must delete request now that it wasn't handed off to // memory. This is quite ugly. @todo: Figure out the // proper place to really handle request deletes. delete req; return NoFault; } } // If there's no forwarding case, then go access memory DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); load_inst->memData = new uint8_t[64]; ++usedPorts; // if we the cache is not blocked, do cache access if (!lsq->cacheBlocked()) { PacketPtr data_pkt = new Packet(req, (req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq), Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); LSQSenderState *state = new LSQSenderState; state->isLoad = true; state->idx = load_idx; state->inst = load_inst; data_pkt->senderState = state; if (!dcachePort->sendTiming(data_pkt)) { // Delete state and data packet because a load retry // initiates a pipeline restart; it does not retry. delete state; delete data_pkt->req; delete data_pkt; req = NULL; // If the access didn't succeed, tell the LSQ by setting // the retry thread id. lsq->setRetryTid(lsqID); } } // If the cache was blocked, or has become blocked due to the access, // handle it. if (lsq->cacheBlocked()) { if (req) delete req; ++lsqCacheBlocked; iewStage->decrWb(load_inst->seqNum); // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) return NoFault; // Record that the load was blocked due to memory. This // load will squash all instructions after it, be // refetched, and re-executed. isLoadBlocked = true; loadBlockedHandled = false; blockedLoadSeqNum = load_inst->seqNum; // No fault occurred, even though the interface is blocked. return NoFault; } return NoFault;}template <class Impl>template <class T>FaultLSQUnit<Impl>::write(Request *req, T &data, int store_idx){ assert(storeQueue[store_idx].inst); DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" " | storeHead:%i [sn:%i]\n", store_idx, req->getPaddr(), data, storeHead, storeQueue[store_idx].inst->seqNum); storeQueue[store_idx].req = req; storeQueue[store_idx].size = sizeof(T); assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); T gData = htog(data); memcpy(storeQueue[store_idx].data, &gData, sizeof(T)); // This function only writes the data to the store queue, so no fault // can happen here. return NoFault;}#endif // __CPU_O3_LSQ_UNIT_HH__
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?