atomic.cc
来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作」· CC 代码 · 共 837 行 · 第 1/2 页
CC
837 行
*/ //Adjust the size to get the remaining bytes. dataSize = vaddr + size - secondAddr; //And access the right address. vaddr = secondAddr; }}#ifndef DOXYGEN_SHOULD_SKIP_THIStemplateFaultAtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags);templateFaultAtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags);templateFaultAtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);templateFaultAtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);templateFaultAtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);templateFaultAtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);#endif //DOXYGEN_SHOULD_SKIP_THIStemplate<>FaultAtomicSimpleCPU::read(Addr addr, double &data, unsigned flags){ return read(addr, *(uint64_t*)&data, flags);}template<>FaultAtomicSimpleCPU::read(Addr addr, float &data, unsigned flags){ return read(addr, *(uint32_t*)&data, flags);}template<>FaultAtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags){ return read(addr, (uint32_t&)data, flags);}template <class T>FaultAtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res){ // use the CPU's statically allocated write request and packet objects Request *req = &data_write_req; if (traceData) { traceData->setAddr(addr); } //The block size of our peer. int blockSize = dcachePort.peerBlockSize(); //The size of the data we're trying to read. int dataSize = sizeof(T); uint8_t * dataPtr = (uint8_t *)&data; //The address of the second part of this access if it needs to be split //across a cache line boundary. Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); if(secondAddr > addr) dataSize = secondAddr - addr; dcache_latency = 0; while(1) { req->setVirt(0, addr, dataSize, flags, thread->readPC()); // translate to physical address Fault fault = thread->translateDataWriteReq(req); // Now do the access. if (fault == NoFault) { MemCmd cmd = MemCmd::WriteReq; // default bool do_access = true; // flag to suppress cache access if (req->isLocked()) { cmd = MemCmd::StoreCondReq; do_access = TheISA::handleLockedWrite(thread, req); } else if (req->isSwap()) { cmd = MemCmd::SwapReq; if (req->isCondSwap()) { assert(res); req->setExtraData(*res); } } if (do_access) { Packet pkt = Packet(req, cmd, Packet::Broadcast); pkt.dataStatic(dataPtr); if (req->isMmapedIpr()) { dcache_latency += TheISA::handleIprWrite(thread->getTC(), &pkt); } else { //XXX This needs to be outside of the loop in order to //work properly for cache line boundary crossing //accesses in transendian simulations. data = htog(data); if (hasPhysMemPort && pkt.getAddr() == physMemAddr) dcache_latency += physmemPort.sendAtomic(&pkt); else dcache_latency += dcachePort.sendAtomic(&pkt); } dcache_access = true; assert(!pkt.isError()); if (req->isSwap()) { assert(res); *res = pkt.get<T>(); } } if (res && !req->isSwap()) { *res = req->getExtraData(); } } // This will need a new way to tell if it's hooked up to a cache or not. if (req->isUncacheable()) recordEvent("Uncached Write"); //If there's a fault or we don't need to access a second cache line, //stop now. if (fault != NoFault || secondAddr <= addr) { // If the write needs to have a fault on the access, consider // calling changeStatus() and changing it to "bad addr write" // or something. return fault; } /* * Set up for accessing the second cache line. */ //Move the pointer we're reading into to the correct location. dataPtr += dataSize; //Adjust the size to get the remaining bytes. dataSize = addr + sizeof(T) - secondAddr; //And access the right address. addr = secondAddr; }}FaultAtomicSimpleCPU::translateDataWriteAddr(Addr vaddr, Addr &paddr, int size, unsigned flags){ // use the CPU's statically allocated write request and packet objects Request *req = &data_write_req; if (traceData) { traceData->setAddr(vaddr); } //The block size of our peer. int blockSize = dcachePort.peerBlockSize(); //The address of the second part of this access if it needs to be split //across a cache line boundary. Addr secondAddr = roundDown(vaddr + size - 1, blockSize); //The size of the data we're trying to read. int dataSize = size; bool firstTimeThrough = true; if(secondAddr > vaddr) dataSize = secondAddr - vaddr; dcache_latency = 0; while(1) { req->setVirt(0, vaddr, dataSize, flags, thread->readPC()); // translate to physical address Fault fault = thread->translateDataWriteReq(req); //If there's a fault or we don't need to access a second cache line, //stop now. if (fault != NoFault) return fault; if (firstTimeThrough) { paddr = req->getPaddr(); firstTimeThrough = false; } if (secondAddr <= vaddr) return fault; /* * Set up for accessing the second cache line. */ //Adjust the size to get the remaining bytes. dataSize = vaddr + size - secondAddr; //And access the right address. vaddr = secondAddr; }}#ifndef DOXYGEN_SHOULD_SKIP_THIStemplateFaultAtomicSimpleCPU::write(Twin32_t data, Addr addr, unsigned flags, uint64_t *res);templateFaultAtomicSimpleCPU::write(Twin64_t data, Addr addr, unsigned flags, uint64_t *res);templateFaultAtomicSimpleCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res);templateFaultAtomicSimpleCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res);templateFaultAtomicSimpleCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res);templateFaultAtomicSimpleCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res);#endif //DOXYGEN_SHOULD_SKIP_THIStemplate<>FaultAtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res){ return write(*(uint64_t*)&data, addr, flags, res);}template<>FaultAtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res){ return write(*(uint32_t*)&data, addr, flags, res);}template<>FaultAtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res){ return write((uint32_t)data, addr, flags, res);}voidAtomicSimpleCPU::tick(){ DPRINTF(SimpleCPU, "Tick\n"); Tick latency = ticks(1); // instruction takes one cycle by default for (int i = 0; i < width; ++i) { numCycles++; if (!curStaticInst || !curStaticInst->isDelayedCommit()) checkForInterrupts(); checkPcEventQueue(); Fault fault = setupFetchRequest(&ifetch_req); if (fault == NoFault) { Tick icache_latency = 0; bool icache_access = false; dcache_access = false; // assume no dcache access //Fetch more instruction memory if necessary //if(predecoder.needMoreBytes()) //{ icache_access = true; Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq, Packet::Broadcast); ifetch_pkt.dataStatic(&inst); if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr) icache_latency = physmemPort.sendAtomic(&ifetch_pkt); else icache_latency = icachePort.sendAtomic(&ifetch_pkt); assert(!ifetch_pkt.isError()); // ifetch_req is initialized to read the instruction directly // into the CPU object's inst field. //} preExecute(); if (curStaticInst) { fault = curStaticInst->execute(this, traceData); // keep an instruction count if (fault == NoFault) countInst(); else if (traceData) { // If there was a fault, we should trace this instruction. delete traceData; traceData = NULL; } postExecute(); } // @todo remove me after debugging with legion done if (curStaticInst && (!curStaticInst->isMicroop() || curStaticInst->isFirstMicroop())) instCnt++; if (simulate_stalls) { Tick icache_stall = icache_access ? icache_latency - ticks(1) : 0; Tick dcache_stall = dcache_access ? dcache_latency - ticks(1) : 0; Tick stall_cycles = (icache_stall + dcache_stall) / ticks(1); if (ticks(stall_cycles) < (icache_stall + dcache_stall)) latency += ticks(stall_cycles+1); else latency += ticks(stall_cycles); } } if(fault != NoFault || !stayAtPC) advancePC(fault); } if (_status != Idle) tickEvent.schedule(curTick + latency);}voidAtomicSimpleCPU::printAddr(Addr a){ dcachePort.printAddr(a);}//////////////////////////////////////////////////////////////////////////// AtomicSimpleCPU Simulation Object//AtomicSimpleCPU *AtomicSimpleCPUParams::create(){ AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params(); params->name = name; params->numberOfThreads = 1; params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; params->max_loads_all_threads = max_loads_all_threads; params->progress_interval = progress_interval; params->deferRegistration = defer_registration; params->phase = phase; params->clock = clock; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->width = width; params->simulate_stalls = simulate_stalls; params->system = system; params->cpu_id = cpu_id; params->tracer = tracer; params->itb = itb; params->dtb = dtb;#if FULL_SYSTEM params->profile = profile; params->do_quiesce = do_quiesce; params->do_checkpoint_insts = do_checkpoint_insts; params->do_statistics_insts = do_statistics_insts;#else if (workload.size() != 1) panic("only one workload allowed"); params->process = workload[0];#endif AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params); return cpu;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?