📄 cpu.cc
字号:
static_icount_bias.resize(SMT_MAX_THREADS); static_icount_bias = _icount_bias; first_decode_thread = 0; IQ = new BaseIQ *[numIQueues]; IQNumSlots = 0; for (int i = 0; i < numIQueues; ++i) { IQ[i] = _IQ[i]; IQNumSlots += IQ[i]->size(); IQ[i]->init(this, dispatch_width / numIQueues, issue_width / numIQueues, i); } // // Create whatever information is going to be used by the IQ's // and pass the pointer to the structure around... // clusterSharedInfo = IQ[0]->buildSharedInfo(); for (int q = 1; q < numIQueues; ++q) IQ[q]->setSharedInfo(clusterSharedInfo); hmp_func = HMP_HEAD_SEL; // // Issue prioritization // if (prioritize_issue) { issue_thread_weights.resize(SMT_MAX_THREADS); issue_thread_weights = _thread_weights; hp_thread_change = issue_thread_weights[0]; // schedule first change hp_thread = 0; // first thread is HP } // // Configure the fetch-to-decode queue // // if (decode_dispatch_latency < 2) fatal("decode/dispatch latency must be at least 2 cycles"); decodeQueue = new DecodeDispatchQueue(this, decode_dispatch_latency, dispatch_width, mt_frontend); dispatch_policy = disp_policy; next_fetch_seq = 1; for (int i = 0; i < SMT_MAX_THREADS; ++i) { dispatch_starting_iqueue[i] = i % numIQueues; lastDispatchTime[i] = 0; correctPathSeq[i] = 1; } issue_starting_iqueue = 0; // Function unit pools numFUPools = FUPools.size(); if (numFUPools == 0) fatal("You must specify at least one FU pool"); issue_starting_fu_pool = 0; issue_current_fupool_for_sb = 0; issue_bandwidth.resize(SMT_MAX_THREADS); issue_bandwidth = _issue_bandwidth; // let bpred know which CPU it belongs to if (branch_pred) branch_pred->setCPU(this); rr_commit_last_thread = 0; dispatch_seq = 0; lastChainUsed = 0; int total_int_physical_regs = ROB_size + SMT_MAX_THREADS * TheISA::NumIntRegs; int total_fp_physical_regs = ROB_size + SMT_MAX_THREADS * TheISA::NumFloatRegs; chain_heads_in_rob = 0; free_int_physical_regs = total_int_physical_regs; free_fp_physical_regs = total_fp_physical_regs; LSQ = new load_store_queue(this, name() + ".LSQ", LSQ_size, false); // The ROB is a MachineQueue<ROBStation> ROB.init(this, ROB_size, numIQueues); rob_cap.resize(SMT_MAX_THREADS); rob_cap = _rob_caps; storebuffer = new StoreBuffer(this, name() + ".SB", storebuffer_size); // not sure why, but copied this from fetch.cc if (ifq_size < (number_of_threads * fetch_width)) fatal("ifetch queue size must be > number_of_threads * fetch_width"); // // Initialize fetch list // initialize_fetch_list(true); /* allocate and initialize register file */ for (int i = 0; i < number_of_threads; i++) {#if FULL_SYSTEM assert(i == 0); // can't handle SMT full-system yet thread[i] = new SpecExecContext(this, i, system, itb, dtb, mem); // // stuff below happens in SimpleCPU constructor too... should // put all this in some common place // SpecExecContext *xc = thread[i]; // initialize CPU, including PC TheISA::initCPU(&xc->regs); change_thread_state(i, true, 100);#else if (i < workload.size()) { // we've got a process to initialize this thread from thread[i] = new SpecExecContext(this, i, workload[i], i); change_thread_state(i, true, 100); } else // idle context... can't deal with this yet, but will someday fatal("uninitialized thread context not supported");#endif // FULL_SYSTEM // Add this context to the context list in BaseCPU as well. execContexts.push_back(thread[i]); // clear commitPC in case we look at it before we commit the // first instruction commitPC[i] = 0; } cv_init(); dispatch_init(); fetch_init(); // // The segmented IQ needs to know the latency of a cache hit // for (int i = 0; i < numIQueues; ++i) IQ[i]->set_cache_hit_latency(dcacheInterface->getHitLatency()); issue_init(); // Set up PC sampling event if necessary if (_pc_sample_interval > 0) { pcSampleEvent = new PCSampleEvent(_pc_sample_interval, this); Callback *cb = new MakeCallback<FullCPU, &FullCPU::dumpPCSampleProfile>(this); registerExitCallback(cb); }}//// Destructor//// Not strictly necessary, but useful for determining exactly what _is_ a// memory leak...//FullCPU::~FullCPU(){ // // First, delete the stuff that the constructor explicitly allocated // delete[] IQ; delete LSQ; delete storebuffer; for (int i = 0; i < number_of_threads; ++i) delete thread[i]; // // Second, delete the SimObjects that we received as a pointer //#if 0 delete icache; delete dcache;#endif unsigned num_fupools = FUPools.size(); for (int i = 0; i < num_fupools; ++i) delete FUPools[i]; delete branch_pred; delete ptrace;}voidFullCPU::takeOverFrom(BaseCPU *oldCPU){ BaseCPU::takeOverFrom(oldCPU);#if FULL_SYSTEM /** * @todo this here is a hack to make sure that none of this crap * happens in full_cpu. The quiesce especically screws up full cpu. */ using namespace AlphaPseudo; doStatisticsInsts = false; doCheckpointInsts = false; doQuiesce = false;#endif assert(!tickEvent.scheduled()); // Set all status's to active, schedule the // CPU's tick event. tickEvent.schedule(curTick); for (int i = 0; i < execContexts.size(); ++i) { execContexts[i]->activate(); }}// post-unserialization initialization callbackvoidFullCPU::startup(){ // schedule initial sampling event here since curTick could get // warped by unserialization. event was created (if needed) in // FullCPU contructor. if (pcSampleEvent) { pcSampleEvent->schedule(curTick); }}voidFullCPU::regStats(){ using namespace Stats; BaseCPU::regStats(); commitRegStats(); fetchRegStats(); storebuffer->regStats(); decodeQueue->regStats(); dispatchRegStats(); issueRegStats(); writebackRegStats(); flossRegStats(); reg_int_thrd_occ .init(number_of_threads) .name(name() + ".REG:int:occ") .desc("Cumulative count of INT register usage") .flags(total) ; reg_fp_thrd_occ .init(number_of_threads) .name(name() + ".REG:fp:occ") .desc("Cumulative count of FP register usage") .flags(total) ; ROB_fcount .name(name() + ".ROB:full_count") .desc("number of cycles where ROB was full") ; ROB_count .init(number_of_threads) .name(name() + ".ROB:occupancy") .desc(name() + ".ROB occupancy (cumulative)") .flags(total) ; currentROBCount .name(name() + ".ROB:current_count") .desc("Current ROB occupancy") ; IFQ_count .name(name() + ".IFQ:count") .desc("cumulative IFQ occupancy") ; IFQ_fcount .init(number_of_threads) .name(name() + ".IFQ:full_count") .desc("cumulative IFQ full count") .flags(total) ; ROB_occ_dist .init(number_of_threads,0,ROB_size,2) .name(name() + ".ROB:occ_dist") .desc("ROB Occupancy per cycle") .flags(total | cdf) ; for (int i = 0; i < numIQueues; ++i) IQ[i]->regStats(number_of_threads); LSQ->regStats(number_of_threads);}voidFullCPU::regFormulas(){ using namespace Stats; storebuffer->regFormulas(this); decodeQueue->regFormulas(); dispatchRegFormulas(); fetchRegFormulas(); writebackRegFormulas(); commitRegFormulas(); issueRegFormulas(); ROB_full_rate .name(name() + ".ROB:full_rate") .desc("ROB full per cycle") ; ROB_full_rate = ROB_fcount / numCycles; ROB_occ_rate .name(name() + ".ROB:occ_rate") .desc("ROB occupancy rate") .flags(total) ; ROB_occ_rate = ROB_count / numCycles; IFQ_occupancy .name(name() + ".IFQ:occupancy") .desc("avg IFQ occupancy (inst's)") ; IFQ_occupancy = IFQ_count / numCycles; IFQ_latency .name(name() + ".IFQ:latency") .desc("avg IFQ occupant latency (cycle's)") .flags(total) ; IFQ_latency = IFQ_occupancy / dispatch_rate; IFQ_full_rate .name(name() + ".IFQ:full_rate") .desc("fraction of time (cycle's) IFQ was full") .flags(total); ; IFQ_full_rate = IFQ_fcount * constant(100) / numCycles; for (int i = 0; i < numIQueues; ++i) IQ[i]->regFormulas(number_of_threads); LSQ->regFormulas(number_of_threads);}voidFullCPU::remove_LSQ_element(BaseIQ::iterator i){ LSQ->squash(i);}voidFullCPU::remove_ROB_element(ROBStation *rob_entry){ if (rob_entry->cache_event_ptr) rob_entry->cache_event_ptr->squash(); int thread = rob_entry->thread_number; // These pointers should have been cleared either when // this entry was squashed, or when the event was processed assert(rob_entry->wb_event == 0); assert(rob_entry->delayed_wb_event == 0); assert(rob_entry->cache_event_ptr == 0); assert(rob_entry->recovery_event == 0); if (rob_entry->spec_state.notnull()) { state_list.dump(rob_entry->spec_state); rob_entry->spec_state = 0; } // // Only free physical registers when instruction is removed // from the ROB. // // unsigned num_fp_regs = rob_entry->inst->numFPDestRegs(); unsigned num_int_regs = rob_entry->inst->numIntDestRegs(); free_fp_physical_regs += num_fp_regs; free_int_physical_regs += num_int_regs; used_fp_physical_regs[thread] -= num_fp_regs; used_int_physical_regs[thread] -= num_int_regs; if (rob_entry->head_of_chain) { --chain_heads_in_rob; } ROB.remove(rob_entry); // do this after ROB.remove() since that function needs the thread number delete rob_entry->inst; rob_entry->inst = NULL;}voidFullCPU::activateContext(int thread_num, int delay){ assert(thread[thread_num] != NULL); if (tickEvent.squashed()) tickEvent.reschedule(curTick + delay); else if (!tickEvent.scheduled()) tickEvent.schedule(curTick + delay);}voidFullCPU::tick(){ numCycles++; floss_this_cycle = 0; floss_state.clear(); for (int i = 0; i < number_of_threads; i++) { iq_cap_active[i] = 0; rob_cap_active[i] = 0; } if (ptrace && ptrace->newCycle(curTick)) new SimExitEvent("ptrace caused exit"); // Handle every-cycle stuff for the IQ, LSQ, &storebuffer for (int i = 0; i < numIQueues; ++i) IQ[i]->pre_tick(); LSQ->pre_tick(); storebuffer->pre_tick(); /* commit entries from IQ/LSQ to architected register file */ commit(); /* service function unit release events */ for (int i = 0; i < numFUPools; ++i) FUPools[i]->tick(); /* ==> may have ready queue entries carried over from previous cycles */ /* service result completions, also readies dependent operations */ /* ==> inserts operations into ready queue --> register deps resolved */ writeback(); /* try to locate memory operations that are ready to execute */ /* ==> inserts operations into ready queue --> mem deps resolved */ lsq_refresh(); for (int i = 0; i < numIQueues; ++i) IQ[i]->tick_ready_stats(); LSQ->tick_ready_stats(); storebuffer->tick_ready_stats(); /* issue operations ready to execute from a previous cycle */ /* <== drains ready queue <-- ready operations commence execution */ issue(); // Handle every-cycle stuff for the IQ, LSQ, &storebuffer for (int i = 0; i < numIQueues; ++i) IQ[i]->tick(); LSQ->tick(); storebuffer->tick();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -