📄 iq_segmented.cc
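//
// iq_segmented.cc -- SegmentedIQ instruction-queue model (excerpt, starting
// part-way through the statistics-registration routine).  The code below
// covers statistics and formula registration, chain release, the cache-miss
// event handler, oldest-instruction lookup, the debug-dump routines, and the
// SimObject parameter/creation glue.
//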
    //
    disp_rdy_delay_dist
        .init(num_threads, 0, 19, 1)
        .name(n + "disp_rdy_delay_dist")
        .desc("Delay between inst dispatch and ready")
        .flags(cdf)
        ;

    ready_error_dist
        .init(-40, 39, 1)
        .name(n + "ready_error_dist")
        .desc("Lateness of predicted ready time (cycles)")
        .flags(cdf)
        ;

    delay_at_ops_rdy_dist
        .init(0, 19, 1)
        .name(n + "delay_at_ops_rdy")
        .desc("value of delay when last op becomes ready")
        .flags(cdf)
        ;

    cum_delay
        .name(n + "delay_at_ops_rdy_cum")
        .desc("cumulative delay values")
        ;

    seg0_entry_error_dist
        .init(-10, 9, 1)
        .name(n + "seg0_entry_error")
        .desc("Seg0_entry_time - max(dispatch+n-1, st_time)")
        .flags(cdf)
        ;

    pred_issue_error_dist
        .init(-10, 99, 1)
        .name("pred_issue_error")
        .desc("error in predicted issue times")
        .flags(pdf | cdf)
        ;

    //
    // Predictor stats
    //
    if (cpu->use_hm_predictor)
        hm_predictor->regStats();

    if (cpu->use_lr_predictor)
        lr_predictor->regStats();

    //
    // Register the segment-specific statistics
    //
    for (int i = 0; i < num_segments; ++i)
        queue_segment[i]->regStats(cpu, n);
}

void
SegmentedIQ::regModelFormulas(unsigned num_threads)
{
    using namespace Stats;

    // string n = cpu->name() + ".IQ:";
    string n = name() + ":";
    string c = cpu->name() + ".";

    //
    // Stall statistics
    //
    rob_chain_frac
        .name(n + "rob_chain_frac")
        .desc("Fraction of all chains ONLY in ROB")
        ;
    rob_chain_frac = rob_chain_heads / cpu->chain_heads;

    chains_avg
        .name(n + "chains_avg")
        .desc("average number of chains in use")
        ;
    chains_avg = chains_cum / cpu->numCycles;

    deadlock_ratio
        .name(n + "deadlock_ratio")
        .desc("Fraction of time IQ was deadlocked")
        ;
    deadlock_ratio = deadlock_cycles / cpu->numCycles;

    deadlock_avg_dur
        .name(n + "deadlock_avg_dur")
        .desc("Average duration of deadlock event")
        ;
    deadlock_avg_dur = deadlock_cycles / deadlock_events;

    if (use_bypassing) {
        bypass_avg
            .name(n + "bypass_avg")
            .desc("average number of segments bypassed (when used)")
            ;
        bypass_avg = bypassed_segments / bypassed_insts;

        bypass_frac
            .name(n + "bypass_frac")
            .desc("fraction of instructions using bypassing")
            ;
        bypass_frac = 100 * bypassed_insts / cpu->dispatch_count_stat;
    }

    if (s0_st_limit) {
        st_limit_rate
            .name(n + "st_limit_rate")
            .desc("average S/T limit events per cycle")
            ;
        st_limit_rate = st_limit_events / cpu->numCycles;
    }

    if (use_pushdown) {
        pushdown_rate
            .name(n + "pushdown_rate")
            .desc("average push-down events per cycle")
            ;
        pushdown_rate = total_pd_events / cpu->numCycles;

        pd_inst_rate
            .name(n + "pd_inst_rate")
            .desc("average push-down insts per event")
            ;
        pd_inst_rate = total_pd_count / total_pd_events;
    }

    //
    // Ready instruction stats
    //
    ready_fraction
        .name(n + "00:ready_fraction")
        .desc("Fraction of segment 0 insts ready to issue")
        ;
    ready_fraction = 100 * queue_segment[0]->getRQ()->ready_inst
        / queue_segment[0]->cum_insts;

    frac_of_all_ready
        .name(n + "00:frac_of_all_ready")
        .desc("Fraction of all ready insts that are in seg 0")
        ;
    frac_of_all_ready = 100 * queue_segment[0]->getRQ()->ready_inst
        / total_ready_count;

    //
    // Chain prediction stats
    //
    correct_pred_rate
        .name(n + "correct_pred_rate")
        .desc("pct of 2-not-rdy-op insts correctly pred")
        ;
    correct_pred_rate = 100 * correct_last_op /
        (cpu->two_op_inst_count - cpu->one_rdy_inst_count);

    delay_at_rdy_avg
        .name(n + "delay_at_rdy_avg")
        .desc("average delay value when last op becomes ready")
        ;
    delay_at_rdy_avg = cum_delay / cpu->exe_inst;

    //
    // Predictor stats are shared... only register once
    //
    if (thisCluster() == 0) {
        if (cpu->use_hm_predictor)
            hm_predictor->regFormulas();

        if (cpu->use_lr_predictor)
            lr_predictor->regFormulas();
    }

    //
    // Register the segment-specific statistics
    //
    for (int i = 0; i < num_segments; ++i)
        queue_segment[i]->regFormulas(cpu, n);
}

//
// Free the chain associated with this ROB entry
//
// Returns TRUE if the chain has been de-allocated
//
bool
SegmentedIQ::release_chain(ROBStation *rob)
{
    bool rv = false;
    unsigned chain_num = rob->head_chain;

    //
    // "IF" statement MOVED HERE FROM segment_t::release_chain()
    // FIXME: this should probably do more than just skip the
    //        per-cluster releases...
    //
    // We don't want to free a chain that really belongs to someone else!
    if (rob->seq == (*chain_info)[chain_num].creator) {
        for (unsigned i = 0; i < num_segments; ++i)
            queue_segment[i]->release_chain(rob);

        // this block duplicated out of segment_t::release_chain()
        if (deadlock_slot.notnull()) {
            unsigned n_chains = 0;
            bool was_chained = false;

            // check its ideps to see if we need to unchain them
            for (int j = 0; j < TheISA::MaxInstSrcRegs; ++j) {
                if (deadlock_slot->idep_info[j].chained) {
                    // note that this inst was chained when we got here
                    was_chained = true;

                    if (deadlock_slot->idep_info[j].follows_chain == chain_num) {
                        deadlock_slot->idep_info[j].chained = false;
                    } else {
                        // chained to another inst
                        ++n_chains;
                    }
                }
            }

            //
            // If this inst is no longer on _any_ chain...
            //
            if (was_chained && n_chains == 0) {
                // DO NOT put it on the self-timed list... the
                // segment add() will do that

                // We need to set the self-timed flag in any register produced
                // by this instruction
                ROBStation *rob = deadlock_slot->rob_entry;
                for (int j = 0; j < rob->num_outputs; ++j) {
                    RegInfoElement &r =
                        (*reg_info_table)[rob->thread_number][rob->onames[j]];

                    //
                    // If this ROB entry is responsible for producing this
                    // arch register result...
                    //
                    if (r.producer() && (r.producer()->seq == rob->seq)) {
                        // Clear the chained flag so that future consumers
                        // will know not to wait on the chain head
                        r.unChain();
                    }
                }
            }
        }
    } else {
        //
        // This instruction was not the creator of the chain...
        //
        return rv;
    }

    assert(chain_info->chainsInUseThread(rob->thread_number) > 0);

    // the release() method returns true if this was the last cluster
    // to release the chain
    if (chain_info->release(rob->head_chain, rob->thread_number)) {
        //
        // Ugly nasty hack to make chain stuff work with EXTERNAL
        // chainWires stuff
        //
        // (We don't want to be releasing the wire until ALL
        //  writebacks are completed)
        //
        if (cpu->chainWires != 0)
            for (int clust = 0; clust < cpu->numIQueues; ++clust)
                cpu->chainWires->releaseWire(clust, chain_num);

        rv = true;
    }

    //
    // We only deallocate register entries from the "home" cluster
    //
    if (rob->queue_num == thisCluster()) {
        //
        // Look through the register-info table, and set any registers
        // resulting from instructions on this chain to self-time
        //
        for (int i = 0; i < TotalNumRegs; ++i) {
            RegInfoElement &r = (*reg_info_table)[rob->thread_number][i];

            if (r.isChained() && r.chainNum() == rob->head_chain)
                r.unChain();
        }
    }

    return rv;
}

//
// This (mis-named) function is called as a result of a cache-access
// being processed
//
// If the access is a cache-miss, force the chained instructions to
// stop moving through the queue until writeback occurs
//
void
SegmentedIQ::cachemissevent_handler(Addr pc, int hm_prediction,
                                    ROBStation *rob, bool squashed,
                                    unsigned ann_value)
{
    unsigned predicted_value, actual_value;

    if (hm_predictor) {
        if (hm_prediction == MA_HIT)
            predicted_value = 1;  // Hit
        else
            predicted_value = 0;  // Miss

        if (ann_value == MA_HIT)
            actual_value = 1;
        else
            actual_value = 0;

        // Update the predictor
        hm_predictor->record(pc >> 2, actual_value, predicted_value);
    }

    if (ann_value == MA_CACHE_MISS) {
        if (!squashed) {
#if USE_NEW_SELF_TIME_CODE
            if (rob->seq == (*chain_info)[rob->head_chain].creator)
                (*chain_info)[rob->head_chain].self_timed = false;
#else
            for (unsigned i = 0; i < num_segments; ++i)
                queue_segment[i]->stop_self_time(rob);
#endif
        }
    }
}

//===========================================================================

SegmentedIQ::iq_iterator
SegmentedIQ::oldest()
{
    iq_iterator p, q = queue_segment[0]->oldest();

    for (unsigned i = 1; i < num_segments; ++i) {
        p = queue_segment[i]->oldest();

        if (p.notnull()) {
            if (q.notnull()) {
                if (p->seq < q->seq) {
                    // neither is null, use sequence
                    q = p;
                }
            } else {
                // q is null and p is not
                q = p;
            }
        }
    }

    if (deadlock_slot.notnull())
        if (q.isnull() || q->seq > deadlock_slot->seq)
            q = deadlock_slot;

    return q;
}

SegmentedIQ::iq_iterator
SegmentedIQ::oldest(unsigned thread)
{
    iq_iterator p, q = queue_segment[0]->oldest(thread);

    for (unsigned i = 1; i < num_segments; ++i) {
        p = queue_segment[i]->oldest(thread);

        if (p.notnull()) {
            if (q.notnull()) {
                if (p->seq < q->seq) {
                    // neither is null, use sequence
                    q = p;
                }
            } else {
                // q is null and p is not
                q = p;
            }
        }
    }

    if (deadlock_slot.notnull()) {
        // guard against a null q before dereferencing it, as in the
        // no-argument oldest() above
        if ((q.isnull() || q->seq > deadlock_slot->seq) &&
            (deadlock_slot->thread_number() == thread))
        {
            q = deadlock_slot;
        }
    }

    return q;
}

//===========================================================================

void
SegmentedIQ::dump_internals()
{
    cout << " Deadlock Mode: " << deadlock_recovery_mode << endl;
    cout << "       segment: " << deadlock_seg_flag << endl;

    cout << " Total instructions: " << total_insts << endl;
    for (int t = 0; t < cpu->number_of_threads; ++t)
        cout << "   Thread " << t << ": " << insts[t] << endl;

    cout << " Chain heads in ROB: " << cpu->chain_heads_in_rob << endl;
    cout << " Chain heads in IQ:  " << iq_heads << endl;

    for (int t = 0; t < cpu->number_of_threads; ++t)
        cout << " Active chains (thread " << t << "): "
             << chain_info->chainsInUseThread(t) << endl;
}
void
SegmentedIQ::dump_chains(unsigned s)
{
    queue_segment[s]->dump_chains();
}

void
SegmentedIQ::dump_chains()
{
    for (int s = 0; s < num_segments; ++s)
        dump_chains(s);
}

void
SegmentedIQ::dump(int mode)
{
    if (mode == 0) {
        short_dump();
        return;
    }

    cprintf("=========================================================\n"
            "%s full dump (cycle %d)\n"
            " Total Instructions: %u\n"
            " By thread: [",
            name(), curTick, total_insts);

    for (unsigned i = 0; i < cpu->number_of_threads; ++i) {
        cprintf("%u", insts[i]);
        if (i == cpu->number_of_threads - 1)
            cout << "]\n";
        else
            cout << ", ";
    }

    cout << "---------------------------------------------------------\n"
            "Chain Status:\n";
    chain_info->dump();

    cout << "---------------------------------------------------------\n";
    for (int i = last_segment; i >= 0; --i)
        queue_segment[i]->dump(mode);
}

//
// Short dump...
//
void
SegmentedIQ::short_dump()
{
    cprintf("=========================================================\n"
            "%s short dump (cycle %u) (%u instructions)\n"
            "---------------------------------------------------------\n",
            name(), curTick, total_insts);

    for (int i = last_segment; i >= 0; --i)
        queue_segment[i]->short_dump();

    if (deadlock_recovery_mode)
        cout << "==> Deadlock recovery mode\n";
}

void
SegmentedIQ::raw_dump()
{
    cout << "No raw dump\n";
}

void
SegmentedIQ::rq_dump()
{
    cprintf("=========================================================\n"
            "%s RQ dump (cycle %u)\n", name(), curTick);

    for (int i = 0; i < num_segments; ++i)
        queue_segment[i]->rq_dump();
}

void
SegmentedIQ::rq_raw_dump()
{
    cprintf("=========================================================\n"
            "%s RQ dump (cycle %d)\n", name(), curTick);

    queue_segment[0]->rq_raw_dump();
}

//////////////////////////////////////////////////////////////////////////////
//
//  Interface to INI file mechanism
//
//////////////////////////////////////////////////////////////////////////////

BEGIN_DECLARE_SIM_OBJECT_PARAMS(SegmentedIQ)

    Param<unsigned> num_segments;
    Param<unsigned> max_chain_depth;
    Param<unsigned> segment_size;
    Param<unsigned> segment_thresh;
    Param<bool> en_thread_priority;
    Param<bool> use_bypassing;
    Param<bool> use_pushdown;
    Param<bool> use_pipelined_prom;

END_DECLARE_SIM_OBJECT_PARAMS(SegmentedIQ)

BEGIN_INIT_SIM_OBJECT_PARAMS(SegmentedIQ)

    INIT_PARAM(num_segments, "number of IQ segments"),
    INIT_PARAM(max_chain_depth, "max chain depth"),
    INIT_PARAM(segment_size, "segment size"),
    INIT_PARAM(segment_thresh, "segment delta threshold"),
    INIT_PARAM(en_thread_priority, "enable thread priority"),
    INIT_PARAM_DFLT(use_bypassing, "enable bypass at dispatch", true),
    INIT_PARAM_DFLT(use_pushdown, "enable instruction pushdown", true),
    INIT_PARAM_DFLT(use_pipelined_prom, "enable pipelined chain wires", true)

END_INIT_SIM_OBJECT_PARAMS(SegmentedIQ)

CREATE_SIM_OBJECT(SegmentedIQ)
{
    return new SegmentedIQ(getInstanceName(),
                           num_segments,
                           max_chain_depth,
                           segment_size,
                           segment_thresh,
                           en_thread_priority,
                           use_bypassing,
                           use_pushdown,
                           use_pipelined_prom);
}

REGISTER_SIM_OBJECT("SegmentedIQ", SegmentedIQ)
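//
// For orientation only: an object of this type would be configured through
// the simulator's INI mechanism using the parameters declared above.  The
// stanza below is a minimal sketch, assuming the usual type=/name=value
// section format implied by the SIM_OBJECT parameter macros; the section
// name and all values are hypothetical, not taken from this file:
//
//     [iq0]
//     type=SegmentedIQ
//     num_segments=4
//     max_chain_depth=64
//     segment_size=32
//     segment_thresh=2
//     en_thread_priority=false
//     use_bypassing=true
//     use_pushdown=true
//     use_pipelined_prom=true
//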