📄 iq_segmented.cc
字号:
SegChainInfoEntry &info = (*chain_info)[p->head_chain]; if (info.head_level == src_seg) { info.head_promoted = true; info.head_level = src_seg - 1; }#if DEBUG_PROMOTION cprintf("Promoted seq %d (head of chain %d) " "to seg %d\n", p->seq, p->head_chain, src_seg - 1); } else { cprintf("Promoted seq %d (follows chain %d) " "to seg %d\n", p->seq, p->follows_chain, src_seg - 1);#endif } // we're counting promotions OUT of segments ++queue_segment[src_seg]-> cum_promotions[p->thread_number()]; } } } }}boolSegmentedIQ::release_register(unsigned t, unsigned r, InstSeqNum rob_seq){ // If this instruction is the last writer or this register, // Mark the register so that future consumers don't chain // off of it. if ((*reg_info_table)[t][r].producer() && (*reg_info_table)[t][r].producer()->seq == rob_seq) { (*reg_info_table)[t][r].clear(); return true; } return false;}//// Handle the removal of the specified instruction from both// the instruction queue and the ready queue//SegmentedIQ::rq_iteratorSegmentedIQ::issue_impl(rq_iterator &p){ rq_iterator next = p.next(); ++dedlk_issue_count; if (p.notnull()) { pred_issue_error_dist.sample(curTick-(*p)->pred_issue_cycle); ready_error_dist.sample((*p)->pred_ready_time - (*p)->ready_timestamp); // If we haven't actually counted down to zero yet... if ((*p)->st_zero_time == 0) (*p)->st_zero_time = curTick + (*p)->max_delay() - 1; int error = (*p)->seg0_entry_time - max((*p)->dispatch_timestamp + num_segments - 1, (*p)->st_zero_time); seg0_entry_error_dist.sample(error); // // If head of chain, self-time chain // if ((*p)->head_of_chain) { // chain head has left IQ --iq_heads; // if this is an EA-comp instruction, then we really // don't want to start self-timing yet... wait until // the memory op issues from the LSQ if (!(*p)->ea_comp) {#if USE_NEW_SELF_TIME_CODE ROBStation *rob = (*p)->rob_entry; if (rob->seq == (*chain_info)[rob->head_chain].creator) (*chain_info)[rob->head_chain].self_timed = true;#else for (int seg = 0; seg < num_segments; ++seg) queue_segment[seg]->self_time((*p)->rob_entry);#endif } } // Remove IQ element internal_remove(*p); } return next;}//// This function gets called when an instruction issues form a different IQ// or the LSQ//voidSegmentedIQ::inform_issue(iq_iterator i){ ++dedlk_promotion_count; pred_issue_error_dist.sample(curTick - i->pred_issue_cycle); if (i->head_of_chain) { // head has left the IQ // --iq_heads; bool do_st = false; if (load_chain_st) { // always self-time when the head issues do_st = true; } else if (s0_st_limit) { // start self-timing if there aren't too many not-ready // instructions in segment zero if ((queue_segment[0]->count() - queue_segment[0]->ready_count()) < (segment_size/2)) { do_st = true; } else { ++st_limit_events; } } // else if (i->rob_entry->hm_prediction == MA_CACHE_MISS) { // ??????????? else if (i->rob_entry->hm_prediction == MA_HIT) { do_st = true; } if (do_st) {#if USE_NEW_SELF_TIME_CODE ROBStation *rob = i->rob_entry; if (rob->seq == (*chain_info)[rob->head_chain].creator) (*chain_info)[rob->head_chain].self_timed = true;#else // Start self-timing the chained instructions for (int seg = 0; seg < num_segments; ++seg) queue_segment[seg]->self_time(i->rob_entry);#endif } }}//// This function is called when an instruction from another IQ or from// the LSQ gets squashed//voidSegmentedIQ::inform_squash(ROBStation *rob){ // // If a chain-head that is no longer in the IQ is being // squashed, release the chain now, since there is no IQ // entry to do it via squash(). // // Release any registers that this insturcion produced for (int i = 0; i < rob->num_outputs; ++i) release_register(rob->thread_number, rob->onames[i], rob->seq); if (rob->head_of_chain && rob->iq_entry.isnull()) release_chain(rob);}//// Remove an instruction from the queue//// (frees output registers first)//SegmentedIQ::iq_iteratorSegmentedIQ::squash(iq_iterator &e){ iq_iterator n = e.next(); // Release any registers that this insturcion produced for (int i = 0; i < e->rob_entry->num_outputs; ++i) release_register(e->thread_number(), e->rob_entry->onames[i], e->rob_entry->seq); // We need to free the chain here... // don't bother marking the chained instructions as self-timed // since they will be squashed also. if (e->head_of_chain) { release_chain(e->rob_entry); --iq_heads; } // Remove IQ element internal_remove(e); return n;}//// Walk the output-dependence list for this instruction...//// Only remove those entries which belong to instructions in this// queue.//// We remove those entries from the chain that we handle here so// that the chain can be applied to the LSQ also//// NOTE: EA-Comp operations do not pass through here!//// NOTE: IF YOU ARE CHANGING *THIS* ROUTINE, YOU PROBABLY WANT TO// CHANGE "ls_queue::broadcast_result" ALSO!//unsignedSegmentedIQ::writeback(ROBStation *rob, unsigned queue_num){ DepLink *olink, *olink_next; unsigned consumers = 0; // // If this instruction was the head of a chain: // (1) Mark the chain as self-timed // (2) Free the chain // if (rob->head_of_chain) { unsigned chain = rob->head_chain; // // Collect stats // max_chain_length_dist.sample((*chain_info)[chain].chain_depth); // // Free the chain // // Moves chained instructions from the chain onto the // self_timed_list (causeing stopped insts to restart) // release_chain(rob); } // // Now go through output dependency lists and mark operands ready // for (int i = 0; i < rob->num_outputs; ++i) { // // Try to release the register that this instruction produced // release_register(rob->thread_number, rob->onames[i], rob->seq); // If there are no links... if (rob->odep_list[i][queue_num] == 0) return 0; for (olink = rob->odep_list[i][queue_num]; olink; olink = olink_next) { // grab the next link... we may delete this one olink_next = olink->next(); if (olink->valid()) { res_list<IQStation>::iterator q_entry = olink->consumer(); // This is the IQ... ignore LSQ entries! if (q_entry->in_LSQ) continue; ++consumers; // The consuming operation should still be on a chain... // assert(!q_entry->idep_info[olink->idep_num].chained); if (q_entry->idep_ready[olink->idep_num]) panic("output dependence already satisfied"); // input is now ready q_entry->idep_ready[olink->idep_num] = true; q_entry->idep_ptr[olink->idep_num] = 0; // are all the input operands ready? if (q_entry->ops_ready()) { // We should not become ready before we time-out // assert(q_entry->delay == 0); q_entry->ready_timestamp = curTick; // If we're ready now, and we made a prediction about // which op would be ready last... if (q_entry->pred_last_op_index != -1) { // If we guessed correctly... if (olink->idep_num == q_entry->pred_last_op_index) { ++correct_last_op[q_entry->thread_number()]; } else { wrong_choice_dist[q_entry->thread_number()] .sample(curTick - q_entry->first_op_ready); disp_rdy_delay_dist[q_entry->thread_number()] .sample(curTick - q_entry->dispatch_timestamp); } // if we used the Left/Right predictor AND // we made a prediction for this instruction if (cpu->use_lr_predictor && q_entry->lr_prediction != -1) { lr_predictor->record(q_entry->inst->PC >> 2, olink->idep_num, q_entry->lr_prediction); } } // // We queue this instruction to move on ONLY if // this is segment zero // if (q_entry->segment_number == 0) queue_segment[0]->enqueue(q_entry); // // add the largest delay value to the distribution // unsigned max_delay=0; for (int j = 0; j < TheISA::MaxInstSrcRegs; ++j) if (max_delay < q_entry->idep_info[j].delay) max_delay = q_entry->idep_info[j].delay; delay_at_ops_rdy_dist.sample(max_delay); cum_delay += max_delay; } else { q_entry->first_op_ready = curTick; } } // Free link elements that belong to this queue delete olink; } } return consumers;}boolSegmentedIQ::check_deadlock(){ // // Can't have a deadlock if the IQ is empty... // if (count() == 0) return false; // Queue-specific conditions: // // (1) No promotions or issues in the last cycle if (dedlk_promotion_count || dedlk_issue_count) { dedlk_promotion_count = 0; dedlk_issue_count = 0; return false; } // // The LSQ must have no ready instructions // if (cpu->LSQ->ready_count()) return false; // // The writeback-queue must be empty // if (!cpu->writebackEventQueue.empty()) return false; // // There must be at least one segment between the first and last segments // that is full, and there are no ready instructions // bool found = false; for (unsigned s = 0; s < num_segments - 1; ++s) { if (queue_segment[s]->free_slots() == 0 && queue_segment[s]->ready_count() == 0) { found = true; break; } } // // The DL-1 cache must not have any outstanding misses // if (cpu->dcacheInterface->outstandingMisses()) return false; if (!found) return false; return true;}voidSegmentedIQ::regModelStats(unsigned num_threads){ using namespace Stats; // string n = cpu->name() + ".IQ:"; string n = name() + ":"; string c = cpu->name() + "."; // // Stall statistics // rob_chain_heads .name(n+"rob_chain_heads") .desc("Cum count of chain heads in ROB") ; chains_cum .init(num_threads) .name(n+"chains_cum") .desc("chains in use") .flags(total) ; chains_peak .init(num_threads) .name(n+"chains_peak") .desc("maximum number of chains in use") .flags(total) ; deadlock_events .name(n+"deadlock_events") .desc("Number of IQ deadlock events") ; deadlock_cycles .name(n+"deadlock_cycles") .desc("Number of cycles the IQ was deadlocked") ; bypassed_insts.init(num_threads); bypassed_segments.init(num_threads); if (use_bypassing) { bypassed_insts .name(n+"bypassed_insts") .desc("number of insts that used bypassing") .flags(total) ; bypassed_segments .name(n+"bypassed_segs") .desc("number of segments that were bypassed") .flags(total) ; } if (s0_st_limit) { st_limit_events .name(n+"st_limit_events") .desc("number of S/T limit events") ; } if (use_pushdown) { total_pd_count .name(n+"pushdown_count") .desc("number of instructions pushed into this segment") ; total_pd_events .name(n+"pushdown_events") .desc("number of pushed-down events") ; } // // Ready instruction stats // total_ready_count .name(n+"ops_rdy_insts") .desc("Cumulative count of ops-ready insts") ; // This should really always be zero! seg0_prom_early_count .name(n+"00:promoted_early") .desc("instructions promoted to Seg-0 early") ; // // Chain prediction stats // correct_last_op .init(num_threads) .name(n+"correct_op_pred") .desc("number of 2-op insts w/ correct pred") .flags(total) ; wrong_choice_dist .init(num_threads,0,19,1) .name(n+"wrong_choice_dist") .desc("how many cycles we were off when we choose wrong") .flags(cdf) ; // // Chain-length stats... // max_chain_length_dist .init(0,max_chain_depth,1) .name(n+"max_chain_length_dist") .desc("Maximum chain lengths") .flags(cdf) ; inst_depth_dist .init(0,max_chain_depth,1) .name(n+"inst_depth_dist") .desc("Instruction depth (levels)") .flags(cdf) ; inst_depth_lat_dist .init(0,total_thresh,1) .name(n+"inst_depth_lat_dist") .desc("Instruction depth (cycles)") .flags(cdf) ; // // These stats tell us how effective this queue is // ready_error_dist = the time between ops actually ready & // when we *think* they should be ready // [rdy_time - pred_rdy_time]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -