📄 iq_segmented.cc
    else
        cout << "--";

    if (chained)
        cout << " follows C#" << follows_chain
             << " w/ latency " << max_delay + op_lat;
    else
        cout << " self-timed";

    cout << endl;
#endif

    //
    // Add the instruction to the queue segment AFTER all of its fields
    // have been filled in.
    //
    // The following IQStation entries are initialized during
    // the call to segment_t::add() :
    //    (1) segment_number
    //    (2) chain_entry
    //    (3) head_entry
    //    (4) rq_entry
    //
    assert(queue_segment[destination]->add(p).notnull());
    --free_slot_info[destination];

    //
    // Update the register-info structure so that subsequent insts
    // get the right chain info for regs produced by this inst.
    //
    for (unsigned i = 0; i < inst->numDestRegs(); ++i) {
        // The ready time is based on a timestamp...
        ri[i].setPredReady(pred_wb_time);

        if (p->head_of_chain) {
            ri[i].setChain(p->head_chain, max_depth + 1);
            ri[i].setLatency(0 + op_lat);
        } else if (chained) {
            // this instruction is following one (or more) chains
            ri[i].setChain(follows_chain, max_depth + 1);
            ri[i].setLatency(max_delay + op_lat);
        } else {
            // this instruction is self-timed... NO CHAIN
            ri[i].setLatency(max_delay + op_lat);
        }
    }

    return p;
}

//
// This function gets called when an instruction is dispatched to a
// different cluster
//
void
SegmentedIQ::inform_dispatch(iq_iterator i)
{
}

void
SegmentedIQ::registerLSQ(iq_iterator &p, BaseIQ::iterator &lsq)
{
    p->lsq_entry = lsq;
    lsq->hm_prediction = p->hm_prediction;
    lsq->pred_issue_cycle = p->pred_issue_cycle;

    //
    // We need this to clean up some squashed instructions, and
    // for stores...
    //
    if (p->head_of_chain) {
        lsq->head_of_chain = true;
        lsq->head_chain = p->head_chain;
    }
}

unsigned
SegmentedIQ::choose_dest_segment(iq_iterator &p)
{
    unsigned destination;
    unsigned desired_pos = queue_segment[0]->proper_segment(p);

    p->dest_seg = desired_pos;

    //
    // This is easy if we're not doing bypassing
    //
    if (!use_bypassing)
        return num_segments - 1;

    //
    // Search for the lowest segment...
    //
    int d;
    for (d = num_segments - 1; d >= 0; --d) {
        //
        // We break out early if we are looking at the "correct" segment
        // for this instruction
        //
        if (!use_mod_bypassing && d == desired_pos) {
            unsigned slots;

            // Dispatch stage is close enough to get the real number
            // for the top segment
            if (d == last_segment)
                slots = queue_segment[d]->free_slots();
            else
                slots = free_slot_info[d];

            // We can do this w/out checking which segment we're looking at
            // because we're guaranteed that the TOP segment has a free
            // slot in it...
            if (slots == 0)
                ++d;

            break;
        }

        //
        // keep going down until we find a non-empty segment
        //
        if (!queue_segment[d]->empty()) {
            unsigned slots;

            // Dispatch stage is close enough to get the real number
            // for the top segment
            if (d == last_segment)
                slots = queue_segment[d]->free_slots();
            else
                slots = free_slot_info[d];

            //
            // If we're checking for a minimum number of free slots...
            //   OR
            // there are no open slots...
            //
            if ((bypass_slot_checking && slots < cpu->issue_width)
                || slots == 0) {
                // We need to try to go back one segment...
                if (d < num_segments - 1)
                    ++d;
            }

            break;
        }
    }

    // Just in case...
    if (d < 0)
        d = 0;
    if (d > last_segment)
        d = last_segment;

    destination = d;
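
    // Illustrative walk-through of the search above (hypothetical numbers,
    // not from the original source): with num_segments == 4, the scan starts
    // at d == 3 (the top segment) and moves toward segment 0.  Suppose the
    // desired-position early-out does not trigger, segments 3 and 2 are
    // empty, and segment 1 is the first non-empty one.  If segment 1 reports
    // 0 free slots (or, with bypass_slot_checking, fewer than issue_width),
    // d is bumped back up to 2 before the loop breaks, so the instruction
    // is placed in segment 2 instead.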

    //
    // Statistics...
    //
    if (destination != num_segments - 1) {
        ++bypassed_insts[p->thread_number()];
        bypassed_segments[p->thread_number()] +=
            num_segments - destination - 1;
    }

    return destination;
}

//
// remove an instruction from the queue...
// Cleans up everything except for the output registers
//
SegmentedIQ::iq_iterator
SegmentedIQ::internal_remove(SegmentedIQ::iq_iterator &e)
{
    iq_iterator n = e.next();

    if (e.notnull()) {
        unsigned seg = e->segment_number;

        --total_insts;
        --insts[e->thread_number()];

        if (e->segment_number != deadlock_seg_flag)
            queue_segment[seg]->remove(e);
        else
            deadlock_slot = 0;

        // Make sure that the tag is invalidated!
        e->tag++;

        for (int i = 0; i < TheISA::MaxInstSrcRegs; ++i) {
            if (e->idep_ptr[i]) {
                delete e->idep_ptr[i];   // delete the DepLink
                e->idep_ptr[i] = 0;
            }
        }

        // Release the actual IQStation
        active_instructions->remove(e);
    }

    return n;
}

//
// Every cycle:
//  (1) Promote instructions that need it
//  (2) Check to see which instructions will follow those just promoted
//
void
SegmentedIQ::tick_model_stats()
{
    //
    // Statistics
    //
    for (int i = 0; i < cpu->number_of_threads; ++i) {
        unsigned c = chain_info->chainsInUseThread(i);

        chains_cum[i] += c;
        if (c > chains_peak[i].value())
            chains_peak[i] = c;
    }

    for (int i = 0; i < num_segments; ++i) {
        // do segments stats
        queue_segment[i]->tick_stats();
    }

    // The number of chain heads in the ROB ONLY is the difference
    // between the number of chain heads & the number of chain heads still
    // in the IQ/LSQ
    rob_chain_heads += (cpu->chain_heads_in_rob - iq_heads);
}

void
SegmentedIQ::tick_ready_stats()
{
    for (int i = 0; i < num_segments; ++i) {
        // do segments stats
        queue_segment[i]->tick_ready_stats();
        total_ready_count += queue_segment[i]->ops_ready_count();
    }
}

unsigned
SegmentedIQ::sanity_check()
{
    unsigned rv = 0;

    if (!chain_info->sanityCheckOK())
        rv |= 0x01;

    for (iterator i = active_instructions->head(); i != 0; i = i.next()) {
        unsigned seq = i->seq;

        for (int j = 0; j < TheISA::MaxInstSrcRegs; ++j) {
            if (i->idep_info[j].chained) {
                unsigned c = i->idep_info[j].follows_chain;

                if ((*chain_info)[c].free) {
                    rv |= 0x02;
                    return rv;

                    // (unreachable after the return above)
                    rv = seq;
                    seq = c;
                }
            }
        }
    }

    return rv;
}

//============================================================================
//
//
//
void
SegmentedIQ::tick()
{
#if SANITY_CHECKING
    assert(!sanity_check());
#endif

    //----------------------------------------------------------
    //
    // Ready Queue re-sorting, and debugging stuff
    //
    for (int i = 0; i < num_segments; ++i)
        queue_segment[i]->tick();

    //----------------------------------------------------------
    //
    // Deadlock Recovery...
    //
    if (deadlock_slot.notnull()) {
        // Put the instruction from the deadlock_slot into
        // the last segment (if there's room)
        if (!queue_segment[last_segment]->full()) {
            assert(queue_segment[last_segment]->add(deadlock_slot).notnull());
            deadlock_slot = 0;
        }
    }

    //----------------------------------------------------------
    for (int i = 0; i < num_segments; ++i)
        free_slot_info[i] = queue_segment[i]->free_slots();
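
    // free_slot_info[] appears to serve as this cycle's snapshot of
    // per-segment occupancy: it is decremented whenever an instruction is
    // added to a segment (in the dispatch path above and in promote_insts()
    // below), so later placement decisions within the same cycle see the
    // slots that have already been claimed.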

    //
    // Promote those instructions marked ready-to-promote during
    // the previous cycle.
    //
    // The "head_promoted" signal is asserted as appropriate
    //
    for (unsigned i = 0; i < num_segments; ++i) {
        // Promote instructions from this segment
        if (i != 0)
            promote_insts(i);
    }

    //
    // Shift the bits for pipelined promotion
    //
    chain_info->tick();

    //
    // Let each segment decide which instructions should be promoted
    //
    for (unsigned i = 0; i < num_segments; ++i)
        queue_segment[i]->check_promotable();

    //
    // Register timers may decrement also
    //
    for (unsigned t = 0; t < cpu->number_of_threads; ++t) {
        for (unsigned r = 0; r < TotalNumRegs; ++r) {
            RegInfoElement &ri = (*reg_info_table)[t][r];

            if (ri.isChained() == false ||
                (*chain_info)[ri.chainNum()].self_timed) {
                ri.tickLatency();
            }
        }
    }

    //----------------------------------------------------------
    // ONLY CALL THIS ONCE!!!! (It resets counters)
    deadlock_recovery_mode = check_deadlock();

    if (deadlock_recovery_mode) {
        ++deadlock_cycles;

        // Count a new event only if last cycle wasn't a deadlock cycle
        if (last_deadlock != curTick - 1)
            ++deadlock_events;
        last_deadlock = curTick;

        // Pull the youngest instruction from Segment 0 if the
        // deadlock_slot is open
        if (deadlock_slot.isnull()) {
            deadlock_slot = queue_segment[0]->youngest();
            if (deadlock_slot.notnull()) {
                queue_segment[0]->remove(deadlock_slot);
                deadlock_slot->segment_number = deadlock_seg_flag;
            }
        }

        //
        // Force the oldest instruction in the segment onto the
        // ready list
        //
        for (int seg = 1; seg < num_segments; ++seg) {
            iq_iterator i = queue_segment[seg]->oldest();

            if (i.notnull() && !i->queued)
                queue_segment[seg]->enqueue(i);
        }
    }
}

//
// Promote instructions from the specified queue segment to the next
//
void
SegmentedIQ::promote_insts(unsigned src_seg)
{
    unsigned num_to_promote;

    if (src_seg == 0)
        panic("IQ-Segmented: Don't promote from seg 0");

    // Figure out how many to promote
    num_to_promote = queue_segment[src_seg]->ready_count();
    if (num_to_promote > free_slot_info[src_seg - 1])
        num_to_promote = free_slot_info[src_seg - 1];
    if (num_to_promote > cpu->issue_width)
        num_to_promote = cpu->issue_width;

    segment_t::rq_iterator p = queue_segment[src_seg]->issuable_list();

    for (int i = 0; i < num_to_promote; ++i) {
        segment_t::rq_iterator n = p.next();

        ++dedlk_promotion_count;

        // We must remove the entry FIRST since add() overwrites some
        // of the tracking information
        queue_segment[src_seg]->remove(*p);
        assert(queue_segment[src_seg - 1]->add(*p).notnull());
        --free_slot_info[src_seg - 1];

        // If we are promoting into Segment 0
        if (src_seg == 1) {
            unsigned max_delay = 0;

            (*p)->seg0_entry_time = curTick;

            for (int i = 0; i < TheISA::MaxInstSrcRegs; ++i)
                if (!(*p)->idep_ready[i] && (*p)->idep_info[i].chained)
                    if (max_delay < (*p)->idep_info[i].delay)
                        max_delay = (*p)->idep_info[i].delay;

            if (max_delay >= segment_thresh)
                seg0_prom_early_count++;
        }
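
        // Worked example for the check above (hypothetical values): with
        // segment_thresh == 2, an instruction entering segment 0 while one
        // of its chained, not-yet-ready source operands still reports a
        // delay of 3 cycles is counted in seg0_prom_early_count, presumably
        // because it reached segment 0 well before its operand is predicted
        // to be ready.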

        //
        // If we are promoting the head of a chain, notify the chain
        //
        if ((*p)->head_of_chain) {
            //
            // Actually, if this instruction is going through the queue again
            // as a result of the deadlock-recovery mechanism, we don't want
            // to signal the promotion.
            //
            SegChainInfoEntry &info = (*chain_info)[(*p)->head_chain];

            if (info.head_level == src_seg) {
                info.head_promoted = true;
                info.head_level = src_seg - 1;
            }

#if DEBUG_PROMOTION
            cprintf("Promoted seq %d (head of chain %d) to seg %d\n",
                    (*p)->seq, (*p)->head_chain, src_seg - 1);
#endif
        }
#if DEBUG_PROMOTION
        else {
            cprintf("Promoted seq %d (follows chain %d) to seg %d\n",
                    (*p)->seq, (*p)->follows_chain, src_seg - 1);
        }
#endif

        // we're counting promotions OUT of segments
        ++queue_segment[src_seg]->cum_promotions[(*p)->thread_number()];

        // Point to next
        p = n;
    }

    if (use_pushdown) {
        //
        // Push-Down to Next Segment
        //

        // not if no b/w or this is segment 1
        if (num_to_promote == cpu->issue_width || src_seg < 2)
            return;

        //
        // Pushdown criteria:
        //  - This segment has less than a full width of free slots
        //    (make enough room for a full width)
        //  - The next segment must have a half-width of free slots
        //    after we finish pushing down
        //
        unsigned f_slots = free_slot_info[src_seg];

        if (f_slots < cpu->issue_width) {
            unsigned f_slots_1 = free_slot_info[src_seg - 1];

            if (f_slots_1 > cpu->issue_width / 2) {
                // Calculate our parameters
                unsigned want_to_push = cpu->issue_width - f_slots;
                unsigned room_avail = f_slots_1 - cpu->issue_width / 2;
                unsigned bw_avail = cpu->issue_width - num_to_promote;

                // Don't over-fill the next segment
                if (room_avail < want_to_push)
                    want_to_push = room_avail;

                // We have a fixed amount of bandwidth
                if (bw_avail < want_to_push)
                    want_to_push = bw_avail;

                ++pushdown_events[src_seg - 1];
                ++total_pd_events;
                pushdown_count[src_seg - 1] += want_to_push;
                total_pd_count += want_to_push + 1;

                for (int i = 0; i < want_to_push; ++i) {
                    iq_iterator p = queue_segment[src_seg]->oldest();

                    if (use_mod_pushdown) {
                        iq_iterator q = queue_segment[src_seg]->lowest_dest();

                        if (q->dest_seg < p->dest_seg)
                            p = q;
                    }

                    ++dedlk_promotion_count;

                    queue_segment[src_seg]->remove(p);

                    // zero return value indicates add error
                    assert(queue_segment[src_seg - 1]->add(p).notnull());
                    --free_slot_info[src_seg - 1];

                    //
                    // If we are promoting the head of a chain,
                    // notify the chain
                    //
                    if (p->head_of_chain) {
                        //
                        // Actually, if this instruction is going
                        // through the queue again as a result of the
                        // deadlock-recovery mechanism, we don't want
                        // to signal the promotion.
                        //
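
                        // Worked example for the pushdown budget computed
                        // above (hypothetical numbers): with issue_width == 8,
                        // f_slots == 3, and f_slots_1 == 7, want_to_push
                        // starts at 8 - 3 = 5, room_avail is 7 - 4 = 3, and
                        // if 6 instructions were already promoted this cycle
                        // bw_avail is 8 - 6 = 2; the final push count is
                        // therefore min(5, 3, 2) = 2.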