📄 fetch.cc
字号:
for (int i = 0; i < number_of_threads; i++) { int thread_number = fetch_list[i].thread_number; if (fetch_list[i].blocked) floss_state.fetch_end_cause[thread_number] = FLOSS_FETCH_POLICY; else if (!thread_info[thread_number].blocked) all_threads_blocked = false; } /* if all threads are in icache misses, we're done */ if (all_threads_blocked) { flossRecord(&floss_state, thread_fetched); fetch_idle_cycles++; // check_counters(); return; } /* * If the icache is totally blocked, we won't be able to fetch for * any thread, even if some are ready. This should be very * unlikely now that we squash outstanding I-cache requests on a * misspeculation, as long as there are as many MSHRs as threads * (and we're not doing i-cache prefetching). * * If this does occur, we'd like to spread the blame among all * threads that currently have outstanding icache misses. Howver, * this doesn't fit into our floss_reasons interface, so for now * we'll just consider it an idel fetch cycle. */ if (icacheInterface->isBlocked()) { flossRecord(&floss_state, thread_fetched); fetch_idle_cycles++; fetch_idle_icache_blocked_cycles++; // check_counters(); return; } /*====================================================================*/ /* Fetch from _some_ thread as long as: * (1) We have fetch bandwidth left * (2) We haven't used all the icache ports * (3) We haven't tried all the threads */ for (int list_index = 0; list_index < number_of_threads; ++list_index) { if (fetched_this_cycle >= fetch_width || ports_used >= num_icache_ports || icacheInterface->isBlocked()) { // We didn't look at all the threads... for (int j = 0; j < number_of_threads; ++j) { if (floss_state.fetch_end_cause[j] == FLOSS_FETCH_NONE) { // Call the cause BANDWIDTH // (actual cause is either _real_ BW or cache ports) floss_state.fetch_end_cause[j] = FLOSS_FETCH_BANDWIDTH; } } break; } int thread_number = fetch_list[list_index].thread_number; if (thread_info[thread_number].blocked || fetch_list[list_index].blocked) { continue; } fetch_decisions++; fetch_chances[thread_number]++; ports_used++; /* Indicate that we've tried to fetch from a thread */ fetch_choice_dist[list_index]++; fetched_this_thread = fetchOneThread(thread_number, fetch_width - fetched_this_cycle); if (fetched_this_thread > 0) ++icache_ports_used_last_fetch; fetched_this_cycle += fetched_this_thread; // thread_fetched[thread_number] = fetched_this_thread; thread_fetched[0] += fetched_this_thread; } /* fetch for 'n' threads... */ fetch_nisn_dist.sample(fetched_this_cycle); flossRecord(&floss_state, thread_fetched); /* RR Policy: * cycle around the list by the number of "extra" threads * checked this cycle (we cycle for one thread automagically) */ if (fetch_policy == RR) for (int i = 1; i < ports_used; i++) choose_next_thread(fetch_list);}/*=====================================================================*/voidFullCPU::choose_next_thread(ThreadListElement *thread_list){ switch (fetch_policy) { case RR: round_robin_policy(thread_list); break; case IC: icount_policy(thread_list); break; default: panic("Illegal fetch policy requested"); break; }}/*=====================================================================*/voidFullCPU::round_robin_policy(ThreadListElement *thread_list){ int first, last; ThreadListElement temp; /* * Check for valid thread_list[] * (can become invalid when a thread finishes before the end of run) */ for (int i = 0; i < number_of_threads; i++) { /* * look for inactive threads at the top of the list */ if (!thread_info[thread_list[i].thread_number].active) { /* * We have to re-build the list */ initialize_fetch_list(false); break; } /* Make sure that we don't block any threads in here... */ /* if we need to initialize the fetch list, this will be */ /* done there... */ thread_list[i].blocked = false; } /* At this point, the threads in the list are grouped by priority */ /* from highest to lowest. Now we rotate within each priority group */ first = 0; last = 0; while (first < number_of_threads - 1) { /* look for the last member of this priority group */ while ((thread_list[first].priority == thread_list[last].priority) && (last < number_of_threads)) { last++; } last--; if (first != last) { /* rotate this group */ temp = thread_list[first]; for (int i = first; i < last; i++) thread_list[i] = thread_list[i + 1]; thread_list[last] = temp; } /* point to the next group */ first = last + 1; last = first; }}voidFullCPU::update_icounts(){ /* initialize instruction counters */ if (mt_frontend) for (int i = 0; i < number_of_threads; i++) thread_info[i].current_icount = ifq[i].num_valid + decodeQueue->count(i) + IQNumInstructions(i); else for (int i = 0; i < number_of_threads; i++) thread_info[i].current_icount = ifq[0].num_valid_thread[i] + decodeQueue->count(i) + IQNumInstructions(i); /* update cumulative instruction counters */ for (int i = 0; i < number_of_threads; i++) thread_info[i].cum_icount += thread_info[i].current_icount;}voidFullCPU::fetch_dump(){ for (int i = 0; i < number_of_threads; i++) icache_output_buffer[i]->dump(); cout << "=======================================================\n"; cout << "Fetch Stage State:\n"; for (int i = 0; i < number_of_threads; i++) { cprintf(" (Thread %d) PC: %#08x\n", i, thread[i]->regs.pc); if (thread[i]->spec_mode) cout << " {Mis-speculating}\n"; } if (mt_frontend) { for (int i = 0; i < number_of_threads; i++) { stringstream stream; ccprintf(stream, " (thread %d)", i); ifq[i].dump(stream.str()); } } else ifq[0].dump("");}voidFullCPU::icount_policy(ThreadListElement *thread_list){ /* put instruction counts into thread list */ for (int i = 0; i < number_of_threads; i++) { thread_list[i].thread_number = i; thread_list[i].blocked = false; thread_list[i].priority = thread_info[i].priority; thread_list[i].sort_key = thread_info[i].current_icount + 10000 * !thread_info[i].active + 10000 * thread_info[i].blocked; }#ifdef DEBUG_ICOUNT cerr << "-----------------------------------------------\n"; for (int i = 0; i < number_of_threads; i++) ccprintf(cerr, "%d ", thread_list[i].sort_key); cerr << "\n";#endif /* sort the list of threads in descending order... */ /* only the first 'n' threads are actually sorted */ qsort(thread_list, number_of_threads, sizeof(ThreadListElement), icount_compare);#ifdef DEBUG_ICOUNT for (int i = 0; i < number_of_threads; i++) ccprintf(cerr, "%d ", thread_list[i].sort_key); cerr << "\n";#endif}/*======================================================================*//* sort by descending priority field */static intrr_compare(const void *first, const void *second){ /* FIRST > SECOND --> (-1) */ if (((ThreadListElement *) first)->priority > ((ThreadListElement *) second)->priority) { return (-1); } /* FIRST < SECOND --> (1) */ if (((ThreadListElement *) first)->priority < ((ThreadListElement *) second)->priority) { return (1); } /* FIRST == SECOND --> (0) */ return (0);}/* sort based on (in order): * 1) decreasing priority * 2) increasing sort_key * 3) last fetch time */static inticount_compare(const void *first, const void *second){ /* Priority: FIRST > SECOND --> (-1) */ if (((ThreadListElement *) first)->priority > ((ThreadListElement *) second)->priority) { return (-1); } /* Priority: FIRST < SECOND --> (1) */ if (((ThreadListElement *) first)->priority < ((ThreadListElement *) second)->priority) { return (1); } /* otherwise, must be equal priority */ /* FIRST > SECOND --> (1) */ if (((ThreadListElement *) first)->sort_key > ((ThreadListElement *) second)->sort_key) { return (1); } /* FIRST < SECOND --> (-1) */ if (((ThreadListElement *) first)->sort_key < ((ThreadListElement *) second)->sort_key) { return (-1); } /* Same priority, Same sort-key... Use last fetch time */ /* Earlier last-fetch times go first */ /* FIRST > SECOND --> (1) */ if (((ThreadListElement *) first)->last_fetch > ((ThreadListElement *) second)->last_fetch) { return (1); } /* FIRST < SECOND --> (-1) */ if (((ThreadListElement *) first)->last_fetch < ((ThreadListElement *) second)->last_fetch) { return (-1); } /* FIRST == SECOND --> (0) */ return (0);}voidFullCPU::fetchRegStats(){ using namespace Stats; fetch_decisions .name(name() + ".FETCH:decisions") .desc("number of times the fetch stage chose between threads") ; fetch_idle_cycles .name(name() + ".FETCH:idle_cycles") .desc("number of cycles where fetch stage was idle") ; fetch_idle_icache_blocked_cycles .name(name() + ".FETCH:idle_icache_blocked_cycles") .desc("number of cycles where fetch was idle due to icache blocked") ; qfull_iq_occupancy .init(number_of_threads) .name(name() + ".IFQ:qfull_iq_occ") .desc("Number of insts in IQ when fetch-queue full") .flags(total) ; qfull_iq_occ_dist_ .init(/* size */ number_of_threads, /* base value */ 0, /* last value */(int) (IQSize()), /* bucket size */ 10) .name(name() + ".IFQ:qfull_iq_occ_dist") .desc("Number of insts in IQ when fetch-queue full") .flags(pdf) ; qfull_rob_occupancy .init(number_of_threads) .name(name() + ".IFQ:qfull_rob_occ") .desc("Number of insts in ROB when fetch-queue full") .flags(total) ; qfull_rob_occ_dist_ .init(/* size */ number_of_threads, /* base value */ 0, /* last value */ (int)(ROB_size), /* bucket size */ 10) .name(name() + ".IFQ:qfull_rob_occ_dist") .desc("Number of insts in ROB when fetch-queue full") .flags(pdf) ; priority_changes .init(number_of_threads) .name(name() + ".FETCH:prio_changes") .desc("Number of times priorities were changed") .flags(total) ; fetch_chances .init(number_of_threads) .name(name() + ".FETCH:chances") .desc("Number of fetch opportunities") .flags(total) ; fetched_inst .init(number_of_threads) .name(name() + ".FETCH:count") .desc("Number of instructions fetched") .flags(total) ; fetched_branch .init(number_of_threads) .name(name() + ".FETCH:branch_count") .desc("Number of branches fetched") .flags(total) ; fetch_choice_dist .init(number_of_threads) .name(name() + ".FETCH:choice") .desc("Number of times we fetched from our first choice") .flags(total | pdf | dist) ; fetch_nisn_dist .init(/* base value */ 0, /* last value */ fetch_width, /* bucket size */ 1) .name(name() + ".FETCH:rate_dist") .desc("Number of instructions fetched each cycle (Total)") .flags(pdf) ; fetch_nisn_dist_ = new Distribution<>[number_of_threads]; for (int i = 0; i < number_of_threads; i++) { stringstream lblStream; lblStream << name() << ".FETCH:rate_dist_" << i; stringstream descStream; descStream << "Number of instructions fetched each cycle (Thread " << i << ")"; fetch_nisn_dist_[i] .init(/* base value */ 0, /* last value */fetch_width, /* bucket size */ 1) .name(lblStream.str()) .desc(descStream.str()) .flags(pdf) ; }}voidFullCPU::fetchRegFormulas(){ using namespace Stats; idle_rate .name(name() + ".FETCH:idle_rate") .desc("percent of cycles fetch stage was idle") .precision(2) ; idle_rate = fetch_idle_cycles * 100 / numCycles; branch_rate .name(name() + ".FETCH:branch_rate") .desc("Number of branch fetches per cycle") .flags(total) ; branch_rate = fetched_branch / numCycles; fetch_rate .name(name() + ".FETCH:rate") .desc("Number of inst fetches per cycle") .flags(total) ; fetch_rate = fetched_inst / numCycles; fetch_chance_pct .name(name() + ".FETCH:chance_pct") .desc("Percentage of all fetch chances") ; fetch_chance_pct = fetch_chances / sum(fetch_chances);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -