📄 l1i_cache.c
字号:
*/ pmshr = 0; if (captr->mshr_count > 0) { unsigned blocknum = ADDR2BNUM1I(req->paddr); for (i = 0; i < captr->max_mshrs; i++) { if ((captr->mshrs[i].valid == 1) && (blocknum == captr->mshrs[i].blocknum)) { pmshr = &(captr->mshrs[i]); break; } } } /* * Determine if the requested line is available in the cache. */ misscache = Cache_search(captr, req->vaddr, req->paddr, &cline); req->d.proc_instruction.instructions = captr->data + ((cline->index * captr->linesz + (req->paddr & captr->block_mask)) / SIZE_OF_SPARC_INSTRUCTION) * SIZEOF_INSTR; /* * If it's an L1 prefetch, we will drop it no matter what as long as it * matches an MSHR or a cache line. */ if ((req->prefetch == 4) && (pmshr || misscache == 0)) return MSHR_USELESS_FETCH; if (pmshr == 0) { /* the line is not present in any MSHR */ if (misscache == 0) { if (Cache_hit_update(captr, cline, req)) { /* update LRU ages */ if (cparam.L1I_prefetch & 2) L1ICache_start_prefetch(captr, req); } req->hit_type = L1IHIT; return NOMSHR; } /* The line is either not in l1 cache, or in l1 cache but the cache line is in share state. */ if (req->prefetch == 2) { /* * An L2 prefetch goes to the next level of cache without * taking an MSHR here. */ return NOMSHR_FWD; } /**** Otherwise, we need to allocate an MSHR for this request ****/ if (captr->mshr_count == captr->max_mshrs) { /* * None are available, the value "NOMSHR_STALL" is returned. */ captr->pstats->l1istall.full++; return NOMSHR_STALL; } /* * Find the first free MSHR. */ for (i = 0; i < captr->max_mshrs; i++) { if (captr->mshrs[i].valid != 1) { pmshr = &(captr->mshrs[i]); break; } } req->l1mshr = pmshr; pmshr->valid = 1; pmshr->mainreq = req; pmshr->setnum = cline->index >> captr->set_shift; pmshr->blocknum = req->paddr >> captr->block_shift; pmshr->counter = 0; pmshr->pending_cohe = 0; pmshr->stall_WAR = 0; pmshr->demand = -1.0; pmshr->only_prefs = (req->prefetch) ? 
1 : 0; pmshr->has_writes = 0; pmshr->cline = cline; pmshr->misscache = misscache; pmshr->pend_opers = 0; captr->mshr_count++; captr->reqmshr_count++; captr->reqs_at_mshr_count++; return MSHR_NEW; } /* Matches an MSHR. The REQUEST must be merged, dropped, forwarded or stalled. */ /* * If there are pending flush/purge operations, stall. */ if (pmshr->pend_opers) { captr->pstats->l1istall.flush++; return MSHR_STALL_FLUSH; } /* * Now, how does the MSHR handle prefetch matches? At the first level * cache, prefetches should either be dropped, forwarded around cache, or * stalled. There is never any need to coalesce at L1, since one fetch is * sufficient. At the L2, though, prefetches cannot be dropped, as they * might already have many requests coalesced into them and waiting for * them at the L1 cache. */ if (req->prefetch) { /* * If a read prefetch wants to coalesce at L1 cache, drop it. */ return MSHR_USELESS_FETCH; } /* * Too many requests coalesced with MSHR */ if (pmshr->counter == MAX_COALS-1) { captr->pstats->l1istall.coal++; return MSHR_STALL_COAL; } /* * No problems with coalescing the request, so coalesce it. */ pmshr->coal_req[pmshr->counter++] = req; if (pmshr->only_prefs && !req->prefetch) { /* * Demand access coalesces with late prefetch accesses. */ if (pmshr->mainreq->prefetch == 1) { captr->pstats->sl1.late++; captr->pstats->sl1.useful++; } else if (pmshr->mainreq->prefetch == 4) { captr->pstats->l1ip.late++; captr->pstats->l1ip.useful++; } pmshr->only_prefs = 0; pmshr->demand = YS__Simtime; } return MSHR_COAL;}/*============================================================================= * L1ProcessTagReply: simulates access replies to the L1 cache. * This functions always return 1 because it won't fail for a write-through * cache -- no write-back to lower level, no invalidation to higher level. 
*/
static int
L1IProcessTagReply(CACHE * captr, REQ * req)
{
  int       misscache;
  HIT_TYPE  hit_type;              /* NOTE(review): declared but never used */
  MISS_TYPE ccdres;
  cline_t  *cline;
  MSHR     *pmshr;

  /* A perfect I-cache never misses, so it can never receive a reply. */
  if (cparam.L1I_perfect)
    YS__errmsg(captr->nodeid, "Perfect L1 I-cache shouldn't receive REPLY");

  if (req->progress == 0)
    {
      /* no progress made so far */
      pmshr = req->l1mshr;
      if (pmshr == 0)
        YS__errmsg(captr->nodeid,
                   "L1I Cache %i received a reply for a nonexistent MSHR\n",
                   captr->procid);

      /*
       * Collect statistics if the mainreq is a late prefetch.
       * (demand > 0 marks an MSHR whose prefetch was caught by a demand
       * access before the reply arrived; lateness is the waiting time.)
       */
      if (pmshr->demand > 0)
        {
          if (pmshr->mainreq->prefetch == 1)
            captr->pstats->sl1.lateness += YS__Simtime - pmshr->demand;
          else if (pmshr->mainreq->prefetch == 4)
            captr->pstats->l1ip.lateness += YS__Simtime - pmshr->demand;
          pmshr->demand = -1;
        }

      cline = pmshr->cline;
      misscache = pmshr->misscache;
      req->wrb_req = 0;

      if (misscache == 0)          /* line present in cache -- upgrade reply */
        cline->state = PR_DY;
      else
        {                          /* line not present in cache */
          if (misscache == 1)
            {                      /* "present" miss -- COHE miss */
              Cache_pmiss_update(captr, req, cline->index, pmshr->only_prefs);
              CCD_InsertNewLine(captr->ccd, ADDR2BNUM1I(req->paddr));
              req->miss_type1 = CACHE_MISS_COHE;
            }
          else
            {                      /* total miss: find a victim and updates ages */
              if (!Cache_miss_update(captr, req, &cline, pmshr->only_prefs))
                YS__errmsg(captr->nodeid,"L1WT should never see a NO_REPLACE");

              /* We should determine COLD/CAP/CONF */
              ccdres = CCD_InsertNewLine(captr->ccd, ADDR2BNUM1I(req->paddr));
              req->miss_type1 = req->line_cold ? CACHE_MISS_COLD : ccdres;
              /* Cache_miss_update may have picked a different victim line. */
              pmshr->cline = cline;
            }

          /* Victim accounting by the evicted line's state.  A dirty victim
             should be impossible in an I-cache, hence the warning. */
          if (cline->state == SH_CL)
            captr->pstats->shcl_victims1i++;
          else if (cline->state == PR_CL)
            captr->pstats->prcl_victims1i++;
          else if (cline->state == PR_DY)
            {
              YS__warnmsg(captr->nodeid, "Writeback from I-Cache !!!");
              captr->pstats->prdy_victims1i++;
            }

          /*
           * Now fill in the information for the new line
           */
          cline->tag = req->paddr >> captr->tag_shift;
          cline->vaddr = req->vaddr & block_mask1i;
          if (pmshr->has_writes)
            cline->state = PR_DY;
          else
            cline->state = (req->req_type == REPLY_SH) ? SH_CL : PR_CL;

          /* Predecode the freshly filled block into the simulator's
             internal instruction format. */
          PredecodeBlock(req->paddr & block_mask1i, req->node,
                         captr->data + cline->index * captr->linesz /
                         SIZE_OF_SPARC_INSTRUCTION * SIZEOF_INSTR,
                         captr->linesz / SIZE_OF_SPARC_INSTRUCTION);

          /* Point the request at the predecoded instructions within the
             block, offset by the requested address. */
          req->d.proc_instruction.instructions = captr->data +
            ((cline->index * captr->linesz +
              (req->paddr & captr->block_mask)) /
             SIZE_OF_SPARC_INSTRUCTION) * SIZEOF_INSTR;
        }

      req->progress = 1;
    }

  if (req->progress == 1)
    req->progress = 2;

  /*
   * Now, resolve all requests coalesced into the MSHR in question.  Send
   * them back to the processor for possible further handling.  And if any
   * flush/purge transaction is pending, activate it.
   */
  pmshr = req->l1mshr;
  L1ICache_uncoalesce_mshr(captr, pmshr);

  /* Pending flush/purge: invalidate the just-filled line and count a
     prefetched-but-never-used line where applicable. */
  if (pmshr->pend_opers && pmshr->cline->state != INVALID)
    {
      if (pmshr->cline->pref == 1)
        captr->pstats->sl1.useless++;
      else if (pmshr->cline->pref == 4)
        captr->pstats->l1ip.useless++;
      pmshr->cline->state = INVALID;
      pmshr->cline->tag = -1;
    }

  /*
   * free up the MSHR, or release all coalesced requests.
   */
  Cache_free_mshr(captr, pmshr);

  return 1;
}




/*=============================================================================
 * Uncoalesce each request coalesced in the specified MSHR.
*/
/*
 * Release the main request and every request coalesced into 'pmshr'.
 * The main request is globally performed (or returned to the pool if it
 * was an L1 prefetch with no outstanding coherence count); each coalesced
 * request is marked as an L1I hit, given its instruction pointer within
 * the now-filled line, and globally performed.  Always returns 0.
 */
int L1ICache_uncoalesce_mshr(CACHE *captr, MSHR *pmshr)
{
  int       latepf = (pmshr->demand > 0.0);
  int       i;
  cline_t  *cline;

  /*
   * If this MSHR had a late prefetch, set prefetched_late field to allow
   * system to count every access coalesced into the MSHR as part of "late PF"
   * time in statistics. Is this reasonable?
   */
  pmshr->mainreq->prefetched_late = latepf;

  cline = pmshr->cline;

  if (pmshr->mainreq->prefetch == 4)
    {
      /* L1-initiated prefetch: nobody waits on it, just recycle the REQ
         once no coherence operations reference it. */
      if (!pmshr->mainreq->cohe_count)
        YS__PoolReturnObj(&YS__ReqPool, pmshr->mainreq);
    }
  else
    Cache_global_perform(captr, pmshr->mainreq, 1);

  for (i = 0; i < pmshr->counter; i++)
    {
      REQ *req = pmshr->coal_req[i];

      /*
       * In addition to latepf, also set hit_type of each coalesced REQUEST
       * to be L1HIT. This allows all of these times to be counted as the
       * appropriate component of execution time.
       */
      req->hit_type = L1IHIT;
      req->prefetched_late = latepf;

      /* L1 prefetches are never coalesced (they are dropped on a match). */
      if (req->prefetch == 4)
        YS__errmsg(captr->nodeid,
                   "L1ICache_uncoalesce_mshr: coalesced L1 prefetch");

      /* Point the request at its instructions within the filled line. */
      req->d.proc_instruction.instructions = captr->data +
        ((cline->index * captr->linesz +
          (req->paddr & captr->block_mask)) /
         SIZE_OF_SPARC_INSTRUCTION) * SIZEOF_INSTR;

      Cache_global_perform(captr, req, 1);
    }

  return 0;
}




/*=============================================================================
 * L1ProcessTagCohe: simulates coherence requests or invalidation
 * request to the L1 cache. An invalidation request is sent by the L2
 * when a miss victim is casted out of L2 cache in order to ensure the
 * "inclusion property".
*/
static int
L1IProcessTagCohe(CACHE * captr, REQ * req)
{
  cline_t  *cline;
  int       misscache;
  unsigned  paddr, vaddr, eaddr;

  /* A perfect I-cache holds everything; skip the tag walk entirely. */
  if (cparam.L1I_perfect)
    req->progress = 1;

  if (req->progress == 0)
    {
      /*
       * The coherence request covers one L2-sized block; walk all L1I
       * lines (sublines) it spans.
       */
      paddr = req->paddr & block_mask2;
      vaddr = req->vaddr & block_mask2;
      eaddr = paddr + ARCH_linesz2;

      for (; paddr < eaddr; paddr += ARCH_linesz1i, vaddr += ARCH_linesz1i)
        {
          misscache = Cache_search(captr, vaddr, paddr, &cline);
          if (misscache == 0)
            {                          /* line in cache */
              if (req->req_type == INVALIDATE)
                {
                  if (captr->gid == req->src_proc)
                    {
                      /*
                       * It's actually an invalidation request. Reset the tag
                       * so it won't be taken as a victim of coherence check.
                       */
                      cline->tag = -1;
                    }

                  cline->state = INVALID;

                  /* A prefetched line invalidated before use was useless. */
                  if (cline->pref == 1)
                    captr->pstats->sl1.useless++;
                  else if (cline->pref == 4)
                    captr->pstats->l1ip.useless++;
                }
              else
                /* Non-invalidating coherence action: downgrade to shared. */
                cline->state = SH_CL;
            }
        }

      req->progress = 1;
    }

  /* Recycle the REQ once no coherence operations still reference it. */
  if (!req->cohe_count)
    YS__PoolReturnObj(&YS__ReqPool, req);

  return 1;
}




/*=============================================================================
 * Start an L1-initiated prefetch. If L1Q is full, drop the prefetch.
 * Note that prefetch can not cross page boundary and no prefetch for
 * non-cacheable address.
*/
static void
L1ICache_start_prefetch(CACHE *captr, REQ *req)
{
  REQ      *newreq;
  unsigned  newpaddr = req->paddr + captr->linesz;

  /* Prefetches never cross a page boundary and never target uncached
     memory. */
  if (!SAMEPAGE(req->paddr, newpaddr) || tlb_uncached(req->memattributes))
    return;

  captr->pstats->l1ip.total++;

  /* If the L1 request queue is already full, drop the prefetch. */
  if (L1IQ_FULL[captr->gid])
    {
      captr->pstats->l1ip.dropped++;
      return;
    }

  /*
   * General information: a next-line read request marked as an
   * L1-initiated prefetch (prefetch == 4).
   */
  newreq = (REQ *) YS__PoolGetObj(&YS__ReqPool);
  newreq->type          = REQUEST;
  newreq->req_type      = READ;
  newreq->prefetch      = 4;
  newreq->node          = captr->nodeid;
  newreq->src_proc      = captr->procid;
  newreq->dest_proc     = req->dest_proc;
  newreq->prcr_req_type = req->prcr_req_type;
  newreq->paddr         = newpaddr;
  newreq->vaddr         = req->vaddr + captr->linesz;
  newreq->ifetch        = 1;
  newreq->l1mshr        = 0;
  newreq->l2mshr        = 0;
  newreq->memattributes = req->memattributes;
  newreq->line_cold     = 0;

  /*
   * for statistics
   */
  newreq->issue_time    = YS__Simtime;
  newreq->hit_type      = UNKHIT;

  /*
   * Add it to REQUEST input queue of L1 cache, which must then know that
   * there is something on its input queue so as to be activated by
   * cycle-by-cycle simulator. If this access fills up the port, the
   * L1Q_FULL variable is set so that the processor does not issue any
   * more memory references until the port clears up.
   */
  lqueue_add(&(captr->request_queue), newreq, captr->nodeid);
  captr->inq_empty = 0;
  L1IQ_FULL[captr->gid] = lqueue_full(&(captr->request_queue));
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -