📄 l1i_cache.c
字号:
*/ pmshr = 0; if (captr->mshr_count > 0) { unsigned blocknum = ADDR2BNUM1I(req->paddr); for (i = 0; i < captr->max_mshrs; i++) { if ((captr->mshrs[i].valid == 1) && (blocknum == captr->mshrs[i].blocknum)) { pmshr = &(captr->mshrs[i]); break; } } } /* * Determine if the requested line is available in the cache. */ misscache = Cache_search(captr, req->vaddr, req->paddr, &cline); req->d.proc_instruction.instructions = captr->data + ((cline->index * captr->linesz + (req->paddr & captr->block_mask)) / SIZE_OF_SPARC_INSTRUCTION) * SIZEOF_INSTR; /* * If it's an L1 prefetch, we will drop it no matter what as long as it * matches an MSHR or a cache line. */ if ((req->prefetch == 4) && (pmshr || misscache == 0)) return MSHR_USELESS_FETCH; if (pmshr == 0) { /* the line is not present in any MSHR */ if (misscache == 0) { if (Cache_hit_update(captr, cline, req)) { /* update LRU ages */ if (cparam.L1I_prefetch & 2) L1ICache_start_prefetch(captr, req); } req->hit_type = L1IHIT; return NOMSHR; } /* The line is either not in l1 cache, or in l1 cache but the cache line is in share state. */ if (req->prefetch == 2) { /* * An L2 prefetch goes to the next level of cache without * taking an MSHR here. */ return NOMSHR_FWD; } /**** Otherwise, we need to allocate an MSHR for this request ****/ if (captr->mshr_count == captr->max_mshrs) { /* * None are available, the value "NOMSHR_STALL" is returned. */ captr->pstats->l1istall.full++; return NOMSHR_STALL; } /* * Find the first free MSHR. */ for (i = 0; i < captr->max_mshrs; i++) { if (captr->mshrs[i].valid != 1) { pmshr = &(captr->mshrs[i]); break; } } req->l1mshr = pmshr; pmshr->valid = 1; pmshr->mainreq = req; pmshr->setnum = cline->index >> captr->set_shift; pmshr->blocknum = req->paddr >> captr->block_shift; pmshr->counter = 0; pmshr->pending_cohe = 0; pmshr->stall_WAR = 0; pmshr->demand = -1.0; pmshr->only_prefs = (req->prefetch) ? 
1 : 0; pmshr->has_writes = 0; pmshr->cline = cline; pmshr->misscache = misscache; pmshr->pend_opers = 0; captr->mshr_count++; captr->reqmshr_count++; captr->reqs_at_mshr_count++; return MSHR_NEW; } /* Matches an MSHR. The REQUEST must be merged, dropped, forwarded or stalled. */ /* * If there are pending flush/purge operations, stall. */ if (pmshr->pend_opers) { captr->pstats->l1istall.flush++; return MSHR_STALL_FLUSH; } /* * Now, how does the MSHR handle prefetch matches? At the first level * cache, prefetches should either be dropped, forwarded around cache, or * stalled. There is never any need to coalesce at L1, since one fetch is * sufficient. At the L2, though, prefetches cannot be dropped, as they * might already have many requests coalesced into them and waiting for * them at the L1 cache. */ if (req->prefetch) { /* * If a read prefetch wants to coalesce at L1 cache, drop it. */ return MSHR_USELESS_FETCH; } /* * Too many requests coalesced with MSHR */ if (pmshr->counter == MAX_COALS-1) { captr->pstats->l1istall.coal++; return MSHR_STALL_COAL; } /* * No problems with coalescing the request, so coalesce it. */ pmshr->coal_req[pmshr->counter++] = req; if (pmshr->only_prefs && !req->prefetch) { /* * Demand access coalesces with late prefetch accesses. */ if (pmshr->mainreq->prefetch == 1) { captr->pstats->sl1.late++; captr->pstats->sl1.useful++; } else if (pmshr->mainreq->prefetch == 4) { captr->pstats->l1ip.late++; captr->pstats->l1ip.useful++; } pmshr->only_prefs = 0; pmshr->demand = YS__Simtime; } return MSHR_COAL;}/*============================================================================= * L1ProcessTagReply: simulates access replies to the L1 cache. * This functions always return 1 because it won't fail for a write-through * cache -- no write-back to lower level, no invalidation to higher level. 
*/
static int
L1IProcessTagReply(CACHE * captr, REQ * req)
{
  int       misscache;
  HIT_TYPE  hit_type;              /* NOTE(review): declared but never used */
  MISS_TYPE ccdres;
  cline_t  *cline;
  MSHR     *pmshr;

  /* A perfect I-cache never misses, so it can never receive a reply. */
  if (cparam.L1I_perfect)
    YS__errmsg(captr->nodeid, "Perfect L1 I-cache shouldn't receive REPLY");

  if (req->progress == 0)
    {
      /* no progress made so far */
      pmshr = req->l1mshr;
      if (pmshr == 0)
        YS__errmsg(captr->nodeid,
                   "L1I Cache %i received a reply for a nonexistent MSHR\n",
                   captr->procid);

      /*
       * Collect statistics if the mainreq is a late prefetch.
       * (demand > 0 marks an MSHR whose prefetch was caught by a demand
       * access before the reply arrived; lateness is the waiting time.)
       */
      if (pmshr->demand > 0)
        {
          if (pmshr->mainreq->prefetch == 1)
            captr->pstats->sl1.lateness += YS__Simtime - pmshr->demand;
          else if (pmshr->mainreq->prefetch == 4)
            captr->pstats->l1ip.lateness += YS__Simtime - pmshr->demand;
          pmshr->demand = -1;
        }

      cline = pmshr->cline;
      misscache = pmshr->misscache;
      req->wrb_req = 0;

      if (misscache == 0)          /* line present in cache -- upgrade reply */
        cline->state = PR_DY;
      else
        {                          /* line not present in cache */
          if (misscache == 1)
            {                      /* "present" miss -- COHE miss */
              Cache_pmiss_update(captr, req, cline->index, pmshr->only_prefs);
              CCD_InsertNewLine(captr->ccd, ADDR2BNUM1I(req->paddr));
              req->miss_type1 = CACHE_MISS_COHE;
            }
          else
            {                      /* total miss: find a victim and updates ages */
              if (!Cache_miss_update(captr, req, &cline, pmshr->only_prefs))
                YS__errmsg(captr->nodeid,"L1WT should never see a NO_REPLACE");

              /* We should determine COLD/CAP/CONF */
              ccdres = CCD_InsertNewLine(captr->ccd, ADDR2BNUM1I(req->paddr));
              req->miss_type1 = req->line_cold ? CACHE_MISS_COLD : ccdres;
              /* Cache_miss_update may have picked a different victim line. */
              pmshr->cline = cline;
            }

          /* Victim accounting by the evicted line's state.  A dirty victim
             should be impossible in an I-cache, hence the warning. */
          if (cline->state == SH_CL)
            captr->pstats->shcl_victims1i++;
          else if (cline->state == PR_CL)
            captr->pstats->prcl_victims1i++;
          else if (cline->state == PR_DY)
            {
              YS__warnmsg(captr->nodeid, "Writeback from I-Cache !!!");
              captr->pstats->prdy_victims1i++;
            }

          /*
           * Now fill in the information for the new line
           */
          cline->tag = req->paddr >> captr->tag_shift;
          cline->vaddr = req->vaddr & block_mask1i;
          if (pmshr->has_writes)
            cline->state = PR_DY;
          else
            cline->state = (req->req_type == REPLY_SH) ? SH_CL : PR_CL;

          /* Predecode the freshly filled block into the simulator's
             internal instruction format. */
          PredecodeBlock(req->paddr & block_mask1i, req->node,
                         captr->data + cline->index * captr->linesz /
                         SIZE_OF_SPARC_INSTRUCTION * SIZEOF_INSTR,
                         captr->linesz / SIZE_OF_SPARC_INSTRUCTION);

          /* Point the request at the predecoded instructions within the
             block, offset by the requested address. */
          req->d.proc_instruction.instructions = captr->data +
            ((cline->index * captr->linesz +
              (req->paddr & captr->block_mask)) /
             SIZE_OF_SPARC_INSTRUCTION) * SIZEOF_INSTR;
        }

      req->progress = 1;
    }

  if (req->progress == 1)
    req->progress = 2;

  /*
   * Now, resolve all requests coalesced into the MSHR in question.  Send
   * them back to the processor for possible further handling.  And if any
   * flush/purge transaction is pending, activate it.
   */
  pmshr = req->l1mshr;
  L1ICache_uncoalesce_mshr(captr, pmshr);

  /* Pending flush/purge: invalidate the just-filled line and count a
     prefetched-but-never-used line where applicable. */
  if (pmshr->pend_opers && pmshr->cline->state != INVALID)
    {
      if (pmshr->cline->pref == 1)
        captr->pstats->sl1.useless++;
      else if (pmshr->cline->pref == 4)
        captr->pstats->l1ip.useless++;
      pmshr->cline->state = INVALID;
      pmshr->cline->tag = -1;
    }

  /*
   * free up the MSHR, or release all coalesced requests.
   */
  Cache_free_mshr(captr, pmshr);

  return 1;
}




/*=============================================================================
 * Uncoalesce each request coalesced in the specified MSHR.
*/
/*
 * Release the main request and every request coalesced into 'pmshr'.
 * The main request is globally performed (or returned to the pool if it
 * was an L1 prefetch with no outstanding coherence count); each coalesced
 * request is marked as an L1I hit, given its instruction pointer within
 * the now-filled line, and globally performed.  Always returns 0.
 */
int L1ICache_uncoalesce_mshr(CACHE *captr, MSHR *pmshr)
{
  int       latepf = (pmshr->demand > 0.0);
  int       i;
  cline_t  *cline;

  /*
   * If this MSHR had a late prefetch, set prefetched_late field to allow
   * system to count every access coalesced into the MSHR as part of "late PF"
   * time in statistics. Is this reasonable?
   */
  pmshr->mainreq->prefetched_late = latepf;

  cline = pmshr->cline;

  if (pmshr->mainreq->prefetch == 4)
    {
      /* L1-initiated prefetch: nobody waits on it, just recycle the REQ
         once no coherence operations reference it. */
      if (!pmshr->mainreq->cohe_count)
        YS__PoolReturnObj(&YS__ReqPool, pmshr->mainreq);
    }
  else
    Cache_global_perform(captr, pmshr->mainreq, 1);

  for (i = 0; i < pmshr->counter; i++)
    {
      REQ *req = pmshr->coal_req[i];

      /*
       * In addition to latepf, also set hit_type of each coalesced REQUEST
       * to be L1HIT. This allows all of these times to be counted as the
       * appropriate component of execution time.
       */
      req->hit_type = L1IHIT;
      req->prefetched_late = latepf;

      /* L1 prefetches are never coalesced (they are dropped on a match). */
      if (req->prefetch == 4)
        YS__errmsg(captr->nodeid,
                   "L1ICache_uncoalesce_mshr: coalesced L1 prefetch");

      /* Point the request at its instructions within the filled line. */
      req->d.proc_instruction.instructions = captr->data +
        ((cline->index * captr->linesz +
          (req->paddr & captr->block_mask)) /
         SIZE_OF_SPARC_INSTRUCTION) * SIZEOF_INSTR;

      Cache_global_perform(captr, req, 1);
    }

  return 0;
}




/*=============================================================================
 * L1ProcessTagCohe: simulates coherence requests or invalidation
 * request to the L1 cache. An invalidation request is sent by the L2
 * when a miss victim is casted out of L2 cache in order to ensure the
 * "inclusion property".
*/
static int
L1IProcessTagCohe(CACHE * captr, REQ * req)
{
  cline_t  *cline;
  int       misscache;
  unsigned  paddr, vaddr, eaddr;

  /* A perfect I-cache holds everything; skip the tag walk entirely. */
  if (cparam.L1I_perfect)
    req->progress = 1;

  if (req->progress == 0)
    {
      /*
       * The coherence request covers one L2-sized block; walk all L1I
       * lines (sublines) it spans.
       */
      paddr = req->paddr & block_mask2;
      vaddr = req->vaddr & block_mask2;
      eaddr = paddr + ARCH_linesz2;

      for (; paddr < eaddr; paddr += ARCH_linesz1i, vaddr += ARCH_linesz1i)
        {
          misscache = Cache_search(captr, vaddr, paddr, &cline);
          if (misscache == 0)
            {                          /* line in cache */
              if (req->req_type == INVALIDATE)
                {
                  if (captr->gid == req->src_proc)
                    {
                      /*
                       * It's actually an invalidation request. Reset the tag
                       * so it won't be taken as a victim of coherence check.
                       */
                      cline->tag = -1;
                    }

                  cline->state = INVALID;

                  /* A prefetched line invalidated before use was useless. */
                  if (cline->pref == 1)
                    captr->pstats->sl1.useless++;
                  else if (cline->pref == 4)
                    captr->pstats->l1ip.useless++;
                }
              else
                /* Non-invalidating coherence action: downgrade to shared. */
                cline->state = SH_CL;
            }
        }

      req->progress = 1;
    }

  /* Recycle the REQ once no coherence operations still reference it. */
  if (!req->cohe_count)
    YS__PoolReturnObj(&YS__ReqPool, req);

  return 1;
}




/*=============================================================================
 * Start an L1-initiated prefetch. If L1Q is full, drop the prefetch.
 * Note that prefetch can not cross page boundary and no prefetch for
 * non-cacheable address.
*/
static void
L1ICache_start_prefetch(CACHE *captr, REQ *req)
{
  REQ      *newreq;
  unsigned  newpaddr = req->paddr + captr->linesz;

  /* Prefetches never cross a page boundary and never target uncached
     memory. */
  if (!SAMEPAGE(req->paddr, newpaddr) || tlb_uncached(req->memattributes))
    return;

  captr->pstats->l1ip.total++;

  /* If the L1 request queue is already full, drop the prefetch. */
  if (L1IQ_FULL[captr->gid])
    {
      captr->pstats->l1ip.dropped++;
      return;
    }

  /*
   * General information: a next-line read request marked as an
   * L1-initiated prefetch (prefetch == 4).
   */
  newreq = (REQ *) YS__PoolGetObj(&YS__ReqPool);
  newreq->type          = REQUEST;
  newreq->req_type      = READ;
  newreq->prefetch      = 4;
  newreq->node          = captr->nodeid;
  newreq->src_proc      = captr->procid;
  newreq->dest_proc     = req->dest_proc;
  newreq->prcr_req_type = req->prcr_req_type;
  newreq->paddr         = newpaddr;
  newreq->vaddr         = req->vaddr + captr->linesz;
  newreq->ifetch        = 1;
  newreq->l1mshr        = 0;
  newreq->l2mshr        = 0;
  newreq->memattributes = req->memattributes;
  newreq->line_cold     = 0;

  /*
   * for statistics
   */
  newreq->issue_time    = YS__Simtime;
  newreq->hit_type      = UNKHIT;

  /*
   * Add it to REQUEST input queue of L1 cache, which must then know that
   * there is something on its input queue so as to be activated by
   * cycle-by-cycle simulator. If this access fills up the port, the
   * L1Q_FULL variable is set so that the processor does not issue any
   * more memory references until the port clears up.
   */
  lqueue_add(&(captr->request_queue), newreq, captr->nodeid);
  captr->inq_empty = 0;
  L1IQ_FULL[captr->gid] = lqueue_full(&(captr->request_queue));
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -