
ref.c

This is a trace-driven cache simulator (part of Dinero IV), written in C.
}
#endif	/* !D4CUSTOM || D4_OPT (walloc_never) */

#if !D4CUSTOM || D4_OPT (walloc_nofetch)
/*
 * Write allocate if no fetch is required
 * (write exactly fills an integral number of subblocks)
 */
D4_INLINE
int
d4walloc_nofetch (d4cache *c, d4memref m)
{
	return m.size == D4REFNSB(c,m) << D4VAL (c, lg2subblocksize);
}
#endif	/* !D4CUSTOM || D4_OPT (walloc_nofetch) */

#if !D4CUSTOM || D4_OPT (wback_always)
/*
 * Always write back
 */
D4_INLINE
int
d4wback_always (d4cache *c, d4memref m, int setnumber, d4stacknode *ptr, int walloc)
{
	return 1;
}
#endif	/* !D4CUSTOM || D4_OPT (wback_always) */

#if !D4CUSTOM || D4_OPT (wback_never)
/*
 * Never write back (i.e., always write through)
 */
D4_INLINE
int
d4wback_never (d4cache *c, d4memref m, int setnumber, d4stacknode *ptr, int walloc)
{
	return 0;
}
#endif	/* !D4CUSTOM || D4_OPT (wback_never) */

#if !D4CUSTOM || D4_OPT (wback_nofetch)
/*
 * Write back if no fetch is required.
 * The actual test is for every affected subblock to be valid or
 * for the write to completely cover all affected subblocks.
 */
D4_INLINE
int
d4wback_nofetch (d4cache *c, d4memref m, int setnumber, d4stacknode *ptr, int walloc)
{
	return (D4ADDR2SBMASK(c,m) & ~ptr->valid) == 0 ||
	       m.size == (D4REFNSB(c,m) << D4VAL (c, lg2subblocksize));
}
#endif	/* !D4CUSTOM || D4_OPT (wback_nofetch) */
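
These walloc/wback predicates are not called directly by user code; they are installed in a cache's wallocf and wbackf function-pointer fields and invoked through D4VAL on every write. A minimal configuration sketch, assuming the d4new() constructor and field names declared in Dinero IV's d4.h (the policy pairing chosen here is illustrative, not prescriptive):

#include "d4.h"	/* d4cache, d4new, and the policy functions above */

/* Sketch: pick the "no-fetch" write policies for a new cache level. */
static d4cache *
make_cache (d4cache *downstream)
{
	d4cache *c = d4new (downstream);
	c->wallocf = d4walloc_nofetch;	/* allocate on write only if the write
					 * exactly fills whole subblocks */
	c->wbackf = d4wback_nofetch;	/* write back unless a fetch would be needed */
	return c;
}

The d4walloc_always/d4walloc_never and d4wback_always/d4wback_never routines (the latter two above, the former earlier in this file) are drop-in alternatives for the same two slots.
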
#if !D4CUSTOM || D4_OPT (ccc)
/*
 * This function implements an infinite-sized cache, used
 * when classifying cache misses into compulsory, capacity,
 * and conflict misses.
 *
 * Return value:
 *	-1 if at least 1 affected subblock (but not the whole block)
 *		misses in the infinite cache
 *	0 if all affected subblocks hit in the infinite cache
 *	1 if the whole block misses in the infinite cache
 * Note we require that the number of subblocks per block be a
 *	divisor of D4_BITMAP_RSIZE, so blocks are not split across bitmaps
 */
static int
d4infcache (d4cache *c, d4memref m)
{
	const unsigned int sbsize = 1 << D4VAL (c, lg2subblocksize);
	const d4addr sbaddr = D4ADDR2SUBBLOCK (c, m.address);
	const int nsb = D4REFNSB (c, m);
	unsigned int bitoff;	/* offset of bit in bitmap */
	int hi, lo, i, b;
	static int totranges = 0, totbitmaps = 0;

	bitoff = (sbaddr & (D4_BITMAP_RSIZE-1)) / sbsize;

	/* binary search for range containing our address */
	hi = c->nranges-1;
	lo = 0;
	while (lo <= hi) {
		i = lo + (hi-lo)/2;
		if (c->ranges[i].addr + D4_BITMAP_RSIZE <= sbaddr)
			lo = i + 1;	/* need to look higher */
		else if (c->ranges[i].addr > sbaddr)
			hi = i - 1;	/* need to look lower */
		else {	/* found the right range */
			const int sbpb = 1 << (D4VAL (c, lg2blocksize) - D4VAL (c, lg2subblocksize));
			int nb;

			/* count affected bits we've seen */
			for (nb = 0, b = 0;  b < nsb;  b++)
				nb += ((c->ranges[i].bitmap[(bitoff+b)/CHAR_BIT] &
					(1<<((bitoff+b)%CHAR_BIT))) != 0);
			if (nb == nsb)
				return 0;	/* we've seen it all before */

			/* consider the whole block */
			if (sbpb != 1 && nsb != sbpb) {
				unsigned int bbitoff = (D4ADDR2BLOCK (c, m.address) &
							(D4_BITMAP_RSIZE-1)) / sbsize;
				for (nb = 0, b = 0;  b < sbpb;  b++)
					nb += ((c->ranges[i].bitmap[(bbitoff+b)/CHAR_BIT] &
						(1<<((bbitoff+b)%CHAR_BIT))) != 0);
			}

			/* set the bits */
			for (b = 0;  b < nsb;  b++)
				c->ranges[i].bitmap[(bitoff+b)/CHAR_BIT] |=
					(1<<((bitoff+b)%CHAR_BIT));
			return nb==0 ? 1 : -1;
		}
	}

	/* lo > hi: range not found; find position and insert new range */
	if (c->nranges >= c->maxranges-1) {
		/* ran out of range pointers; allocate some more */
		int oldmaxranges = c->maxranges;
		c->maxranges = (c->maxranges + 10) * 2;
		if (c->ranges == NULL)	/* don't trust realloc(NULL,...) */
			c->ranges = malloc (c->maxranges * sizeof(*c->ranges));
		else
			c->ranges = realloc (c->ranges, c->maxranges * sizeof(*c->ranges));
		if (c->ranges == NULL) {
			fprintf (stderr, "DineroIV: can't allocate more "
				 "bitmap pointers for cache %s (%d so far, total %d)\n",
				 c->name, oldmaxranges, totranges);
			exit(1);
		}
		totranges++;
	}
	for (i = c->nranges++ - 1;  i >= 0;  i--) {
		if (c->ranges[i].addr < sbaddr)
			break;
		c->ranges[i+1] = c->ranges[i];
	}
	c->ranges[i+1].addr = sbaddr & ~(D4_BITMAP_RSIZE-1);
	c->ranges[i+1].bitmap = calloc ((((D4_BITMAP_RSIZE + sbsize - 1)
					/ sbsize) + CHAR_BIT - 1) / CHAR_BIT, 1);
	if (c->ranges[i+1].bitmap == NULL) {
		fprintf (stderr, "DineroIV: can't allocate another bitmap "
			 "(currently %d, total %d, each mapping 0x%x bytes)\n",
			 c->nranges-1, totbitmaps, D4_BITMAP_RSIZE);
		exit(1);
	}
	totbitmaps++;
	for (b = 0;  b < nsb;  b++, bitoff++)
		c->ranges[i+1].bitmap[bitoff/CHAR_BIT] |= (1<<(bitoff%CHAR_BIT));
	return 1;	/* we've not seen it before */
}
#endif /* !D4CUSTOM || D4_OPT (ccc) */
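
The heart of d4infcache is plain bit accounting: for the nsb subblocks a reference touches, count how many bits are already set, then set them all; none seen means a brand-new (compulsory) block, all seen means an infinite-cache hit, anything else is partial. Below is a stripped-down, self-contained sketch of just that accounting, using one flat bitmap instead of the binary-searched ranges[] array (the real code additionally examines the enclosing block's bits to decide between returning 1 and -1):

#include <stdio.h>
#include <limits.h>

#define NBITS 1024	/* subblocks tracked by this toy map */
static unsigned char seen[(NBITS + CHAR_BIT - 1) / CHAR_BIT];

/* Like d4infcache's core: 1 = nothing seen before, 0 = all seen, -1 = mixed. */
static int
touch (unsigned bitoff, int nsb)
{
	int b, nb = 0;
	for (b = 0;  b < nsb;  b++)	/* count affected bits we've seen */
		nb += (seen[(bitoff+b)/CHAR_BIT] >> ((bitoff+b)%CHAR_BIT)) & 1;
	for (b = 0;  b < nsb;  b++)	/* set the bits */
		seen[(bitoff+b)/CHAR_BIT] |= 1 << ((bitoff+b)%CHAR_BIT);
	return nb == nsb ? 0 : (nb == 0 ? 1 : -1);
}

int
main (void)
{
	int a = touch (8, 4);	/* first touch: 1 */
	int b = touch (8, 4);	/* full re-touch: 0 */
	int c = touch (10, 4);	/* partial overlap: -1 */
	printf ("%d %d %d\n", a, b, c);
	return 0;
}
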
/*
 * Split a memory reference if it crosses a block boundary.
 * The remainder, if any, is queued for processing later.
 */
D4_INLINE
d4memref
d4_splitm (d4cache *c, d4memref mr, d4addr ba)
{
	const int bsize = 1 << D4VAL (c, lg2blocksize);
	const int bmask = bsize - 1;
	int newsize;
	d4pendstack *pf;

	if (ba == D4ADDR2BLOCK (c, mr.address + mr.size - 1))
		return mr;
	pf = d4get_mref();
	pf->m.address = ba + bsize;
	pf->m.accesstype = mr.accesstype | D4_MULTIBLOCK;
	newsize = bsize - (mr.address&bmask);
	pf->m.size = mr.size - newsize;
	pf->next = c->pending;
	c->pending = pf;
	c->multiblock++;
	mr.size = newsize;
	return mr;
}
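
For a concrete picture of the split: with 16-byte blocks (lg2blocksize = 4), an 8-byte reference at 0x1c straddles blocks 0x10 and 0x20, so d4_splitm returns a 4-byte piece at 0x1c and queues a 4-byte D4_MULTIBLOCK remainder at 0x20. A standalone sketch of the same arithmetic (illustrative only, no d4 types):

#include <stdio.h>

int
main (void)
{
	const int lg2blocksize = 4;		/* 16-byte blocks, for example */
	const int bsize = 1 << lg2blocksize;
	const int bmask = bsize - 1;
	unsigned long address = 0x1c;		/* 4 bytes before block 0x20 */
	int size = 8;

	int newsize = bsize - (int)(address & bmask);	/* bytes left in first block */
	printf ("first piece:      addr=0x%lx size=%d\n", address, newsize);
	printf ("queued remainder: addr=0x%lx size=%d\n",
		(address & ~(unsigned long)bmask) + bsize, size - newsize);
	return 0;
}
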
/*
 * Handle a memory reference for the given cache.
 * The user calls this function for the cache closest to
 * the processor; other caches are handled automatically.
 */
void
d4ref (d4cache *c, d4memref mr)
{
    /* special cases first */
    if ((D4VAL (c, flags) & D4F_MEM) != 0)	/* Special case for simulated memory */
	c->fetch[(int)mr.accesstype]++;
    else if (mr.accesstype == D4XCOPYB || mr.accesstype == D4XINVAL) {
	d4memref m = mr;	/* dumb compilers might de-optimize if we take addr of mr */
	if (m.accesstype == D4XCOPYB)
		d4copyback (c, &m, 1);
	else
		d4invalidate (c, &m, 1);
    }
    else {				/* Everything else */
	const d4addr blockaddr = D4ADDR2BLOCK (c, mr.address);
	const d4memref m = d4_splitm (c, mr, blockaddr);
	const int atype = D4BASIC_ATYPE (m.accesstype);
	const int setnumber = D4ADDR2SET (c, m.address);
	const int ronly = D4CUSTOM && (D4VAL (c, flags) & D4F_RO) != 0;	/* conservative */
	const int walloc = !ronly && atype == D4XWRITE && D4VAL (c, wallocf) (c, m);
	const int sbbits = D4ADDR2SBMASK (c, m);
	int miss, blockmiss, wback;
	d4stacknode *ptr;

	if ((D4VAL (c, flags) & D4F_RO) != 0 && atype == D4XWRITE) {
		fprintf (stderr, "Dinero IV: write to read-only cache %d (%s)\n",
			 c->cacheid, c->name);
		exit (9);
	}

	/*
	 * Find address in the cache.
	 * Quickly check for top of stack.
	 */
	ptr = c->stack[setnumber].top;
	if (ptr->blockaddr == blockaddr && ptr->valid != 0)
		;	/* found it */
	else if (!D4CUSTOM || D4VAL (c, assoc) > 1)
		ptr = d4_find (c, setnumber, blockaddr);
	else
		ptr = NULL;
	blockmiss = (ptr == NULL);
	miss = blockmiss || (sbbits & ptr->valid) != sbbits;

	/*
	 * Prefetch on reads and instruction fetches, but not on
	 * writes, misc, and prefetch references.
	 * Optionally, some percentage may be thrown away.
	 */
	if ((!D4CUSTOM || !D4_OPT (prefetch_none)) &&
	    (m.accesstype == D4XREAD || m.accesstype == D4XINSTRN)) {
		d4pendstack *pf = D4VAL (c, prefetchf) (c, m, miss, ptr);
		if (pf != NULL) {
			/* Note: 0 <= random() <= 2^31-1 and 0 <= random()/(INT_MAX/100) < 100. */
			if (D4VAL (c, prefetch_abortpercent) > 0 &&
			    random()/(INT_MAX/100) < D4VAL (c, prefetch_abortpercent))
				d4put_mref (pf);	/* throw it away */
			else {
				pf->next = c->pending;	/* add to pending list */
				c->pending = pf;
			}
		}
	}

	/*
	 * Update the cache.
	 * Don't do it for non-write-allocate misses.
	 */
	wback = 0;
	if (ronly || atype != D4XWRITE || !blockmiss || walloc) {
		/*
		 * Adjust priority stack as necessary
		 */
		ptr = D4VAL (c, replacementf) (c, setnumber, m, ptr);

		/*
		 * Update state bits
		 */
		if (blockmiss) {
			assert (ptr->valid == 0);
			ptr->referenced = 0;
			ptr->dirty = 0;
		}
		ptr->valid |= sbbits;
		if ((m.accesstype & D4PREFETCH) == 0)
			ptr->referenced |= sbbits;

		/*
		 * For writes, decide if write-back or write-through.
		 * Set the dirty bits if write-back is going to be used.
		 */
		wback = !ronly &&
			(atype == D4XWRITE) &&
			D4VAL (c, wbackf) (c, m, setnumber, ptr, walloc);
		if (wback)
			ptr->dirty |= sbbits;

		/*
		 * Take care of replaced block,
		 * including write-back if necessary.
		 */
		if (blockmiss) {
			d4stacknode *rptr = c->stack[setnumber].top->up;
			if (rptr->valid != 0) {
				if (!ronly && (rptr->valid & rptr->dirty) != 0)
					d4_wbblock (c, rptr, D4VAL (c, lg2subblocksize));
				if (c->stack[setnumber].n > D4HASH_THRESH)
					d4_unhash (c, setnumber, rptr);
				rptr->valid = 0;
			}
		}
	}

	/*
	 * Prepare reference for downstream cache.
	 * We do this for write-throughs, read-type misses,
	 * and fetches for incompletely written subblocks
	 * when a write misses and write-allocate is being used.
	 * In some cases, a write can generate two downstream references:
	 * a fetch to load the complete subblock and a write-through store.
	 */
	if (!ronly && atype == D4XWRITE && !wback) {
		d4pendstack *newm = d4get_mref();
		newm->m = m;
		newm->next = c->pending;
		c->pending = newm;
	}
	if (miss && (ronly || atype != D4XWRITE ||
		     (walloc && m.size != D4REFNSB (c, m) << D4VAL (c, lg2subblocksize)))) {
		d4pendstack *newm = d4get_mref();
		/* note, we drop prefetch attribute */
		newm->m.accesstype = (atype == D4XWRITE) ? D4XREAD : atype;
		newm->m.address = D4ADDR2SUBBLOCK (c, m.address);
		newm->m.size = D4REFNSB (c, m) << D4VAL (c, lg2subblocksize);
		newm->next = c->pending;
		c->pending = newm;
	}

	/*
	 * Do fully associative and infinite sized caches too.
	 * This allows classifying misses into {compulsory,capacity,conflict}.
	 * An extra "set" is provided (==c->numsets) for the fully associative
	 * simulation.
	 */
	if ((D4CUSTOM && D4_OPT (ccc)) ||
	    (!D4CUSTOM && (c->flags & D4F_CCC) != 0)) {
				/* set to use for fully assoc cache */
		const int fullset = D4VAL(c,numsets);
				/* number of blocks in fully assoc cache */
		int fullmiss, fullblockmiss;	/* like miss and blockmiss, but for fully assoc cache */

		ptr = c->stack[fullset].top;
		if (ptr->blockaddr != blockaddr)
			ptr = d4_find (c, fullset, blockaddr);
		else if (ptr->valid == 0)
			ptr = NULL;
		fullblockmiss = (ptr == NULL);
		fullmiss = fullblockmiss || (sbbits & ptr->valid) != sbbits;

		/* take care of stack update */
		if (ronly || atype != D4XWRITE || !fullblockmiss || walloc) {
			ptr = D4VAL (c, replacementf) (c, fullset, m, ptr);
			assert (!fullblockmiss || ptr->valid == 0);
			ptr->valid |= sbbits;
		}

		/* classify misses */
		if (miss) {
			int infmiss = 0;	/* assume hit in infinite cache */
			if (!fullmiss)	/* hit in fully assoc: conflict miss */
				c->conf_miss[(int)m.accesstype]++;
			else {
				infmiss = d4infcache (c, m);
				if (infmiss != 0)	/* first miss: compulsory */
					c->comp_miss[(int)m.accesstype]++;
				else	/* hit in infinite cache: capacity miss */
					c->cap_miss[(int)m.accesstype]++;
			}
			if (blockmiss) {
				if (!fullblockmiss)	/* block hit in full assoc */
					c->conf_blockmiss[(int)m.accesstype]++;
				else if (infmiss == 1)	/* block miss in full and inf */
					c->comp_blockmiss[(int)m.accesstype]++;
				else	/* part of block hit in infinite cache */
					c->cap_blockmiss[(int)m.accesstype]++;
			}
		}

		/* take care of replaced block */
		if (fullblockmiss) {
			d4stacknode *rptr = c->stack[fullset].top->up;
			if (rptr->valid != 0) {
				if (c->stack[fullset].n > D4HASH_THRESH)
					d4_unhash (c, fullset, rptr);
				rptr->valid = 0;
			}
		}
	}

	/*
	 * Update non-ccc metrics.
	 */
	c->fetch[(int)m.accesstype]++;
	if (miss) {
		c->miss[(int)m.accesstype]++;
		if (blockmiss)
			c->blockmiss[(int)m.accesstype]++;
	}

	/*
	 * Now make recursive calls for pending references
	 */
	if (c->pending)
		d4_dopending (c, c->pending);
    }
}
#endif /* !D4CUSTOM || D4_REF_ONCE>1 */

#undef D4_REF_ONCE
#define D4_REF_ONCE 2	/* from now on, skip the first stuff and do the rest */
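
End to end, a user builds the cache hierarchy, calls d4setup(), and then feeds each trace record to d4ref() on the cache nearest the processor; misses and write-throughs propagate downstream through the pending list. A minimal driver sketch, assuming the d4new()/d4setup() interface and the metric fields from Dinero IV's d4.h (sizes, policies, and the name fields here are illustrative assumptions):

#include <stdio.h>
#include "d4.h"

int
main (void)
{
	d4memref r;
	d4cache *mem = d4new (NULL);	/* terminal node; d4new(NULL) marks it as memory */
	d4cache *l1 = d4new (mem);

	l1->name = "l1-data";
	l1->lg2blocksize = 5;		/* 32-byte blocks */
	l1->lg2subblocksize = 5;	/* no sub-blocking */
	l1->lg2size = 14;		/* 16 KiB */
	l1->assoc = 2;
	l1->replacementf = d4rep_lru;
	l1->prefetchf = d4prefetch_none;
	l1->wallocf = d4walloc_always;
	l1->wbackf = d4wback_always;
	l1->name_replacement = l1->name_prefetch =
		l1->name_walloc = l1->name_wback = "default";

	if (d4setup () != 0) {
		fprintf (stderr, "d4setup failed\n");
		return 1;
	}

	r.address = 0x1000;  r.size = 4;  r.accesstype = D4XREAD;
	d4ref (l1, r);			/* compulsory miss, fetched from mem */
	r.accesstype = D4XWRITE;
	d4ref (l1, r);			/* hits the block just brought in */

	printf ("l1: fetches=%.0f misses=%.0f\n",
		l1->fetch[D4XREAD] + l1->fetch[D4XWRITE],
		l1->miss[D4XREAD] + l1->miss[D4XWRITE]);
	return 0;
}
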
