📄 pcache.c
字号:
/* * Copyright (C) 1996-1998 by the Board of Trustees * of Leland Stanford Junior University. * * This file is part of the SimOS distribution. * See LICENSE file for terms of the license. * *//***************************************************************** * pcache.c * * Primary cache implemented as up to 4-way associative * caches, compile time choice of associativity, size, & line size. * * $Author: bosch $ * $Date: 1998/02/10 00:27:59 $ *******************************************************************/#include <stdio.h>#include <time.h>#include <assert.h>#include <malloc.h>#include <string.h>#include "syslimits.h"#include "sim_error.h"#include "../memref.h"#include "pcache.h"#include "scache.h"#include "simrecord.h"#include "memsys.h"#include "simutil.h"#include "false_sharing.h"#include "hw_events.h"#include "registry.h"#include "tcl_init.h"#include "limits.h"#include "arch_specifics.h"#include "cpu_stats.h"#if (defined(SIM_MIPS64) || defined(SIM_MIPS32))#include "trace.h"#else#define TraceDataRef(a,b,c)#endif#ifdef MIPSY_MXS# include "ms.h"#endif/* We haven't been using these, so I'll make them compile time parameters for now. It will also speed up the caches. */#define DCACHE_HIT_ALWAYS 0#define DCACHE_MISS_ALWAYS 0#define ICACHE_HIT_ALWAYS 0#define ICACHE_MISS_ALWAYS 0#define CURRENT_PC(_cpu) (CPUVec.CurrentPC(_cpu))/* * Possible return status from the miss handling table. * MHTSUCCESS - Entry allocated everything looks good. * MHTMERGE - This request merged with another. Returned other. * MHTFULL - Entry not allocated because table was full. * MHTCONFLICT - Entry not allocated because it conflicted with one already * allocated. */typedef enum { MHTSUCCESS = 0, MHTFULL, MHTMERGE, MHTCONFLICT} MHTStatus;Cache *CACHE;bool skipCaches;/* Variables needed for the lookup macros */static int iTagShift;static int iIndexMask;static int dTagShift;static int dIndexMask;#ifndef SIM_ALPHAstatic int iAddrDivisor;static int dAddrDivisor;#else int iAddrDivisor; int dAddrDivisor;#endifstatic bool useWriteBuffer;static bool noUpgrades = FALSE; /* Send no upgrades to mem system *//* Local Cache Functions */static void InitPCaches(void);static void ICachePUT(int, MHT *, int );static void DCacheFlush(int, int writeback, int retain, PA, int);static void DCachePUT(int, MHT *, int);static Result ICacheMiss(int cpuNum, VA, PA, int *);static Result DCacheReadMiss(int cpuNum, VA, PA, void *data, RefSize size, RefFlavor flavor, int *indPtr);static Result DCacheWriteMiss(int cpuNum, VA, PA, RefSize size, RefFlavor flavor, struct DCacheSet* set, int way, bool upgrade, int* mhtind);static void FreeMHT(int cpuNum, int entryNum);static MHTStatus AllocMHT(int cpuNum, SCacheCmd cmd, VA vAddr, PA pAddr, int size, int lru, int *mhtind);static void RetryNAKedMHTEntry(int cpuNum,EventCallbackHdr *event, void *arg);static bool AddToWriteBuffer(int cpuNum,PA pAddr, uint64 data, RefSize size, int mhtind);static bool AddrIsInWriteBuffer(int cpuNum, PA pAddr, RefSize size);static void RetireWriteBuffer(int cpuNum, MHT *mht, byte *data);/**************************************************************** * * Cope with different associativities. * * The following code handles up to 4-way for now, but in the future * the interface is good enough to handle higher associativities. * Just extend the if tree and pass a pointer to the first word * of the set */#define UPDATE_BIG_LRU(lruword,set) \{ \ unsigned char* _lru = (unsigned char*) &(lruword); \ unsigned char _t1, _t2; \ \ if (_lru[0] != (set)) { \ _t1 = _lru[1]; \ _lru[1] = _lru[0]; \ _lru[0] = (set); \ if (_t1 != (set)) { \ _t2 = _lru[2]; \ _lru[2] = _t1; \ if (_t2 != (set)) _lru[3] = _t2; \ } \ } \}#define INIT_BIG_LRU(lruword) \{ \ char* _lru = (char*) &(lruword); \ _lru[0] = 0; _lru[1] = 1; _lru[2] = 2; _lru[3] = 3; \}#define GET_BIG_LRU(lruword, assoc) (((char*) &(lruword))[(assoc)-1])#define SET_BIG_LRU4(lruword, set) \{ \ unsigned char* _lru = (unsigned char*) &(lruword); \ unsigned char _t1,_t2; \ \ if (_lru[3] != (set)) { \ _t1 = _lru[2]; \ _lru[2] = _lru[3]; \ _lru[3] = (set); \ if (_t1 != (set)) { \ _t2 = _lru[1]; \ _lru[1] = _t1; \ if (_t2 != (set)) _lru[0] = _t2; \ } \ } \}#if ICACHE_ASSOC == 1#define ICACHE_TOUCH(lruword, set) /* nop */#define ICACHE_INIT_LRU(lruword) lruword = 0;#define ICACHE_LRU(lruword) 0#define ICACHE_MAKE_LRU(lruword, set) /* nop */#endif#if ICACHE_ASSOC == 2#define ICACHE_TOUCH(lruword, set) lruword = set;#define ICACHE_INIT_LRU(lruword) lruword = 0;#define ICACHE_LRU(lruword) (!lruword)#define ICACHE_MAKE_LRU(lruword, set) lruword = !(set);#endif#if ICACHE_ASSOC == 4#define ICACHE_TOUCH(lruword, set) UPDATE_BIG_LRU(lruword, set)#define ICACHE_INIT_LRU(lruword) INIT_BIG_LRU(lruword)#define ICACHE_LRU(lruword) GET_BIG_LRU(lruword, ICACHE_ASSOC)#define ICACHE_MAKE_LRU(lruword,set) SET_BIG_LRU4(lruword, set)#endif#if ICACHE_ASSOC == 8#define ICACHE_TOUCH(lruword, set) UPDATE_BIG_LRU(lruword, set)#define ICACHE_INIT_LRU(lruword) INIT_BIG_LRU(lruword)#define ICACHE_LRU(lruword) GET_BIG_LRU(lruword, ICACHE_ASSOC)#define ICACHE_MAKE_LRU(lruword,set) SET_BIG_LRU8(lruword, set)#endif#if DCACHE_ASSOC == 1#define DCACHE_TOUCH(lruword, set) /* nop */#define DCACHE_INIT_LRU(lruword) lruword = 0;#define DCACHE_LRU(lruword) 0#define DCACHE_MAKE_LRU(lruword, set) /* nop */#endif#if DCACHE_ASSOC == 2#define DCACHE_TOUCH(lruword, set) lruword = set;#define DCACHE_INIT_LRU(lruword) lruword = 0;#define DCACHE_LRU(lruword) !lruword#define DCACHE_MAKE_LRU(lruword, set) lruword = !(set);#endif#if DCACHE_ASSOC == 4#define DCACHE_TOUCH(lruword, set) UPDATE_BIG_LRU(lruword, set)#define DCACHE_INIT_LRU(lruword) INIT_BIG_LRU(lruword)#define DCACHE_LRU(lruword) GET_BIG_LRU(lruword, DCACHE_ASSOC)#define DCACHE_MAKE_LRU(lruword, set) SET_BIG_LRU4(lruword, set)#endif#if DCACHE_ASSOC == 8#define DCACHE_TOUCH(lruword, set) UPDATE_BIG_LRU(lruword, set)#define DCACHE_INIT_LRU(lruword) INIT_BIG_LRU(lruword)#define DCACHE_LRU(lruword) GET_BIG_LRU(lruword, DCACHE_ASSOC)#define DCACHE_MAKE_LRU(lruword, set) SET_BIG_LRU8(lruword, set)#endif/**** just for laughs, here's another UPDATE_BIG_LRU routine, with a few * more instructions but no branches or if tests after the initial * check. Probably runs faster on a dynamically-scheduled processor like T5. * Keep x axis of array as set number instead of lru position. * * { * unsigned char* lru = (unsigned char*) lruword; * register unsigned int l = lru[h]; * if (l != 0) { * lru[0] += (lru[0] < l); * lru[1] += (lru[1] < l); * lru[2] += (lru[2] < l); * lru[3] += (lru[3] < l); * lru[h] = 0; * } * } *//***************************************************************** * EVENT SUPPORT *****************************************************************/#define DATA_READ_EVENT() \ TraceDataRef(&PE[cpuNum], vAddr, pAddr); \ STATS_INC(cpuNum, dReads, 1); \ if (++(dcache->memSample) >= MS_SAMPLE_MEMOP_INTERVAL) { \ dcache->memSample = 0; \ MEM_SAMPLE_EVENT(CPUVec.CycleCount(cpuNum), cpuNum, CURRENT_PC(cpuNum), \ vAddr, pAddr, 1); \ }#define DATA_WRITE_EVENT() \ TraceDataRef(&PE[cpuNum], vAddr, pAddr); \ STATS_INC(cpuNum, dWrites, 1); \ if (++CACHE[cacheNum].DCache.memSample >= MS_SAMPLE_MEMOP_INTERVAL) { \ CACHE[cacheNum].DCache.memSample = 0; \ MEM_SAMPLE_EVENT(CPUVec.CycleCount(cpuNum), cpuNum, CURRENT_PC(cpuNum), \ vAddr, pAddr, 0); \ }/**************************************************************** * MemRefInit * *****************************************************************/void MemRefInit(void){ if (!strcmp(CACHE_MODEL, "None")) { CACHE = (Cache *)calloc(TOTAL_CPUS, sizeof(Cache)); skipCaches = TRUE; } else { ASSERT(!strcmp(CACHE_MODEL, "2Level")); InitPCaches(); InitSCaches(); skipCaches = FALSE; } MemsysInit();}/***************************************************************** * MemRefResetStats * *****************************************************************/voidMemRefResetStats(int cpuNum){ int cacheNum = GET_CACHE_NUM(cpuNum); int scacheNum = GET_SCACHE_NUM(cpuNum); bzero((char*)&(CACHE[cacheNum].stats), sizeof(CACHE[cacheNum].stats)); bzero((char*)&(SCACHE[scacheNum].stats), sizeof(SCACHE[scacheNum].stats));}/***************************************************************** * MemRefPrintPeriodicStats * * This is called every STAT_INTERVAL. *****************************************************************/static struct CacheStats { unsigned int ICacheCnt, ICacheMisses; unsigned int DCacheCnt, DCacheMisses; unsigned int SCacheCnt, SCacheMisses;} oldstats[SIM_MAXCPUS];static int printcnt;voidMemRefPeriodicStats(int cpuNum){ int cacheNum = GET_CACHE_NUM(cpuNum); int scacheNum = GET_SCACHE_NUM(cpuNum); uint newICacheCnt, newICacheMisses; uint newDCacheCnt, newDCacheMisses; uint newSCacheCnt, newSCacheMisses; uint totalICacheCnt, totalICacheMisses; uint totalDCacheCnt, totalDCacheMisses; uint totalSCacheCnt, totalSCacheMisses; struct CacheStats *s; Cache *C = &CACHE[cacheNum]; SCache *SC = &SCACHE[scacheNum]; totalICacheCnt = totalICacheMisses = 0; totalDCacheCnt = totalDCacheMisses = 0; totalSCacheCnt = totalSCacheMisses = 0; s = &(oldstats[cacheNum]); newICacheCnt = STATS_VALUE(cpuNum, iReads); totalICacheCnt += newICacheCnt; totalICacheMisses += newICacheMisses = (uint)C->stats.ICache.ReadMisses; totalDCacheCnt += newDCacheCnt = (uint) (STATS_VALUE(cpuNum, dReads) + STATS_VALUE(cpuNum, dWrites)); totalDCacheMisses += newDCacheMisses = (uint) (C->stats.DCache.ReadMisses + C->stats.DCache.WriteMisses + C->stats.DCache.UpgradeMisses); totalSCacheCnt += newSCacheCnt = (uint) (SC->stats.Igets + SC->stats.Dgets + SC->stats.DgetXs + SC->stats.Dupgrades); totalSCacheMisses += newSCacheMisses = (uint) (SC->stats.IgetMisses + SC->stats.DgetMisses + SC->stats.DgetXMisses + SC->stats.DupgradeMisses); #ifdef DELETE CPUPrint("C%d I %5.4f%% D %5.4f%% S %3.2f%% %3.2f%%\n", cacheNum, ((newICacheMisses - s->ICacheMisses == 0) ? 0 : ((100.0 * (newICacheMisses - (uint)s->ICacheMisses)) / (newICacheCnt - (uint)s->ICacheCnt))), ((newDCacheMisses - (uint)s->DCacheMisses == 0) ? 0 : ((100.0 * (newDCacheMisses - (uint)s->DCacheMisses)) / (newDCacheCnt - (uint)s->DCacheCnt))), ((newSCacheMisses - (uint)s->SCacheMisses == 0) ? 0 : ((100.0 * (newSCacheMisses - (uint)s->SCacheMisses)) / (newSCacheCnt - (uint)s->SCacheCnt))));#endif s->ICacheMisses = newICacheMisses; s->DCacheCnt = newDCacheCnt; s->DCacheMisses = newDCacheMisses; s->SCacheMisses = newSCacheMisses; s->SCacheCnt = newSCacheCnt; printcnt++; CPUPrint("C%d Total I Misses: %lld Total I Refs: %lld Rate: %5.3f%%\n", cacheNum, (uint64)totalICacheMisses, (uint64)totalICacheCnt, totalICacheCnt ? (100.0 * totalICacheMisses)/totalICacheCnt : 0.0); CPUPrint("C%d Total D Misses: %lld Total D Refs: %lld Rate: %5.3f%%\n", cacheNum, (uint64)totalDCacheMisses, (uint64)totalDCacheCnt, totalDCacheCnt ? (100.0 * totalDCacheMisses)/totalDCacheCnt : 0.0); CPUPrint("C%d Total S Misses: %lld Total S Refs: %lld Rate: %5.3f%%\n", cacheNum, (uint64)totalSCacheMisses, (uint64)totalSCacheCnt, totalSCacheCnt ? (100.0 * totalSCacheMisses)/totalSCacheCnt : 0.0); #if (MHT_SIZE >= 4) && (SMHT_SIZE >= 4) { /* * These stats are currently MHT occupancy histograms over all * cpu's. They can be changed to per cpu if desireable. */ int i, c; SimCounter occ[MHT_SIZE+1], socc[SMHT_SIZE+1]; for (i = 0; i < MHT_SIZE+1; i++) { occ[i] = 0; for (c = 0; c < TOTAL_CPUS; c++) { occ[i] += CACHE[c].stats.mhtOccupancyHist[i]; } } for (i = 0; i < SMHT_SIZE+1; i++) { socc[i] = 0; for (c = 0; c < TOTAL_CPUS; c++) { socc[i] += SCACHE[c].stats.smhtOccupancyHist[i]; } } CPUPrint("C%d MHT Occupancy: %lld %lld %lld %lld %lld\n", cacheNum, (uint64) occ[0], (uint64) occ[1], (uint64) occ[2], (uint64) occ[3], (uint64) occ[4]); CPUPrint("C%d SMHT Occupancy: %lld %lld %lld %lld %lld\n", cacheNum,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -