📄 memstat.c
字号:
/*
 * Copyright (C) 1996-1998 by the Board of Trustees
 * of Leland Stanford Junior University.
 *
 * This file is part of the SimOS distribution.
 * See LICENSE file for terms of the license.
 *
 */

/****************************************************************
 * memstat.c
 *
 * Keep track of memory cache lines and the transitions that they go
 * through.
 *
 * Last Modified by: $Author: bosch $
 * $Date: 1998/02/10 00:29:00 $
 ****************************************************************/

/* #define DEBUGGING */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stddef.h>
#include <stdarg.h>

#include "sim_error.h"
#include "simutil.h"
#include "simrecord.h"
#include "cpu_interface.h"
#include "memstat.h"
#include "annotations.h"
#include "machine_params.h"
#include "statrecord.h"
#include "false_sharing.h"
#include "hw_events.h"
#include "tcl_init.h"

/* Get rid of this */
#include "../../memsystems/memsys.h"

/*******************************************************************
 * The #define NO_RACE_CONDITION option can be used to debug the
 * caches by verifying that only certain state transitions actually
 * take place. NO_RACE_CONDITION can only be used with UMA, NUMA and
 * PerfectMem, not with Flash-lite.
 *******************************************************************/
#define NO_RACE_CONDITION ( memsysVec.type != FLASHLITE )

/********************************************************************
 * MemStat state machine.
 * This simple FSM consists of the states CACHE_COLD ... CACHE_WARMED
 * and is managed for every physical line and processor of the machine.
 * The state is encoded in 16 bits, the top 12 being reserved to store
 * the PID of the process that evicted the line.
 ********************************************************************/
#define CACHE_COLD 0
#define CACHE_REPLACED_K 1
#define CACHE_REPLACED_U 2
#define CACHE_INVALIDATED 3
#define CACHE_REPL_INV 4
#define CACHE_INCACHE 5
#define CACHE_WARMED 6

/*
 * Miss category names. MemStatInitCategoryFields appends categName[i]
 * to a caller-supplied tag ("<tag>.<name>") for i in [0, NUM_CATEGS).
 */
char *categName[] = {"ice","cold", "inv","capinv", "cap","Kcap","SelfCap","OtherCap"};

#define MS_MAXPID 4095 /* 2**12 -1 */
#define PIDOFFSET 4

/*
 * Line number of a physical address: the address shifted right by the
 * log2 of the L2 (SCACHE), L1 data or L1 instruction line size.
 */
#define MEM_LINE(_pA) ((_pA)>>log2SCACHE_LINE_SIZE)
#define MEM_L1D_LINE(_pA) ((_pA)>>log2DCACHE_LINE_SIZE)
#define MEM_L1I_LINE(_pA) ((_pA)>>log2ICACHE_LINE_SIZE)

/* Expands to 0 regardless of its argument. */
#define MS_pid(_cpu) 0

/*
 * Values for memstatOption, interpreted by MemStatEarlyInit:
 *   MemStatNone      - state machine disabled (memStat.init stays 0)
 *   MemStatGlobal    - 4 bits per state, no PID tracking
 *   MemStatGlobalPID - 4+12 bits per state, PID tracking enabled
 */
#define MemStatNone 0
#define MemStatGlobal 1
#define MemStatGlobalPID 2

int memstatOption = MemStatGlobal;

static struct MemStatState {
   /* State machine turned on or not! */
   int init;
   /* Set to 1 by MemStatEarlyInit only for MemStatGlobalPID. */
   int trackPID;
   /*
    * Gates the L1 bit vectors and the dL1CapMiss/iL1CapMiss counters.
    * The assignments to it in MemStatEarlyInit are commented out.
    */
   int trackL1;
   /* 0xf (MemStatGlobal) or 0xffff (MemStatGlobalPID). */
   uint cacheMask;
   /* 4 (MemStatGlobal) or 4+12 (MemStatGlobalPID). */
   int bitsPerState;

   /* L2 State machines */
   /* Size handed to ZALLOC_PERM for trPtr[machine] in MemStatLateInit. */
   uint lastAddr[MAX_MACHINES];
   /* NUM_CPUS(machine) rounded up to a power of two (MemStatLateInit). */
   uint trNumCPUs[MAX_MACHINES];
   uint trWarnings[MAX_MACHINES];
   uint *trPtr[MAX_MACHINES];

   /* L1 State machines (cap/compulsory) */
   /*
    * Bit vectors addressed as word = line/32, bit = line%32, where
    * line = mcpu * (L1 lines in memory) + (L1 line of the address).
    * Allocated in MemStatLateInit only when trackL1 is set.
    */
   uint *l1DtrPtr[MAX_MACHINES];
   uint *l1ItrPtr[MAX_MACHINES];

   /*
    * PID mappings.
    * Memstat pid table. Hive uses pids sparsely. We therefore
    * use an indirection table to index the pids. This saves us
    * a couple of bits (exactly 4) under the safe assumption that
    * no more than 4K processes will be spawned during the run.
    */
   int pidCount[MAX_MACHINES];
   /* Every entry is initialised to -1 by MemStatEarlyInit. */
   int pidTable[MAX_MACHINES][32*1024];
} memStat;

/*
 * Field descriptors for every counter recorded by this module. The
 * scalar fields are defined via DEF_FIELD in MemStatEarlyInit; the
 * per-category arrays are filled by MemStatInitCategoryFields.
 */
struct MemStatBucket {
   /* Instruction & Data */
   StatRecordFieldDesc dStall;
   StatRecordFieldDesc dL2Stall;
   StatRecordFieldDesc dUpgradeStall;
   StatRecordFieldDesc dL2DirtyStall;
   StatRecordFieldDesc dL1Miss;
   StatRecordFieldDesc dL2Miss;
   StatRecordFieldDesc dL2DirtyMiss;
   StatRecordFieldDesc dL1CapMiss;
   StatRecordFieldDesc dUpgrade;
   StatRecordFieldDesc dUpgradeNoInval;
   StatRecordFieldDesc dUpgradeSC;
   StatRecordFieldDesc dUpgradeRemote;
   StatRecordFieldDesc read[NUM_CATEGS];
   StatRecordFieldDesc write[NUM_CATEGS];

   /* Instruction only */
   StatRecordFieldDesc instr[NUM_CATEGS];
   StatRecordFieldDesc iL1Miss;
   StatRecordFieldDesc iL2Miss;
   StatRecordFieldDesc iStall;
   StatRecordFieldDesc iL2Stall;
   StatRecordFieldDesc iL2DirtyMiss;
   StatRecordFieldDesc iL1CapMiss;

   /* Local/Remote memory home related counters */
   StatRecordFieldDesc dL2RemoteMiss;
   StatRecordFieldDesc dL2RemoteStall;
   StatRecordFieldDesc iL2RemoteMiss;
   StatRecordFieldDesc iL2RemoteStall;
} memStatBucket;

/***********************************************************************
 * Statics and #defines
 **********************************************************************/

/*
 * Record _inc into memStatBucket._f. These macros read the caller's
 * local variables (cpuNum, pc, vAddr) by name, and their expansion
 * already ends in a semicolon.
 *   STAT_RECORD   - passes both pc and vAddr
 *   STAT_RECORD_I - passes pc, with 0 in place of vAddr
 *   STAT_RECORD_D - passes vAddr, with 0 in place of pc
 */
#define STAT_RECORD(_f,_inc) \
   StatRecordEntry(cpuNum,pc,vAddr,memStatBucket._f,_inc);
#define STAT_RECORD_I(_f,_inc) \
   StatRecordEntry(cpuNum,pc,0,memStatBucket._f,_inc);
#define STAT_RECORD_D(_f,_inc) \
   StatRecordEntry(cpuNum,0,vAddr,memStatBucket._f,_inc);

static int CacheTrMiss(int cpu, PA pAddr);
int interest(PA addr);

/*
 * Trace helper: prints the cycle count, the string s, the offset of
 * pAddr within its SCACHE line, and cpuNum. Reads the caller's local
 * variables cpuNum and pAddr by name.
 */
#define ITRACE(s) CPUWarning("%10lld (%s,%x,%d)\n", \
   (uint64)CPUVec.CycleCount(cpuNum),s,pAddr & (SCACHE_LINE_SIZE-1), cpuNum);

/*****************************************************************
 * MemStatInitCategoryFields
 *
 * Go through the cache miss categories and define fields for them.
 * This can be used by anyone needing the cache fields.
*****************************************************************/void MemStatInitCategoryFields(StatRecordFieldDesc *desc, char *tag, int type){ int i; char str[128]; ASSERT (memStat.init); for (i=0; i<NUM_CATEGS; i++) { int f = 1; switch (i) { case CATEG_cap: if( memStat.trackPID ) f = 0; break; case CATEG_Kcap: case CATEG_OtherCap: case CATEG_SelfCap: if( !memStat.trackPID ) f=0; break; } if (f) { sprintf(str,"%s.%s", tag, categName[i]); desc[i] = StatRecordDefineField(str, type); } }}/**************************************************************************** * MemStatInit ****************************************************************************/#define DEF_FIELD(_field,_fl) (memStatBucket._field = StatRecordDefineField(# _field,_fl))extern int isSolo;void MemStatEarlyInit(void){ int machine, i; for(machine=0; machine<NUM_MACHINES; machine++) { for(i=0;i<32*1024;i++) { memStat.pidTable[machine][i] = -1; } } /* Data-cache related counters */ DEF_FIELD(dL1Miss, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dL2Miss, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dL2DirtyMiss, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dUpgrade, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dUpgradeNoInval, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dUpgradeSC, STATRECORD_INSTRUCTION|STATRECORD_DATA); /* data stall time */ DEF_FIELD(dStall, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dL2Stall, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dUpgradeStall, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dL2DirtyStall, STATRECORD_INSTRUCTION|STATRECORD_DATA); /* Instruction-cache related counters */ DEF_FIELD(iL1Miss, STATRECORD_INSTRUCTION); DEF_FIELD(iL2Miss, STATRECORD_INSTRUCTION ); DEF_FIELD(iStall, STATRECORD_INSTRUCTION ); DEF_FIELD(iL2Stall, STATRECORD_INSTRUCTION ); DEF_FIELD(iL2DirtyMiss, STATRECORD_INSTRUCTION); /* Local/Remote memory home related counters */ DEF_FIELD(iL2RemoteMiss, STATRECORD_INSTRUCTION); 
DEF_FIELD(iL2RemoteStall, STATRECORD_INSTRUCTION); DEF_FIELD(dL2RemoteMiss, STATRECORD_INSTRUCTION|STATRECORD_DATA); DEF_FIELD(dL2RemoteStall, STATRECORD_INSTRUCTION|STATRECORD_DATA); switch (memstatOption) { case MemStatNone: memStat.init = 0; return; case MemStatGlobal: memStat.init = 1; /* memStat.trackL1 = 1; */ memStat.trackPID = 0; memStat.bitsPerState = 4; memStat.cacheMask = 0xf; break; case MemStatGlobalPID: memStat.init = 1; /* memStat.trackL1 = 1; */ memStat.trackPID = 1; memStat.bitsPerState = 4+12; memStat.cacheMask = 0xffff; break; default: ASSERT(0); } memStat.init = 1; /* * Additional counters, enabled only when the state machine is running */ if (memStat.trackL1) { DEF_FIELD(dL1CapMiss, STATRECORD_INSTRUCTION|STATRECORD_DATA); } MemStatInitCategoryFields(memStatBucket.read, "read", STATRECORD_INSTRUCTION|STATRECORD_DATA); MemStatInitCategoryFields(memStatBucket.write, "write", STATRECORD_INSTRUCTION|STATRECORD_DATA); if (memStat.trackL1) { DEF_FIELD(iL1CapMiss, STATRECORD_INSTRUCTION ); } MemStatInitCategoryFields(memStatBucket.instr, "instr", STATRECORD_INSTRUCTION); }void MemStatLateInit(void){ int machine; if( !memStat.init) return; for (machine = 0; machine < NUM_MACHINES; machine++) { for (memStat.trNumCPUs[machine]=1; memStat.trNumCPUs[machine] < NUM_CPUS(machine); memStat.trNumCPUs[machine] *=2) { continue; } memStat.lastAddr[machine] = memStat.bitsPerState * memStat.trNumCPUs[machine] * (MEM_LINE(MEM_SIZE(machine)) / 8); ASSERT (memStat.lastAddr[machine]); CPUPrint("MemStat: init cache state transitions for mach %d, %i proc,len=0x%08x\n", machine, memStat.trNumCPUs[machine], memStat.lastAddr[machine]); memStat.trPtr[machine] = (unsigned int *) ZALLOC_PERM(memStat.lastAddr[machine],"MemStat"); if (memStat.trackL1) { ASSERT (log2DCACHE_LINE_SIZE); ASSERT (log2ICACHE_LINE_SIZE); ASSERT (log2ICACHE_SIZE); ASSERT (log2DCACHE_SIZE); ASSERT (NUM_CPUS(machine)); memStat.l1DtrPtr[machine] = (unsigned int *) ZALLOC_PERM(NUM_CPUS(machine) * 
MEM_L1D_LINE(MEM_SIZE(machine)) / 8, "MemStat::L1D"); memStat.l1ItrPtr[machine] = (unsigned int *) ZALLOC_PERM(NUM_CPUS(machine) * MEM_L1I_LINE(MEM_SIZE(machine)) / 8, "MemStat::L1I"); } }}voidMemStatImiss(MemStatTime cycle, int cpuNum, VA pc, PA pAddr, MemStatTime stallTime, int missType) { int categ = -1; int machine; int mcpu; if (!memStat.init) return; machine = M_FROM_CPU(cpuNum); mcpu = MCPU_FROM_CPU(cpuNum); ASSERT(!(missType & E_D));#ifdef DEBUGGING if( interest(pAddr) ) { CPUWarning("%10lld IMISS cpu=%i pAddr=%08x type=%i\n", CPUVec.CycleCount(cpuNum), cpuNum, pAddr,missType); }#endif ASSERT( (missType & E_L1) || (missType & E_L2)); if (memStat.init) { if (!(missType & E_L1)) { categ = CacheTrMiss(cpuNum,pAddr); ASSERT(categ >= 0); } } else { categ = -1; } /* * L1 cache miss */ if (missType & E_L1) { int l1Line = mcpu * MEM_L1I_LINE(MEM_SIZE(machine)) + MEM_L1I_LINE(pAddr); int l1Index = l1Line /32; int l1Pos = l1Line %32; STAT_RECORD_I(iL1Miss,1); STAT_RECORD_I(iStall,stallTime); /* L1 capacity */ if (memStat.trackL1) { if (memStat.l1ItrPtr[machine][l1Index] & (1<<l1Pos)) { STAT_RECORD_I(iL1CapMiss,1); } memStat.l1ItrPtr[machine][l1Index] |= (1<<l1Pos); } return; } /* * L2 cache miss */ if (missType & E_L2) { int i; STAT_RECORD_I(iL2Miss,1); STAT_RECORD_I(iL2Stall, stallTime); if (missType & E_FOUND_IN_CACHE) { STAT_RECORD_I(iL2DirtyMiss,1); } if (missType & E_REMOTE) { STAT_RECORD_I(iL2RemoteMiss,1); STAT_RECORD_I(iL2RemoteStall,stallTime); } if (memStat.trackL1) { for(i=0; i < SCACHE_LINE_SIZE; i += ICACHE_LINE_SIZE) { /* clear the capacity bits in the L1 cache */ uint L1pA = (pAddr & ~(SCACHE_LINE_SIZE-1)) + i; int l1Line = mcpu * MEM_L1I_LINE(MEM_SIZE(machine)) + MEM_L1I_LINE(L1pA); int l1Index = l1Line /32; int l1Pos = l1Line %32; memStat.l1ItrPtr[machine][l1Index] &= ~(1<<l1Pos); } } if (memStat.init) { ASSERT(categ >= 0 && categ < NUM_CATEGS); StatRecordEntry(cpuNum,pc,0,memStatBucket.instr[categ],1); } }}intMemStatDmiss(MemStatTime cycle, int 
cpuNum, VA pc, VA vAddr, PA pAddr, MemStatTime stallTime, int missType, int way){ int categ=-1,i; int machine; int mcpu; if (!memStat.init) return 0; machine = M_FROM_CPU(cpuNum); mcpu = MCPU_FROM_CPU(cpuNum);#ifdef DEBUGGING if (interest(pAddr)) { CPUWarning("%10lld DMISS cpu=%i pAddr=%08x type=%#x state=%#x upgrade=%i\n", CPUVec.CycleCount(cpuNum), cpuNum, pAddr, missType, MSCacheState(cpuNum, pAddr), ((missType&E_UPGRADE)!=0)); }#endif if ((missType & E_L1) || ((missType & E_L2) && (missType & E_UPGRADE))) { /* no cache transition */ } else { /* transition */ ASSERT(missType & E_L2); if (memStat.init) { categ = CacheTrMiss(cpuNum,pAddr); if (missType & E_D) { FalseSharingDefineOffset(cpuNum,pAddr,way,categ); } } else { categ = -1; } } ASSERT((missType & (E_L2 | E_L1))); if (missType & E_L1) { int l1Line = mcpu * MEM_L1D_LINE(MEM_SIZE(machine)) + MEM_L1D_LINE(pAddr); int l1Index = l1Line /32; int l1Pos = l1Line %32; STAT_RECORD(dL1Miss,1); STAT_RECORD(dStall,stallTime); if (memStat.trackL1) { /* L1 capacity */ if (memStat.l1DtrPtr[machine][l1Index] & (1<<l1Pos)) { STAT_RECORD(dL1CapMiss,1); } memStat.l1DtrPtr[machine][l1Index] |= (1<<l1Pos);#ifdef DEBUGGING
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -