📄 busuma.c
字号:
/*
 * Copyright (C) 1996-1998 by the Board of Trustees
 *    of Leland Stanford Junior University.
 *
 * This file is part of the SimOS distribution.
 * See LICENSE file for terms of the license.
 *
 */

/*****************************************************************
 * File: BusUma.c
 *
 * UMA model with bus contention, snoopy caches, writeback buffers,
 * out-of-order split transaction bus.
 *
 * Author: $Author: bosch $
 * Date: $Date: 1998/02/10 00:37:16 $
 *****************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

#include "syslimits.h"
#include "scache.h"
#include "memsys.h"
#include "sim_error.h"
#include "simutil.h"
#include "eventcallback.h"
#include "list.h"
#include "cpu_interface.h"
#include "registry.h"
#include "hw_events.h"

/*
 * DATA_ADDR(_m, _pa): map a physical address to the address used to
 * reach its backing data.  SOLO builds ignore _m and go through
 * SoloDecompressAddr/SoloGetMemoryAddr; all other builds defer to
 * PHYS_TO_MEMADDR.
 */
#ifdef SOLO
#include "solo_page.h"
#define DATA_ADDR(_m, _pa) SoloGetMemoryAddr(SoloDecompressAddr(0,_pa))
#else
#define DATA_ADDR(_m, _pa) PHYS_TO_MEMADDR(_m, _pa)
#endif

/* Debug flag, off by default. */
static int debugMem = 0;
/*#define DEBUG_MEM*/

/*
 * Memory layout
 *
 * we require that the memory per mem node and the number of nodes be
 * power-of-2 size
 */

/* printf conversion (with trailing space) selected per platform. */
#ifdef __alpha
#define LLD_SPACE "%ld "
#else
#define LLD_SPACE "%lld "
#endif

/* Number of MemRequest entries in each WB_Buffer. */
#define WB_BUFFER_SIZE 4
/* Number of RequestTable entries per machine. */
#define REQUEST_TABLE_SIZE 8
/* Length of the memState[] pointer array. */
#define BUSUMA_MAX_MEMORIES 64

/* Timing parameters, in cycles; all of them are assigned in BusUmaInit. */
static int arbitrationCycles, transferCycles, memoryCycles;
static int dirtyPenalty, readOverhead, upgradeOverhead;
/* Assigned in BusUmaInit (currently 5). */
static int maxOverflow;

/* Converted to cycles with NanoSecsToCycles in BusUmaInit. */
#define MEMORY_LATENCY 80 /* 80 ns */

/*
 * Interleave on a cache line basis.
*/
#ifdef SOLO
#define BusumaAddrToMemnum(_m,_addr) SOLO_PA_NODE(SoloDecompressAddr(0,_addr))
#define BusumaAddrToMemline(_m,_addr) (SOLO_PA_OFFSET(SoloDecompressAddr(0,_addr))/SCACHE_LINE_SIZE)
#else
/*
 * Memory number: the scache-line index of _addr modulo the machine's
 * memory count, offset by the machine's first memory.
 */
#define BusumaAddrToMemnum(_m,_addr) (FIRST_MEMORY(_m)+(((_addr) >>log2SCACHE_LINE_SIZE) % NUM_MEMORIES(_m)))
/* Line number within a memory: _addr divided by (line size * memory count). */
#define BusumaAddrToMemline(_m,_addr) ((_addr) / (SCACHE_LINE_SIZE*NUM_MEMORIES(_m)))
#endif

/* Always evaluates to 1; both arguments are ignored. */
#define BusumaIsLocal(_cpu,_mem) (1)

/* Value stored in MemRequest.state. */
enum MemReqState {
   REQ_FREE,
   WB_OVERFLOW,
   RT_OVERFLOW,
   CPU_TO_BUS_OVERHEAD,
   BUS_ARB_WAIT,
   BUS_TRANSFER_WAIT,
   MEM_CONFLICT_WAIT,
   MEM_MERGE_WAIT,
   CACHE_TRANSFER_WAIT,
   MEM_BUSY_WAIT,
   MEM_ACTIVE_WAIT,
   REPLY_BUS_ARB_WAIT,
   REPLY_BUS_TRANSFER_WAIT
};

/* Size of the static MemRequestStorage pool. */
#define MAX_MREQ (SIM_MAXCPUS*MEMSYS_MAX_OUTSTANDING)

/*
 * One memory request.  The first two fields have a fixed position:
 * LIST_TO_MREQ recovers the request from a pointer to its link field
 * by subtracting sizeof(EventCallbackHdr).
 *
 * NOTE(review): that subtraction is only correct if the compiler
 * inserts no padding between hdr and link -- confirm for the target
 * ABI before changing either type.
 */
typedef struct MemRequest {
   EventCallbackHdr hdr;  /* Must be first!! */
   List_Links link;       /* Must be second (after EventCallbackHdr) */
#define MREQ_TO_LIST(_m) (&((_m)->link))
#define LIST_TO_MREQ(_l) ((MemRequest *) (((char *)(_l)) - sizeof(EventCallbackHdr)))
   int cmd;
   int transId;
   PA reqAddr;
   PA addr;               /* Address scache aligned */
   unsigned int mode;
   int status;
   int result;
   enum MemReqState state;
   int rtIndex;
   int memnum;
   int machnum;
   int cpunum;
   uint starttime;
   uint memQEnter;
   byte* data;            /* data handling - dma, writeback buffer */
   int len;
} MemRequest;

/* Backing storage for request structures. */
static MemRequest MemRequestStorage[MAX_MREQ];

/* List of available mreq structures */
static List_Links freeMemReqList;

/*
 * One request-table entry.  BusUmaInit zeroes the per-machine table
 * and initializes the merge and delay lists of every entry.
 */
typedef struct _RequestTable {
   int used;
   PA addr;
   MemRequest *initiator;
   List_Links merge;
   List_Links delay;
} RequestTable;

/* REQUEST_TABLE_SIZE entries for each machine. */
static RequestTable requestTable[MAX_MACHINES][REQUEST_TABLE_SIZE];
/* One list per machine; initialized in BusUmaInit. */
static List_Links requestTableFullQ[MAX_MACHINES];

/* Writeback buffer: WB_BUFFER_SIZE embedded entries plus an overflow list. */
typedef struct _WB_Buffer {
   List_Links free;
   int numUsed;
   MemRequest entries[WB_BUFFER_SIZE];
   int numOverflowUsed;
   List_Links overflow;
} WB_Buffer;

/* One writeback buffer per CPU. */
static WB_Buffer wbBuffers[SIM_MAXCPUS];

/* Per-machine bus state; BusUmaInit sets busBusy to 0 and busIdleStart to 1. */
static int busBusy[MAX_MACHINES];
static int busIdleStart[MAX_MACHINES];
static EventCallbackHdr arbitrate[MAX_MACHINES];
/* Two request lists per machine. */
static List_Links lowPri[MAX_MACHINES],
   highPri[MAX_MACHINES];

/* WB_QUEUE(_m) is the address of machine _m's lowPri list. */
#define WB_QUEUE(_m) (&(lowPri[_m]))

/* Forward declarations for the file-local functions. */
static Result BusUmaCmd(int cpuNum, int cmd, PA paddr, int transId,
                        PA replacedPaddr, int writeback, byte *data);
static void BusUmaDumpStats (void);
static void BusUmaDone (void);
static void BusUmaStatus (void);
static void MemoryDone(int cpuNum, EventCallbackHdr *hdr, void *arg);
static void MemoryArrive(int memnum);
static void CPUArrive(MemRequest *mreq);
static MemRequest * IsInWBBuffer(int cpuNum, PA addr);
static void CacheSnoop(MemRequest *mreq);
static void BusFree(int cpuNum, EventCallbackHdr *hdr, void *arg);
static void ArbitrateCallback(int cpuNum, EventCallbackHdr *hdr, void *arg);
static void Arbitrate(int machine);
static void BusUmaUncachedRead(int cpuNum, PA addr, byte *data, int len);
static void BusUmaUncachedWrite(int cpuNum, PA addr, byte *data, int len);
static void BusUmaDrain(void);
/* The remap entry points only exist in non-SOLO builds. */
#ifndef SOLO
static PA BusUmaGetNodeAddress(int cpunum);
static void BusUmaSetRemap(int cpunum, PA mask);
static void BusUmaControlRemap(int cpunum, int isEnabled);
#endif

/*
 * Stats collected per memory system.
*/
/* Number of buckets in StatsHist.counts. */
#define STAT_HIST_BUCKETS 100

/* Histogram: a running sum and count plus per-bucket counters. */
typedef struct StatsHist {
   unsigned int scaledivisor;
   SimCounter sum;
   SimCounter count;
   SimCounter counts[STAT_HIST_BUCKETS];
} StatsHist;

/*
 * Indices into BusUmaStats.buCounts.  The order is the same as the
 * column names in the "MEMBUSUMAFields" header line that BusUmaInit
 * prints.
 * NOTE(review): presumably the stats dump emits the counters in this
 * order, so the enum and that header must stay in sync -- confirm
 * against BusUmaDumpStats.
 */
enum BusUmaCounterTypes {
   /* number of requests that are satisfied by a merge */
   BUC_MERGES,
   /* number of requests delayed due to conflicts */
   BUC_CONFLICTS,
   /* number of writebacks that didn't use the bus */
   BUC_KILLEDWBS,
   /* number of GETs satisfied by writeback buffer */
   BUC_GET_WBBUF,
   /* number of upgrades satisfied by writeback buffer */
   BUC_UPG_WBBUF,
   /* number of cycles bus is busy doing a get request */
   BUC_BUS_GETRQST,
   /* number of cycles bus is busy doing a get reply */
   BUC_BUS_GETREPLY,
   /* number of cycles bus is busy doing an upgrade */
   BUC_BUS_UPG,
   /* number of cycles bus is busy doing a wb */
   BUC_BUS_WB,
   /* number of cycles bus is busy doing a get which is nak'd b/c
    * no space in request table */
   BUC_BUS_NAK,
   /* number of cycles bus is busy */
   BUC_BUS_BUSY,
   /* number of cycles bus is idle */
   BUC_BUS_IDLE,
   /* time in memory queue */
   BUC_MEMQUEUE,
   /* number of counters above; sizes buCounts */
   BUC_TOTAL
};

/* Statistics record embedded in each MemState. */
typedef struct BusUmaMemStats {
   SimCounter counts[COUNT_TOTAL];
   SimCounter buCounts[BUC_TOTAL];
   StatsHist reqtime; /* latency */
} BusUmaStats;

/*
 * State associated with a memory in the system.
*/typedef struct MemState { List_Links memoryQueue; /* Queue to memory */ BusUmaStats stats; int busy; MemRequest wbHdr; /* hack needed because we free writebackbuffer at memoryarrive, and we need something to use for eventcallback */} MemState;static MemState *memState[BUSUMA_MAX_MEMORIES];#ifndef SOLO/***************************************************************** * remap region support *****************************************************************/static PA backmapMask[MAX_MACHINES];static PA nodeaddrMask[MAX_MACHINES];static voidBusUmaUpdateBackmapMask(void){ /* the backmapMask is an optimization that summarizes all the enabled * remap masks; it has ones in any bit position which, if set, means * this physical address could not be the target of a remap on any * CPU. * * nodeaddrMask masks out the node id bits. */ int i, m; for (m=0; m < NUM_MACHINES; m++) { backmapMask[m] = nodeaddrMask[m]; for (i=FIRST_CPU(m); i<=LAST_CPU(m); i++) { if (remapVec->RemapEnable[i]) { backmapMask[m] &= remapVec->RemapMask[i]; } } }}static voidBusUmaInitRemap(void){ int i, machine; /* We initialize backmapMask to all ones except for the * bits that might be set in the node id field */ for (machine=0; machine<NUM_MACHINES; machine++) { nodeaddrMask[machine] = ~0; for (i=0; i<NUM_CPUS(machine); i++) { if (!remapVec->NodeAddrInitialized) {#ifdef TORNADO int m = machine; remapVec->NodeAddr[FIRST_CPU(machine)+i] = (i*NUM_MEMORIES(m)/NUM_CPUS(m)) * (MEM_SIZE(m) / NUM_CPUS(m)); CPUWarning("busuma: nodeaddr for %d/%d is %lx\n", i, FIRST_CPU(machine)+i, (unsigned long)(remapVec->NodeAddr[FIRST_CPU(machine)+i]));#elif defined(SIM_ORIGIN) remapVec->NodeAddr[FIRST_CPU(machine)+i] = MEMADDR_TO_PHYS(machine, SIM_MEM_ADDR(machine) + (i/2) * 2 * (MEM_SIZE(machine) / NUM_CPUS(machine)));#else remapVec->NodeAddr[FIRST_CPU(machine)+i] = MEMADDR_TO_PHYS(machine, SIM_MEM_ADDR(machine) + i * (MEM_SIZE(machine) / NUM_CPUS(machine)));#endif } nodeaddrMask[machine] &= 
~remapVec->NodeAddr[FIRST_CPU(machine)+i]; } } remapVec->NodeAddrInitialized = 1; /* now zero out the low bits of the backmapmask */ BusUmaUpdateBackmapMask();}static voidBusUmaSetRemap(int cpunum, PA mask){ remapVec->RemapMask[cpunum] = mask; BusUmaUpdateBackmapMask();}static voidBusUmaControlRemap(int cpunum, int isEnabled){ remapVec->RemapEnable[cpunum] = isEnabled; BusUmaUpdateBackmapMask();}static PABusUmaGetNodeAddress(int cpunum){ return remapVec->NodeAddr[cpunum];}static PAReverseRemap(PA paddr, int cpunum){ if ((paddr & remapVec->RemapMask[cpunum]) == remapVec->NodeAddr[cpunum]) { return paddr - remapVec->NodeAddr[cpunum]; } else if ((paddr & remapVec->RemapMask[cpunum]) == 0) { return paddr + remapVec->NodeAddr[cpunum]; } else { return paddr; }}#define BACKMAP_PADDR(paddr,cpunum) \ (((paddr & backmapMask[M_FROM_CPU(cpunum)]) \ || !remapVec->RemapEnable[cpunum]) \ ? paddr : ReverseRemap(paddr, cpunum))#endif #ifdef SOLO#define BACKMAP_PADDR(paddr,cpunum) (paddr)#endif/***************************************************************** * BusUmaInit *****************************************************************/void BusUmaInit(void){ int i, j, m; int halfTransXTime; memsysVec.type = BUSUMA; memsysVec.NoMemoryDelay = 0; memsysVec.MemsysCmd = BusUmaCmd; memsysVec.MemsysDumpStats = BusUmaDumpStats; memsysVec.MemsysDone = BusUmaDone; memsysVec.MemsysStatus = BusUmaStatus; memsysVec.MemsysDrain = BusUmaDrain;#ifndef SOLO memsysVec.MemsysSetRemap = BusUmaSetRemap; memsysVec.MemsysControlRemap = BusUmaControlRemap; memsysVec.MemsysGetNodeAddress = BusUmaGetNodeAddress;#endif halfTransXTime = NanoSecsToCycles(SCACHE_LINE_SIZE*1000/BUS_BW/2); arbitrationCycles = 2*halfTransXTime/5; transferCycles = halfTransXTime - arbitrationCycles; ASSERT(transferCycles >= 0); memoryCycles = NanoSecsToCycles(MEMORY_LATENCY); dirtyPenalty = DIRTY_PENALTY; readOverhead = MEM_CYCLE_TIME - memoryCycles - 2*halfTransXTime; ASSERT(readOverhead >= 0); upgradeOverhead = UPGRADE_TIME - 
halfTransXTime; ASSERT(upgradeOverhead >= 0); /* maxOverflow = 1; this doesn't work for mxs or with prefetching */ maxOverflow = 5; CPUPrint("MEMSYS: BusUma - arbitrate: %d, transfer: %d, memory: %d\n\n", arbitrationCycles, transferCycles, memoryCycles); CPUPrint("MEMSYS: BusUma - readOverhead: %d, upgradeOverhead: %d, dirtyPenalty: %d\n", readOverhead, upgradeOverhead, dirtyPenalty); for (m = 0; m < NUM_MACHINES; m++) { if( !NUM_MEMORIES(m) ) CPUError("BusUma: NumMemories must be >0 (machine %d)\n", m); if( MEM_SIZE(m) % NUM_MEMORIES(m) != 0 ) CPUError("BusUma: the memory size must be a multiple of the number of memories (machine %d)\n",m); CPUPrint("BusUma with %d memories; cache-line interleaving (machine %d)\n", NUM_MEMORIES(m), m); busBusy[m] = 0; busIdleStart[m] = 1; bzero((char *)(requestTable[m]), sizeof(RequestTable)*REQUEST_TABLE_SIZE); for (i = 0; i < REQUEST_TABLE_SIZE; i++) { List_Init(&requestTable[m][i].merge); List_Init(&requestTable[m][i].delay); } List_Init(&requestTableFullQ[m]); } CPUPrint("MEMFields IGETS LLGETS DMAGETS DGETS DMAGETXS GETXS SCUPGRADES UPGRADES WRITEBACKS REPLHINTS UNCACHEDWRITES UNCACHEDACCWRITES UNCACHEDREADS INVALORDNGRADESENT NAKS REMOTEDIRTY EXCLUSIVEONSHARED MEMORYACCESS\n"); CPUPrint("MEMBUSUMAFields MERGES CONFLICTS KILLEDWBS GET_WBBUF UPG_WBBUF BUS_GETRQST BUS_GETREPLY BUS_UPG BUS_WB BUS_NAK BUS_BUSY BUS_IDLE MEMQUEUE\n"); /* initialize writeback buffer for each proc */ for (i = 0; i < SIM_MAXCPUS; i++) { List_Init(&wbBuffers[i].free); wbBuffers[i].numUsed = 0; for (j = 0; j < WB_BUFFER_SIZE; j++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -