📄 dcache.cc
字号:
#include "koala.hh"

// WARNING: currently, the memory access latencies are not simulated.
// WARNING: currently, the effect of write buffers isn't simulated.

// State information for the data cache.  Bit 0 of a line's state word is the
// write-back (dirty) bit; bits CS_First..CS_Last hold the cache state field.

const UInt32 W = 1;                     // write-back bit
const UInt32 CS_First = 1, CS_Last = 2; // cache state field

// Cache states -- unused on R4600/R4700, but needed for control_dcache().
// Every value is positioned at CS_First so that it stays inside the
// CS_First..CS_Last field and never overlaps the W bit.

const UInt32 CS_Invalid        = 0 << CS_First;
const UInt32 CS_Shared         = 1 << CS_First;
const UInt32 CS_CleanExclusive = 2 << CS_First;
const UInt32 CS_DirtyExclusive = 3 << CS_First;


// Reset the L1 data cache: every line of every set is given the invalid tag
// and the Invalid state, and the LRU bookkeeping of each set is reinitialized.

void
Koala::reset_dcache()
{
    for (int i = 0; i < dcache.sets; ++i) {
        for (int j = 0; j < dcache.assoc; ++j) {
            dcache.set[i].line[j].tag = bad_tag;
            dcache.set[i].line[j].state = CS_Invalid;
        }
        // Once per set is enough; there is no need to repeat it per line.
        dcache.set[i].lru_init();
    }
}


// Perform a cache operation (for use by decode_cache()).
//
//   va   - virtual address operand; selects the set and the line within it.
//   pa   - translated physical address; supplies the tag for "Hit" operations.
//   op   - operation code (0..7), see the individual cases below.
//   type - nonzero selects the secondary cache, which is not present, so the
//          request is ignored.
//
// Whenever a write-back fails with a bus error the error is reported through
// process_bus_error() and the line is left untouched, so that dirty data is
// never discarded.

void
Koala::control_dcache(VA va, PA pa, int op, int type)
{
    if (type)
        return; // secondary cache not present

    switch (op) {
    case 0: {
        // Index Writeback Invalidate: write the line back if it is dirty,
        // then invalidate it regardless of whether it was dirty.
        ICache::Set* set = &dcache.set[dcache.index(va)];
        ICache::Line* line = &set->line[dcache.block(va)];
        if (line->state & W) {
            // Rebuild the physical address of the cached block from its
            // index and tag.
            PA victim_pa = (dcache.index(va) << dcache.index_first)
                | (line->tag << (dcache.index_last + 1));
            ClockValue wb_latency =
                bus->write(victim_pa, line->data, dcache.line_size);
            if (is_bus_error(wb_latency)) {
                process_bus_error(data_store);
                break; // keep the dirty line
            }
        }
        line->tag = bad_tag;
        line->state = CS_Invalid;
        break;
    }

    case 1: {
        // Index Load Tag.  This cannot be implemented properly as I don't
        // store the tag for invalidated entries.
        assert(TODO);
        break;
    }

    case 2: {
        // Index Store Tag.  The comment from (1) applies.  As ``Index Load
        // Tag'' is not implemented, this may be left as a noop.
        break;
    }

    case 3: {
        // Create Dirty Exclusive: if the line holds a different, dirty
        // block, write that block back first; then claim the line for the
        // requested address without fetching its contents.
        UInt32 tag = dcache.tag(pa);
        ICache::Set* set = &dcache.set[dcache.index(va)];
        ICache::Line* line = &set->line[dcache.block(va)];
        if (line->tag != tag && (line->state & W)) {
            // Write back current data.
            PA victim_pa = (dcache.index(va) << dcache.index_first)
                | (line->tag << (dcache.index_last + 1));
            ClockValue wb_latency =
                bus->write(victim_pa, line->data, dcache.line_size);
            if (is_bus_error(wb_latency)) {
                process_bus_error(data_store);
                break; // keep the dirty line
            }
        }
        line->tag = tag;
        line->state = CS_DirtyExclusive | (line->state & W);
        break;
    }

    case 4: {
        // Hit Invalidate: if the line holds the requested address, discard
        // it without writing it back.  The outcome is recorded in the CH bit
        // of the status register.
        UInt32 tag = dcache.tag(pa);
        ICache::Set* set = &dcache.set[dcache.index(va)];
        ICache::Line* line = &set->line[dcache.block(va)];
        if (line->tag == tag) {
            // Hit: invalidate the cache line.
            line->tag = bad_tag;
            line->state = CS_Invalid;
            cp0[SR] = set_bit(cp0[SR], SR_CH);
        }
        else {
            // Miss: clear the CH bit in status register.
            cp0[SR] = clear_bit(cp0[SR], SR_CH);
        }
        break;
    }

    case 5: {
        // Hit Writeback Invalidate: only if the line holds the requested
        // address, write it back when dirty and then invalidate it.  On a
        // miss the line belongs to another address and must be left alone.
        UInt32 tag = dcache.tag(pa);
        ICache::Set* set = &dcache.set[dcache.index(va)];
        ICache::Line* line = &set->line[dcache.block(va)];
        if (line->tag == tag) {
            if (line->state & W) {
                // Write back current data.
                PA victim_pa = (dcache.index(va) << dcache.index_first)
                    | (line->tag << (dcache.index_last + 1));
                ClockValue wb_latency =
                    bus->write(victim_pa, line->data, dcache.line_size);
                if (is_bus_error(wb_latency)) {
                    process_bus_error(data_store);
                    break; // keep the dirty line
                }
            }
            line->tag = bad_tag;
            line->state = CS_Invalid;
        }
        break;
    }

    case 6: {
        // Hit Writeback: if the line holds the requested address and is
        // dirty, write it back and mark it clean.  The line stays in the
        // cache.
        UInt32 tag = dcache.tag(pa);
        ICache::Set* set = &dcache.set[dcache.index(va)];
        ICache::Line* line = &set->line[dcache.block(va)];
        if (line->tag == tag && (line->state & W)) {
            // Write back current data.
            PA victim_pa = (dcache.index(va) << dcache.index_first)
                | (line->tag << (dcache.index_last + 1));
            ClockValue wb_latency =
                bus->write(victim_pa, line->data, dcache.line_size);
            if (is_bus_error(wb_latency)) {
                process_bus_error(data_store);
                break; // keep the line dirty
            }
            line->state &= ~W;
        }
        break;
    }

    case 7:
        // Hit Set Virtual.
        break; // secondary cache not present
    }
}


// Perform a load from the virtual address (va).  The address translation has
// already been performed and the physical address is (pa).  The coherency
// algorithm to use is encoded in high-order bits of (pa) using the same
// encoding as that of the xkphys address space region.  It is a template
// explicitly instantiated for doubleword, word, halfword and byte, to improve
// performance of the endianness correction algorithm, as well as minor
// improvements stemming from the fact that bit field extraction on variable
// boundaries is slow.
//
// The template argument (syscmd) is the access size in bytes minus one.
// NOTE: the cached path below is currently unreachable (see the `1 ||').

template <int syscmd> UInt64
Koala::load(VA va, PA pa)
{
    const int size = syscmd + 1;
    typedef typename FixedWidth<size * 8>::Unsigned T;
    ClockValue latency;
    int ca = coherency_algorithm(pa);

    // dcache currently disabled as it is broken - matthewc
    if (1 || ca == uncached) {
        // A direct memory access.  The buffer is zeroed first because only
        // (size) bytes are requested from the bus.
        UInt64 x = 0;
        latency = bus->read(swizzle<syscmd>(pa), &x, size);
        if (is_bus_error(latency))
            process_bus_error(data_load);
        return reverse_endian() ? byte_swap(T(x)) : x;
    }

    // A cached memory access.
    UInt32 index = dcache.index(va);
    UInt32 tag = dcache.tag(pa);
    ICache::Set* set = &(dcache.set[index]);
    const VA line_mask = dcache.line_size - 1;

    // Find the correct entry in the set (if any).
    int way = -1;
    for (int i = 0; i < dcache.assoc; ++i) {
        if (set->line[i].tag == tag) {
            way = i;
            break;
        }
    }

    if (way < 0) {
        // We've got a cache miss: pick a victim line.
        way = set->lru_replace();
        ICache::Line* victim = &(set->line[way]);

        // Write back any dirty data.
        if (victim->state & W) {
            PA victim_pa = (index << dcache.index_first)
                | (victim->tag << (dcache.index_last + 1));
            ClockValue wb_latency =
                bus->write(victim_pa, victim->data, dcache.line_size);
            if (is_bus_error(wb_latency))
                process_bus_error(data_store);
        }

        // Fill the cache line from the main memory.
        latency = bus->read(pa & ~line_mask, victim->data, dcache.line_size);
        if (is_bus_error(latency)) {
            // Drop the tag together with the state: hits are detected by
            // tag comparison alone, so a stale tag must not survive.
            victim->tag = bad_tag;
            victim->state = CS_Invalid;
            // NOTE(review): presumably process_bus_error() does not return
            // -- confirm against its definition.
            process_bus_error(data_load);
        }
        victim->tag = tag;
        victim->state = CS_DirtyExclusive;
    }
    set->lru_touch(way);

    // Finally, fetch the data from the cache.
    ICache::Line* line = &(set->line[way]);
    return swizzle<syscmd>(line->data[(pa & line_mask) / 8], va);
}

template UInt64 Koala::load<0>(UInt64 va, UInt64 pa);
template UInt64 Koala::load<1>(UInt64 va, UInt64 pa);
template UInt64 Koala::load<3>(UInt64 va, UInt64 pa);
template UInt64 Koala::load<7>(UInt64 va, UInt64 pa);


// Store data to the virtual address (va).  The address translation has already
// been performed and the physical address is (pa).  The coherency algorithm to
// use is encoded in high-order bits of (pa) using the same encoding as that
// of the xkphys address space region.  It is a template explicitly
// instantiated for doubleword, word, halfword and byte, to improve performance
// of the endianness correction algorithm, as well as minor improvements
// stemming from the fact that bit field extraction on variable boundaries is
// slow.
//
// The template argument (syscmd) is the access size in bytes minus one.
// NOTE: the cached path below is currently unreachable (see the `0 &&').

template <int syscmd> void
Koala::store(UInt64 data, VA va, PA pa)
{
    const int size = syscmd + 1;
    typedef typename FixedWidth<size * 8>::Unsigned T;
    T x = reverse_endian() ? byte_swap(T(data)) : data;
    int ca = coherency_algorithm(pa);

    if (0 && ca != uncached) {
        // A cached memory access.
        UInt32 index = dcache.index(va);
        UInt32 tag = dcache.tag(pa);
        ICache::Set* set = &(dcache.set[index]);
        const VA line_mask = dcache.line_size - 1;

        // Find the correct entry in the set (if any).
        ICache::Line* line = 0;
        for (int i = 0; i < dcache.assoc; ++i) {
            if (set->line[i].tag == tag) {
                line = &(set->line[i]);
                set->lru_touch(i);
                break;
            }
        }

        // Otherwise, we've got a cache miss.  For write-through
        // no-write-allocate requests, this is identical to an uncached write.
        if (!line && ca != noncoherent_write_through) {
            int way = set->lru_replace();
            line = &(set->line[way]);

            // Write back any dirty data.
            if (line->state & W) {
                PA victim_pa = (index << dcache.index_first)
                    | (line->tag << (dcache.index_last + 1));
                ClockValue wb_latency =
                    bus->write(victim_pa, line->data, dcache.line_size);
                if (is_bus_error(wb_latency))
                    process_bus_error(data_store);
            }

            // Fill the cache line from the main memory.
            ClockValue fill_latency =
                bus->read(pa & ~line_mask, line->data, dcache.line_size);
            if (is_bus_error(fill_latency)) {
                // Drop the tag together with the state: hits are detected
                // by tag comparison alone.
                line->tag = bad_tag;
                line->state = CS_Invalid;
                // NOTE(review): presumably process_bus_error() does not
                // return -- confirm against its definition.
                process_bus_error(data_load);
            }
            line->tag = tag;
            // A freshly filled line is clean; this also drops any W bit
            // left over from the evicted block.
            line->state = CS_DirtyExclusive;
            set->lru_touch(way);
        }

        if (line) {
            // Update the cache line.
            if (syscmd == doubleword)
                line->data[(pa & line_mask) / 8] = x;
            else {
                // Position the datum inside its enclosing doubleword.
                int shift = big_endian_cpu()
                    ? (64 - size * 8) - (pa & 7) * 8
                    : (pa & 7) * 8;
                UInt64 mask = bitmask(size * 8 - 1, 0) << shift;
                UInt64 bits = UInt64(x) << shift;
                line->data[(pa & line_mask) / 8] =
                    (line->data[(pa & line_mask) / 8] & ~mask) | bits;
            }

            // For write-back requests, this is all we had to do, apart from
            // marking the line dirty so that it is written back later.
            // Otherwise, we have a write-through request and must also
            // update main RAM.
            if (ca == noncoherent_write_back) {
                line->state |= W;
                return;
            }
        }
    }

    // A direct memory access.
    data = x;
    ClockValue latency = bus->write(swizzle<syscmd>(pa), &data, size);
    if (is_bus_error(latency))
        process_bus_error(data_store);
}

template void Koala::store<Koala::byte>(UInt64 x, UInt64 va, UInt64 pa);
template void Koala::store<Koala::halfword>(UInt64 x, UInt64 va, UInt64 pa);
template void Koala::store<Koala::word>(UInt64 x, UInt64 va, UInt64 pa);
template void Koala::store<Koala::doubleword>(UInt64 x, UInt64 va, UInt64 pa);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -