📄 bcm1480_draminit.c
字号:
uint32_t flags; /* 32: various flags */ uint32_t inuse; /* 36: indicates MC is in use */ uint16_t cfg_chanintlv_type; /* 40: Try to interleave channels */ uint64_t ttlbytes; /* 48: total bytes */ mcdata_t mc[MC_MAX_CHANNELS];/* 56: per-channel data (4 * 208) */} initdata_t; /* total size: 888 bytes */#define M_MCINIT_TRYPINTLV 1 /* Try to do port interleaving */#define M_MCINIT_PINTLV 2 /* Actually do port interleaving *//* Work area: initdata structure plus enough working stack to run the DRAM init routine. We round the initdata structure up to a 1K boundary, and throw in an extra 1K for stack space. This **MUST** evaluate to a compile-time constant. */#define WORK_AREA_SIZE (((sizeof(initdata_t) + 1023) / 1024) + 1) * 1024/* ********************************************************************* * Configuration data structure ********************************************************************* */#include "bcm1480_draminit.h"#include "jedec.h"/* ********************************************************************* * Initialized data * * WARNING WARNING WARNING! * * This module is *very magical*! We are using the cache as * SRAM, and we're running as relocatable code *before* the code * is relocated and *before* the GP register is set up. * * Therefore, there should be NO data declared in the data * segment - all data must be allocated in the .text segment * and references to this data must be calculated by an inline * assembly stub. * * If you grep the disassembly of this file, you should not see * ANY references to the GP register. ********************************************************************* */#ifdef _MCSTANDALONE_NOISY_static char *bcm1480_rectypes[] = {"MCR_GLOBALS","MCR_CHCFG","MCR_TIMING", "MCR_DLLCFG","MCR_GEOM","MCR_SPD","MCR_MANTIMING"};#endif/* ********************************************************************* * Module Description * * This module attempts to initialize the DRAM controller on the * BCM1480. 
There are four channels (0-3), each providing a 32-bit
 * data path.  Pairs of channels can be ganged together to form
 * up to two 64-bit channels (0-1).  When ganged together, channels
 * 0 & 2 become channel 0 and channels 1 & 3 become channel 1.
 *
 * Each 32-bit channel can support four chip selects, or two double-
 * sided DDR SDRAM DIMMs.
 *
 * Each 64-bit channel can support eight chip selects, or four
 * double-sided DDR SDRAM DIMMs.
 *
 * The controller can support up to eight DIMMs.
 *
 * The steps to initialize the DRAM controller are:
 *
 *     * Read the SPD, verify DDR SDRAMs or FCRAMs
 *     * Obtain #rows, #cols, #banks, and module size
 *     * Calculate row, column, and bank masks
 *     * Calculate chip selects
 *     * Calculate timing register.  Note that we assume that
 *       all banks will use the same timing.
 *     * Repeat for each DRAM.
 *
 * DRAM Controller registers are programmed in the following order:
 *
 *     MC_TEST_DATA, MC_TEST_ECC
 *
 *     MC_CSXX_BA, MC_CSXX_COL, MC_CSXX_ROW
 *     (repeated for each chip select)
 *
 *     MC_CS_START, MC_CS_END
 *
 *     MC_CONFIG (for CS interleaving)
 *     MC_GLB_INTLV (for channel interleaving)
 *
 *     MC_CLOCK_CFG
 *     MC_TIMING
 *     (delay)
 *     MC_DRAMMODE
 *     (delay after each mode setting ??)
 *
 * Once the registers are initialized, the DRAM is activated by
 * sending it the following sequence of commands:
 *
 *     PRE (precharge)
 *     EMRS (extended mode register set)
 *     MRS (mode register set)
 *     PRE (precharge)
 *     AR (auto-refresh)
 *     AR (auto-refresh again)
 *     MRS (mode register set)
 *
 * then wait 200 memory clock cycles without accessing DRAM.
 *
 * Following initialization, the ECC bits must be cleared.  This
 * can be accomplished by disabling ECC checking on all memory
 * controllers, and then zeroing all memory via the mapping
 * in xkseg.
********************************************************************* */

/* *********************************************************************
 *
 * Address Bit Assignment Algorithm:
 *
 * Good performance can be achieved by taking the following steps
 * when assigning address bits to the row, column, and interleave
 * masks.  You will need to know the following:
 *
 *    - The number of rows, columns, and banks on the memory devices
 *    - The block size (larger tends to be better for sequential
 *      access)
 *    - Whether you will interleave chip-selects
 *    - Whether you will be using both memory controllers and want
 *      to interleave between them
 *
 * By choosing the masks carefully you can maximize the number of
 * open SDRAM banks and reduce access times for nearby and sequential
 * accesses.
 *
 * The diagram below depicts a physical address and the order
 * that the bits should be placed into the masks.  Start with the
 * least significant bit and assign bits to the row, column, bank,
 * and interleave registers in the following order:
 *
 *         <------------Physical Address--------------->
 *  Bits:    RRRRRRR..R  CCCCCCCC..C  SS  BB  PP  CCc00
 *  Step:        6            5       4   3   2     1
 *
 * Where:
 *    R = Row Address Bit     (MC_CSXX_ROW register)
 *    C = Column Address Bit  (MC_CSXX_COL register)
 *    S = Chip Select         (MC_CONFIG register)
 *                            (when interleaving via chip selects)
 *    B = Bank Bit            (MC_CSXX_BA register)
 *    P = Channel Select Bit  (MC_GLB_INTLV register)
 *                            (when interleaving memory channels)
 *    c = Column Address Bit  (MC_CSXX_COL register for 32-bit channels)
 *                            (0 for 64-bit channels)
 *    0 = must be zero
 *
 * When an address bit is "assigned" it is set in one of the masks
 * in the MC_CSXX_ROW, MC_CSXX_COL, MC_CSXX_BA, MC_CONFIG, or MC_GLB_INTLV
 * registers.
 *
 *
 * 1. The least significant 5 bits specify the byte within a cache line,
 *    and are always zero in physical addresses presented to the memory
 *    controller.
In 32-bit channels, the controller uses the top 3 bits
 *    as column bits to sequence the 8 chunks of the cache line, and
 *    the bottom 2 bits are ignored.  For ganged 64-bit channels the top
 *    2 bits are used to sequence the four chunks, and the bottom 3 bits
 *    are ignored.  The appropriate number of 2 or 3 column bits must be
 *    subtracted from the total number in the device to give the number
 *    of column bits.
 *
 * 2. These one or two bits are used for interleaving the channels and
 *    are specified in the MC_GLB_INTLV register.
 *
 * 3. These bits select the internal bank within a memory device.  Most
 *    devices have four banks, so 2 bits are set in the MC_CSXX_BA register.
 *
 * 4. These bits select the interleaving among physical devices on a
 *    channel via chip selects in the MC_CONFIG register.  If chip select
 *    interleaving is not used on the channel, no bits are specified here.
 *
 * 5. The remaining column bits not used in field 1 are set in the
 *    MC_CSXX_COL registers.
 *
 * 6. The row bits of the device are specified in the MC_CSXX_ROW
 *    registers.
 ********************************************************************* */

/* *********************************************************************
 * bcm1480_find_timingcs(mc)
 *
 * For a given memory controller, choose the chip select whose
 * timing values will be used to base the TIMING and MCLOCK_CFG
 * registers on.
 *
 * Input parameters:
 *     mc - memory controller
 *
 * Return value:
 *     chip select index, or -1 if no active chip selects.
********************************************************************* */static int bcm1480_find_timingcs(mcdata_t *mc){ int idx; /* for now, the first one with data is the one we pick */ for (idx = 0; idx < MC_MAX_CHIPSELS; idx++) { if (mc->csdata[idx].flags & CS_PRESENT) return idx; } return -1;}/* ********************************************************************* * bcm1480_auto_timing(mcidx,tdata) * * Program the memory controller's timing registers based on the * timing information stored with the chip select data. For DIMMs * this information comes from the SPDs, otherwise it was entered * from the datasheets into the tables in the init modules. * * Input parameters: * mcidx - memory controller index (0 or 1) * tdata - a chip select data (csdata_t) * * Return value: * nothing ********************************************************************* */static void bcm1480_auto_timing(int mcidx,mcdata_t *mc,csdata_t *tdata){ unsigned int res; unsigned int plldiv; unsigned int clk_ratio; unsigned int refrate; unsigned int ref_freq; unsigned int caslatency; unsigned int spd_tCK_25; unsigned int spd_tCK_20; unsigned int spd_tCK_10; unsigned int tCpuClk; unsigned int tMemClk; unsigned int fMemClk; unsigned int w2rIdle,r2wIdle,r2rIdle; unsigned int tCL,tCrDh,tFIFO; unsigned int tCwD; unsigned int tRAS; unsigned int tWR,tWTR; unsigned int tRP,tRRD,tRCD,tRCw,tRCr,tRFC; uint64_t timing1, mclkcfg; sbport_t base; /* Timing window variables */ int n01_open,n02_open,n12_open; int n01_close,n02_close,n12_close; int dqsArrival; int addrAdjust,dqiAdjust,dqoAdjust; int minDqsMargin; int dllScaleNum,dllScaleDenom,dllOffset; /* * We need our cpu clock for all sorts of things. */#if defined(_FUNCSIM_) plldiv = 16; /* 800MHz CPU for functional simulation */#else plldiv = G_BCM1480_SYS_PLL_DIV(READCSR(PHYS_TO_K1(A_SCD_SYSTEM_CFG)));#endif if (plldiv == 0) { /* XXX: should be common macro, also defaulted by boards' *_devs.c. 
*/ plldiv = 6; } /* * Compute tCpuClk, in picoseconds to avoid rounding errors. * * Calculation: * tCpuClk = 1/fCpuClk * = 1/(100MHz * plldiv/2) * = 2/(100MHz*plldiv) * = 2/(100*plldiv) us * = 20/plldiv ns * = 2000000/plldiv 10ths of ns * * If BCM1480_REFCLK is in MHz, then: * 2/(BCM1480_REFCLK*plldiv) us * = 2000/(BCM1480_REFCLK*plldiv) ns * = 2000000/(BCM1480_REFCLK*plldiv) ps * * However, we want to round the result to the nearest integer, * so we double the numerator (to 4000000) to get one more bit * of precision in the quotient, then add one and scale it back down */ /* tCpuClk is in picoseconds */ tCpuClk = ((4000000/(BCM1480_REFCLK*plldiv))+1)/2; spd_tCK_25 = DECTO10THS(tdata->spd_tCK_25); spd_tCK_20 = DECTO10THS(tdata->spd_tCK_20); spd_tCK_10 = DECTO10THS(tdata->spd_tCK_10); /* * Compute the target tMemClk, in units of tenths of nanoseconds * to be similar to the JEDEC SPD values. This will be * * MAX(MIN_tMEMCLK,spd_tCK_25) */ tMemClk = spd_tCK_25; if (mc->mintmemclk > tMemClk) tMemClk = mc->mintmemclk; /* * Now compute our clock ratio (the amount we'll divide tCpuClk by * to get as close as possible to tMemClk without exceeding it. * * It's (tMemClk*100) here because tCpuClk is in picoseconds * * The ratio needs to be relative to the ZBBus, which runs at * 1/2 the core clock. * * The clock ratios are expressed in quarters (denominator=4) * so multiply the numerator by 4 before doing the divide. * Therefore, the low 2 bits fo the clk_ratio result will be * the fractional part. */ clk_ratio = (4*((tMemClk*100) + tCpuClk - 1)) / (2*tCpuClk);#ifdef _MCSTANDALONE_NOISY_ printf("DRAM: Would like to use clk_ratio %d\n",clk_ratio); printf("DRAM: memory's tMemClk is %d\n",tMemClk);#endif if (clk_ratio < 4) clk_ratio = 4; if (clk_ratio > 24) clk_ratio = 24;#if _BCM1480_S0_WORKAROUNDS_ /* * XXX 1480 preproduction samples problem: Keep clk_ratio * XXX an integer multiple. 
*/ clk_ratio = ((clk_ratio+3) & ~3); /* make integer multiple */ if (clk_ratio > 8) clk_ratio = 8; /* but don't exceed 2 */#endif /* * Now, recompute tMemClk using the new clk_ratio. This gives us * the actual tMemClk that the memory controller will generate * * Calculation: * fMemClk = BCM1480_REFCLK * plldiv / clk_ratio Mhz * * tMemClk = 1/fMemClk us * = clk_ratio / (BCM1480_REFCLK * plldiv) us * = 10000 * clk_ratio / (BCM1480_REFCLK * plldiv) 0.1ns * * The resulting tMemClk is in tenths of nanoseconds so we * can compare it with the SPD values. The x10000 converts * us to 0.1ns */new_ratio: tMemClk = (10000 * clk_ratio)/(BCM1480_REFCLK * plldiv); /* Calculate the refresh rate */ switch (tdata->spd_rfsh & JEDEC_RFSH_MASK) { case JEDEC_RFSH_64khz: ref_freq = 64; break; case JEDEC_RFSH_256khz: ref_freq = 256; break; case JEDEC_RFSH_128khz: ref_freq = 128; break; case JEDEC_RFSH_32khz: ref_freq = 32; break; case JEDEC_RFSH_8khz: ref_freq = 8; break; default: ref_freq = 8; break; } /* * Compute the target refresh value, in Khz/32. We know * the rate that the DIMMs expect (in Khz, above). So we need * to calculate what the MemClk is divided by to get that value. * There is an internal divide-by-32 in the 1400 in the refresh * generation. * * fMemClk (in KHz) calculated as follows: * * core_clock = plldiv * reference * 1000 / 2; * zbbus_clock = core_clock / 2; * mem_clock = (zbbus_clock * 4) / clk_ratio * * The refresh counter ticks once every fMemClk cycles, and * we want 'ref_freq' number of cycles to happen per second. * The refresh counter ticks once every 32 MemClks. * * Therefore, we can issue refresh pulses at a rate of * fMemClk/32 if we wanted to, but to get the correct * counter value all we need to do is: * * refrate = (fMemClk/32) / ref_freq (in Khz) */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -