⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cache_example.asm

📁 Blackfin processor Cache code example
💻 ASM
字号:
/************************************************

(C) Copyright 2001 - Analog Devices, Inc.  All rights reserved.

File Name:		cache_example.asm

Date Modified:	12/20/01		CL		Rev 0.1

This example shows how to configure the ADSP-BF535 to run with L1 memory
configured as cache.

Since the cache-line valid bits come up in a random state at reset,
the first thing we need to do is clear each valid bit in both the data 
and instruction L1 caches.

The next thing we need to do is to set up the CPLBs for data and instruction 
memory.  The CPLB setup provided here is very basic.

Once CPLB's are defined, cache can be enabled.  There is a small blink program 
which is located in L2 memory.  This code is actually cached in this example.

****************************************************/
#include <DefBF535.h>
.section l2_sram;
.align 2;
.global _main;			// directive terminated with ';' like every other directive in this file

start:
_main:

// Program entry point.
// Switch both L1 memories into cache mode before the tag arrays are touched.
// No CPLBs are enabled at this point; the CPLB enable bits are only set
// later, after the tags have been invalidated and the CPLB registers loaded.
	P0.L = (IMEM_CONTROL & 0xFFFF);
	P0.H = (IMEM_CONTROL >> 16);
	R0.L = 0x5;			// bits 0 and 2 set: enable instruction memory as cache
	R0.H = 0x0;
	[P0] = R0;

	P1.L = (DMEM_CONTROL & 0xFFFF);
	P1.H = (DMEM_CONTROL >> 16);
	R0.L = 0xD;			// bits 0, 2 and 3 set: enable superbanks A and B as caches
	R0.H = 0x0;
	[P1] = R0;
	csync;				// let both control-register writes complete before continuing
//******************Preloading the Instruction Cache**********
//Registers used:
// r0 - contains the command value. written to ITEST COMMAND
// r1 - value written to ITEST DATA0 and ITEST DATA1
//    * DATA write
//    - contains the instruction to be written
// r2 - contains intermediate values during computation
// r3 - contains mask for the double word index (bit[3:4]=00)
// r4 - contains mask for the set index (bit[5:9]=00000)
// r5 - contains mask for the ways (bit[26:27]=00)
// r6 - value written to ITEST DATA0
//    * TAG write
//    - contains the original address
//    - used to determine sub- banks
// i0 - pointer to ITEST_COMMAND
// i1 - pointer to ITEST_DATA0
// i2 - pointer to ITEST_DATA1
// i3 - pointer to relocated (instruction) data
// p4 - loop counter to perform the writes to DATA and TAGs
// m0 - save command for sub- bank 0
// m1 - save command for sub- bank 1
// m2 - save command for sub- bank 2
// m3 - save command for sub- bank 3
//*************************************************************

    // Clear every DAG length (L) and modify (M) register.  With the L
    // registers at zero, the I-register accesses below are plain linear
    // (non-circular) accesses.
    R7 = 0;
	L0 = R7;
	L1 = R7;
	L2 = R7;
	L3 = R7;
	M0 = R7;
	M1 = R7;
	M2 = R7;
	M3 = R7;

    // I0 -> ITEST_COMMAND, I1 -> ITEST_DATA0
    I0.H = (ITEST_COMMAND >> 16);
    I0.L = (ITEST_COMMAND & 0xFFFF);
    I1.H = (ITEST_DATA0 >> 16);
    I1.L = (ITEST_DATA0 & 0xFFFF);

// ITEST_DATA0 supplies the value that is stored into the cache whenever a
// write to the tag/data array is requested through ITEST_COMMAND, so it has
// to be loaded before the command register is written (page 6-28 in HRM).
// Writing zero here means every tag written below gets a cleared valid bit.
R0 = 0;
[I1] = R0;
// The cache lines come out of reset in a random state
// The valid bits of each line must be set to zero at initialization
// Explanation: The invalidation routine is as follows:
//  Cache setup:
//  I-CACHE:
//    4 sub- banks, 
//    each sub- bank has 4 ways, 
//    each way has 32 lines,
//    each line (or set) has 4 double words.
//  Routine:
//  * Take way 0, and way1
//  * Inner loop:
//         Invalidate all sets (cache lines) in sub- bank for way 0 and way1
//         Instruction Cache has 32 sets (cache lines), hence loop count is 32
//  * Outer loop:
//         Increment sub- banks
//         repeat inner loop
//         do it 4 times because of 4 sub- banks.
//  * Repeat again for way 2, way 3

// Invalidate every I-cache tag by writing ITEST_COMMAND once per
// (way, sub-bank, set).  Two ways are handled per pass; the outer
// hardware loop (LC1) walks the 4 sub-banks, the inner one (LC0) the
// 32 sets.  Expects I0 -> ITEST_COMMAND and ITEST_DATA0 already zeroed.

// Loop constants
R2 = 32;              // set-index step added to the command word each inner iteration
R3.H = 1;             // sub-bank step (0x00010000), added once per outer iteration
R3.L = 0;
P4 = 4;               // outer trip count: number of sub-banks
P3 = R2;              // inner trip count: 32 sets per way

// Starting command words; low half = 2 requests a WRITE to the TAG array
R4 = 2;               // way 0
R5.L = 2;             // way 1
R5.H = 0x400;

// Pass 1: ways 0 and 1
LSETUP(ic01_bank, ic01_bank_end) LC1 = P4;
ic01_bank:
       R0 = R4;                       // restart at set 0 of the current sub-bank
       R1 = R5;
       LSETUP(ic01_set, ic01_set_end) LC0 = P3;
       // The store issued in parallel uses the command value from before
       // the add, so the first write of each pass targets set 0.  +|+ adds
       // the halves independently, so the set index never carries into the
       // way/sub-bank bits held in the high half.
       ic01_set:     R0 = R0 +|+ R2 || [I0] = R0;
       ic01_set_end: R1 = R1 +|+ R2 || [I0] = R1;
       R4 = R4 + R3;                  // next sub-bank, way 0
ic01_bank_end:
       R5 = R5 + R3;                  // next sub-bank, way 1

// Starting command words for the second pass
R4.L = 2;             // way 2
R4.H = 0x800;
R5.L = 2;             // way 3
R5.H = 0xc00;

// Pass 2: ways 2 and 3
LSETUP(ic23_bank, ic23_bank_end) LC1 = P4;
ic23_bank:
       R0 = R4;
       R1 = R5;
       LSETUP(ic23_set, ic23_set_end) LC0 = P3;
       ic23_set:     R0 = R0 +|+ R2 || [I0] = R0;
       ic23_set_end: R1 = R1 +|+ R2 || [I0] = R1;
       R4 = R4 + R3;
ic23_bank_end:
       R5 = R5 + R3;


//***********************************
//** DCACHE Invalidation
//***********************************

	// Write DMEM_CONTROL with both superbanks configured as cache and L1
	// data memory enabled, bracketing the write with SSYNC.
	P0.H = (DMEM_CONTROL >> 16);
	P0.L = (DMEM_CONTROL & 0xFFFF);
	R0 = (ACACHE_BCACHE | ENDM);
	ssync;
	[P0] = R0;
	ssync;

    // I0 -> DTEST_COMMAND, I1 -> DTEST_DATA0
    I0.H = (DTEST_COMMAND >> 16);
    I0.L = (DTEST_COMMAND & 0xFFFF);
    I1.H = (DTEST_DATA0 >> 16);
    I1.L = (DTEST_DATA0 & 0xFFFF);

// Zero DTEST_DATA0 so that the tag writes issued through DTEST_COMMAND
// store a cleared valid bit.
R0 = 0;
[I1] = R0;

// Repeat for data cache
//Explanation: The invalidation routine is as follows:
//  Cache construction:
//  D-CACHE:
//    2 superbanks,
//    each superbank has 4 sub- banks, 
//    each sub- bank has 2 ways, 
//    each way has 64 lines,
//    each line (or set) has 4 double words.
//  Routine:
//  * Take way 0, and way1
//  * Inner loop:
//         Invalidate all sets (cache lines) in sub- bank for way 0 and way1
//         Data Cache has 64 sets (cache lines) per way, hence loop count is 64
//  * Outer loop:
//         Increment sub- banks
//         repeat inner loop
//         do it 4 times because of 4 sub- banks.
//  * Repeat again for superbank B

// Invalidate every D-cache tag by writing DTEST_COMMAND once per
// (superbank, way, sub-bank, set).  Each pass covers both ways of one
// superbank; the outer hardware loop (LC1) walks the 4 sub-banks, the
// inner one (LC0) the 64 sets.  Expects I0 -> DTEST_COMMAND and
// DTEST_DATA0 already zeroed.

// Loop constants
P3 = 64;          // inner trip count: 64 sets per way
R2 = 32;          // set-index step added to the command word each inner iteration
R3.H = 1;         // sub-bank step (0x00010000), added once per outer iteration
R3.L = 0;
P4 = 4;           // outer trip count: number of sub-banks

//******************************
//** Invalidation of Superbank A
//******************************
// Starting DTEST_COMMAND words; low half = 2 requests a WRITE to the TAG array
R4.L = 2;              // way 0
R4.H = 0x00;
R5.L = 2;              // way 1
R5.H = 0x400;

LSETUP(dcA_bank, dcA_bank_end) LC1 = P4;
dcA_bank:
       R0 = R4;                       // restart at set 0 of the current sub-bank
       R1 = R5;
       LSETUP(dcA_set, dcA_set_end) LC0 = P3;
       // The parallel store uses the command value from before the add;
       // +|+ keeps the set-index arithmetic inside the low half.
       dcA_set:     R0 = R0 +|+ R2 || [I0] = R0;
       dcA_set_end: R1 = R1 +|+ R2 || [I0] = R1;
       R4 = R4 + R3;                  // next sub-bank, way 0
dcA_bank_end:
       R5 = R5 + R3;                  // next sub-bank, way 1

//******************************
//** Invalidation of Superbank B
//******************************
// Same command words with 0x0080 additionally set in the high half,
// which this routine uses to address superbank B.
R4.L = 2;              // way 0
R4.H = 0x080;
R5.L = 2;              // way 1
R5.H = 0x480;

LSETUP(dcB_bank, dcB_bank_end) LC1 = P4;
dcB_bank:
       R0 = R4;
       R1 = R5;
       LSETUP(dcB_set, dcB_set_end) LC0 = P3;
       dcB_set:     R0 = R0 +|+ R2 || [I0] = R0;
       dcB_set_end: R1 = R1 +|+ R2 || [I0] = R1;
       R4 = R4 + R3;
dcB_bank_end:
       R5 = R5 + R3;

//*******************************
// Routine to read cache arrays
// This is provided as an example on how to read cache arrays
// It is not required to make the cache work
//*******************************

//Address used =0xf0000000
//Read the tags
    // Example only: read back 32 consecutive I-cache tags.
    // I0 -> ITEST_COMMAND, I1 -> ITEST_DATA0
    I0.H = (ITEST_COMMAND >> 16);
    I0.L = (ITEST_COMMAND & 0xFFFF);
    I1.H = (ITEST_DATA0 >> 16);
    I1.L = (ITEST_DATA0 & 0xFFFF);

    // R0 walks the addresses whose cache lines are inspected,
    // starting at 0xf0000000 and advancing one 32-byte line per pass.
    R0.H = 0xf000;
    R0.L = 0;
    P5 = 32;   // one pass per set: covers a whole way of one sub-bank

LSETUP (tag_rd_top, tag_rd_end) LC0 = P5;
     tag_rd_top:
         R2 = [I0];        // start from the current ITEST_COMMAND contents

         // Clear the fields that are rebuilt below:
         //   way [27:26], sub-bank [17:16], set/double-word/array/RW [9:1]
         R1.H = 0xf3fc;
         R1.L = 0xfc01;
         R2 = R2 & R1;

         // Address bits [9:3] map straight onto the set and
         // double-word index fields of the command word.
         R1 = 0x3f8;
         R1 = R1 & R0;
         R2 = R2 | R1;

         // Address bits [13:12] become the sub-bank field [17:16].
         R1 = 0x3000;
         R1 = R1 & R0;
         R1 <<= 4;
         R2 = R1 | R2;

         // Address bits [11:10] become the way field [27:26].
         R1 = 0x0c00;
         R1 = R1 & R0;
         R1 <<= 16;
         R2 = R1 | R2;

         BITCLR(R2, 1);    // bit 1 = 0: read operation
         BITCLR(R2, 2);    // bit 2 = 0: access the tag array

         // NOTE(review): the hardware reference may call for a CSYNC between
         // the command write and the data read - confirm for the BF535.
         [I0] = R2;        // issue the command
         R3 = [I1];        // R3 should now hold the tag information

    tag_rd_end: R0 += 32;  // step to the next line (set)




// Memory map configuration.
// We assume the presence of the following areas, with the
// following cache settings:
// Core and System MMRs, 4MB, not cached
// L1 SRAM Scratch pad, 1MB, not cached
// L1 SRAM code area, 1MB, instruction cache of L2
// L1 SRAM data area A, 1MB, data cache of L2
// L1 SRAM data area B, 1MB, data cache of L2
// L2 SRAM 1MB, cached
// SDRAM 4MB, cached
// SDRAM 4MB, cached
// (all sizes give the regions of memory allowed, which may be
// larger than the physical memory available)
//
// Other areas of memory are considered to be not available, and
// accesses to them will generate exceptions.

#define NUM_CPLBS 8

	// Copy the (address, data) pairs held in cplb_table into the CPLB
	// registers.  The same table is loaded twice: once into the data
	// CPLBs and once into the instruction CPLBs.  Each loop pass consumes
	// one pair and relies on post-incrementing I0/I1/I2.

	// ---- Data CPLBs ----
	I2.L = cplb_table;			// I2 -> first table entry
	I2.H = cplb_table;

	I0.H = (DCPLB_ADDR0 >> 16);		// I0 -> DCPLB_ADDR0
	I0.L = (DCPLB_ADDR0 & 0xFFFF);

	I1.H = (DCPLB_DATA0 >> 16);		// I1 -> DCPLB_DATA0
	I1.L = (DCPLB_DATA0 & 0xFFFF);

	P3 = NUM_CPLBS;				// trip count, also reused by the second loop
	LSETUP(dcplb_copy, dcplb_copy_end) LC0 = P3;
dcplb_copy:
				R0 = [I2++];	// page start address
				[I0++] = R0;
				R0 = [I2++];	// page size and attribute flags
dcplb_copy_end:
				[I1++] = R0;

	// ---- Instruction CPLBs ----
	I2.L = cplb_table;			// rewind to the start of the table
	I2.H = cplb_table;

	I0.H = (ICPLB_ADDR0 >> 16);		// I0 -> ICPLB_ADDR0
	I0.L = (ICPLB_ADDR0 & 0xFFFF);

	I1.H = (ICPLB_DATA0 >> 16);		// I1 -> ICPLB_DATA0
	I1.L = (ICPLB_DATA0 & 0xFFFF);

	LSETUP(icplb_copy, icplb_copy_end) LC0 = P3;
icplb_copy:
			R0 = [I2++];		// page start address
			[I0++] = R0;
			R0 = [I2++];		// page size and attribute flags
icplb_copy_end:
			[I1++] = R0;

	// Now that the CPLBs are set up, set the control words to
	// reference them.

	// Configure L1 SRAM data banks as cache
	// - Default to DCBS==0, so LOWBIT (bit14) selects bank A or B
	//   (because that splits L2 across A and B)
	// - set DMC==11, so both A and B are cache.
	// - Set ENDCPLB==1, so DCPLBs are referenced.
	// - set ENDM==1, so L1 Data Memory is enabled.
	// Turn on CPLB checking together with the cache for L1 data memory.
	// The existing register contents are preserved; only the listed bits
	// are OR-ed in.  The write is bracketed by SSYNC, matching the earlier
	// DMEM_CONTROL write in this file, so that outstanding accesses have
	// drained before the memory configuration changes and the new setting
	// is in place before execution continues.
	P0.L = (DMEM_CONTROL & 0xFFFF);
	P0.H = (DMEM_CONTROL >> 16);
	R0 = [P0];
	R1 = (ACACHE_BCACHE | ENDCPLB |ENDM);
	R0 = R0 | R1;			// set these bits
	ssync;
	[P0] = R0;
	ssync;

	// Configure L1 SRAM code bank as cache
	// - Default to ILOC==0000
	// - Set IMC==1, so cache.
	// - Set ENICPLB==1, so ICPLBs are referenced.
	// - Set ENIM==1, so Code memory is enabled.
	// The write is synchronised in the same way as above because the very
	// next instruction jumps to code that is meant to be fetched through
	// the instruction cache.
	P0.L = (IMEM_CONTROL & 0xFFFF);
	P0.H = (IMEM_CONTROL >> 16);
	R0 = [P0];
	R1 = (IMC | ENICPLB | ENIM);
	R0 = R0 | R1;
	ssync;
	[P0] = R0;
	ssync;
/* L1CACHE */

jump start_here;



// Data for use when initialising the CPLBs when setting up
// the cache. 
// If additional CPLBs are needed, add entries to cplb_table and increase NUM_CPLBS to match.

	// Addresses of the memory areas we configure.
    // Note: the CPLB's are configured using this technique for
	// clarity.  In a real application, MMR space and L1 data memory
	// only need to be defined with a DCPLB
	// L1 instruction memory only needs to be defined with a ICPLB entry
.align 4;

// CPLB descriptor table: NUM_CPLBS pairs of 32-bit words, each pair being
//   <page start address>, <page size | attribute flags>
// The setup code copies the first word of every pair into a CPLB ADDR
// register and the second into the matching CPLB DATA register.
// Every page start address must be a multiple of its page size, and pages
// must not overlap.
cplb_table:
	#define L2_FLAGS CPLB_WT|CPLB_L1_CHBL|CPLB_L1SRAM|CPLB_SUPV_WR|CPLB_USER_WR|CPLB_USER_RD|CPLB_VALID
	#define SDRAM_FLAGS CPLB_WT|CPLB_L1_CHBL|CPLB_L1SRAM|CPLB_SUPV_WR|CPLB_USER_WR|CPLB_USER_RD|CPLB_VALID
	.byte4 =
		0xFFC00000, (PAGE_SIZE_4MB|CPLB_DIRTY|CPLB_SUPV_WR|CPLB_VALID),// MMRs
		0xFFB00000,	(PAGE_SIZE_1MB|CPLB_DIRTY|CPLB_SUPV_WR|CPLB_USER_WR|CPLB_USER_RD|CPLB_VALID),// Scratchpad
		0xFFA00000,	(PAGE_SIZE_1MB|CPLB_WT|CPLB_VALID),// Instruction memory
		0xFF900000,	(PAGE_SIZE_1MB|CPLB_WT|CPLB_VALID),// Data B
		0xFF800000,	(PAGE_SIZE_1MB|CPLB_WT|CPLB_VALID),// Data A
		0xF0000000,	(PAGE_SIZE_1MB|L2_FLAGS),// L2 SRAM - 256KB, covered by a 1MB page
		0x00000000, (PAGE_SIZE_4MB|SDRAM_FLAGS),// SDRAM, first 4MB page
		0x00400000, (PAGE_SIZE_4MB|SDRAM_FLAGS);// SDRAM, second 4MB page (was 0x00000400: unaligned and inside the first page)


/* L1CACHE */

.align 32;  // align on a 32-byte cache-line
start_here:

/*****************************************************************************************************/
//  Everything from here on is cached
//
//  Blink program: drives programmable flags PF0-PF3 as outputs and toggles
//  the pairs PF0/PF2 and PF1/PF3 against each other forever, with a
//  busy-wait delay between the two states.
//
	P0.H = FIO_DIR >> 16;		/*P0 -> flag direction register*/
	P0.L = FIO_DIR & 0xFFFF;

	R0 = W[P0];			/*Current direction bits*/
	R1.L = 0x000f;			/*PF0 - PF3; only the low 16 bits are stored below*/
	R0 = R0 | R1;
	W[P0] = R0;			/*PF0 - PF3 become outputs*/
	ssync;

	P1.H = FIO_FLAG_S >> 16;	/*P1 -> flag set register*/
	P1.L = FIO_FLAG_S & 0xFFFF;
	P2.H = FIO_FLAG_C >> 16;	/*P2 -> flag clear register*/
	P2.L = FIO_FLAG_C & 0xFFFF;

	P5.L=0x4600;			/*Delay loop count = 0x05534600 iterations*/
	P5.H=0x0553;

	R1.L = 0x0005;			/*Bit mask for PF0 and PF2*/
	R2.L = 0x000A;			/*Bit mask for PF1 and PF3*/

	blink_forever:
	/*State 1: PF0/PF2 set, PF1/PF3 cleared*/
	W[P1] = R1;
	SSYNC;
	W[P2] = R2;
	SSYNC;

	CALL busy_wait;

	/*State 2: PF1/PF3 set, PF0/PF2 cleared*/
	W[P1] = R2;
	SSYNC;
	W[P2] = R1;
	SSYNC;

	CALL busy_wait;

	JUMP blink_forever;

	/*Busy-wait subroutine: runs a one-instruction hardware loop P5 times, then returns*/
	busy_wait:
	LSETUP(wait_spin, wait_spin) LC0 = P5;
	wait_spin: NOP;
	RTS;

_main.END:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -