⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vecdotprod.asm

📁 向量点乘源代码
💻 ASM
字号:
//*************************************************************************
// Vector Dot Product
//*************************************************************************
// File Name:	Vector Dot Product.asm
//		
// Date Modified:	11/10/00	RFG
//
// Purpose:	Subroutine that implements a Vector Dot Product given two vectors.
//
// Equation:	SUM += A(i) * B(i)
//
//
//
//*************************************************************************
// Include Files
//*************************************************************************
#include "macros.h"

//*************************************************************************
// Macro Definitions
//*************************************************************************
// Constants used by the startup code in this file. The addresses and bit
// values are taken as given here.
// NOTE(review): confirm them against the hardware reference for the target part.
#define stack_length 	256				// Stack size in .var elements; the startup code multiplies it by 4 to form the top-of-stack address
#define CCEN 			0x00000002		// Mask passed to bitset_SR(SYSCFG, ...) to enable the cycle counter (=1)
#define EVT0			0xFFE02000		// Event 0; used as the base address of the Event Vector Table
#define	DMEM_CONTROL	0xFFE00004		// Data memory control register address
#define ENDM			0x00000001		// Enable Data Memory L1
#define EVSW			0x00000008		// Exception interrupt; used as an entry index (scaled by 4) into the Event Vector Table
#define ASRAM_BSRAM		0x00000000		// Bank A set as SRAM, Bank B set as SRAM (value is 0, so OR-ing it in adds no bits)

//*************************************************************************
// Program Code
//*************************************************************************
.section program;

// supervisor_mode_start: startup code that runs in supervisor mode.
// In order, it:
//   1. points SP at stack_start + stack_length*4 and copies SP to USP,
//   2. zeroes CYCLES and CYCLES2, then sets CCEN in SYSCFG,
//   3. writes the address of _evsw_handler into the Event Vector Table,
//   4. sets ENDM | ASRAM_BSRAM in DMEM_CONTROL,
//   5. loads RETI with user_mode_start and executes RTI to continue there.
// Control does not come back here. r0, p0 and p1 are overwritten directly;
// bitset_SR and bitset_MMR come from macros.h, so any further registers they
// use are not visible in this file.
supervisor_mode_start:				// Code running in supervisor mode starts here
	
	sp.l = stack_start + stack_length*4;		// low 16 bits of the top-of-stack address
	sp.h = stack_start + stack_length*4;		// high 16 bits of the same address
	usp = sp;									// User mode and supervisor mode share same stack
	
	r0=0;							// Clear the contents of the cycle count registers
	cycles = r0;
	cycles2 = r0;

	bitset_SR(SYSCFG, CCEN);		// Enable the cycle counter using macro in macros.h
									// Access to SYSCFG can only be done in supervisor mode
	
	/* The table base is taken from EVT0 (an earlier revision used EVT; see the disabled line below) */
	p0.l = EVT0 & 0xffff;			// Event Vector Table base address (low 16-bits)	

	p0.h = EVT0 >> 16;				// Event Vector Table base address (high 16-bits)
///    p0.l = EVT;
		
	p1.l = _evsw_handler;			// low 16 bits of the handler address
	p1.h = _evsw_handler;			// high 16 bits of the handler address
	[p0 + (4 * EVSW)] = p1;			// address of _evsw_handler written to the EVSW location in the EVT
									// NOTE(review): 4 * EVSW is byte offset 32 from EVT0 (table entry 8);
									// confirm that this is the exception entry on the target core

	bitset_MMR(DMEM_CONTROL, ENDM | ASRAM_BSRAM);	// Enable the L1 Bank A and Bank B as SRAM using macro in macros.h
	
	r0.l = user_mode_start;			// set low 16-bits of address of user mode code
    r0.h = user_mode_start;			// set high 16-bits of address of user mode code
	reti = r0;						// set reti to address of user mode code
	rti;							// RTI clears interrupt, jumps to address in reti
									// and enters user mode (since interrupt is cleared)

// _evsw_handler: handler whose address the startup code writes into the
// Event Vector Table. It never returns: it executes idle and ssync, and if
// execution continues past them it jumps back and repeats.
_evsw_handler:
exception:	idle;		  			// If any exception occurs, stop here instead of continuing the program
			ssync;					// Wait for idle to occur
			jump exception;			// Repeat if execution ever continues past the idle

// user_mode_start: reached through the RTI at the end of the supervisor-mode
// startup code. It saves registers with a macro from macros.h, zeroes the
// DAG length registers l0-l3, and loads the pointers used by vec_dot_prod:
//   i0 -> VecA, i1 -> VecB, p0 -> Result.
// VecA, VecB and Result are not defined in the visible part of this file.
// Execution then falls through into vec_dot_prod.
user_mode_start:					// Code running in user mode starts here.
	
	save_user_regs;					// save dregs, pregs, dags, and loop regs
									// using macro from macros.h
	
	l0 = 0;							// At power up, all DAG circular functionality
	l1 = 0;							// should be disabled
	l2 = 0;
	l3 = 0;

	i0.l = VecA;					// set low 16-bits of i0 to low part of the address of VecA
	i0.h = VecA;					// set high 16-bits of i0 to high part of the address of VecA

	i1.l = VecB;					// set low 16-bits of i1 to low part of the address of VecB
	i1.h = VecB;					// set high 16-bits of i1 to high part of the address of VecB

	p0.l = Result;					// set low 16-bits of p0 to low part of the address of Result
	p0.h = Result;					// set high 16-bits of p0 to high part of the address of Result
	
	
// vec_dot_prod: accumulates SUM += A(i) * B(i) over VecA and VecB.
// Expects i0 -> VecA, i1 -> VecB and p0 -> Result, as set up by user_mode_start.
// Each 32-bit load brings in two 16-bit elements, so one loop pass handles
// two element pairs: the low halves accumulate in a0, the high halves in a1.
// The loop count is LENGTH(VecA) >> 1, so with an odd length the final
// element is left out of the sum.
// Every load is issued one pass before its data is used, so the last pass
// loads one more 32-bit word from each vector than the loop consumes; that
// data is discarded.
// The two partial sums are added and the upper 16 bits of the result are
// written to Result. Control does not return; it ends in the endless loop at done.
vec_dot_prod:
	a0 = 0;							// clear accumulators
	a1 = 0;

	p1 = LENGTH(VecA)(z);			// set p1 for length of vectors	
									// (presumably the element count of VecA; VecB is assumed to match - TODO confirm)

	r0=[i0++] || r1=[i1++];			// fetch A[1] into r0.h and A[0] into r0.l
									// fetch B[1] into r1.h and B[0] into r1.l

 
	lsetup (myloop,myloop) lc0=p1 >> 1;								// single-instruction loop on myloop, loop counter = p1/2
	myloop:a1+=r0.h*r1.h, a0+=r0.l*r1.l || r0=[i0++] || r1=[i1++];	// a1 += A[1] * B[1], a0 += A[0] * B[0]
																	// fetch A[3] into r0.h and A[2] into r0.l
																	// fetch B[3] into r1.h and B[2] into r1.l
																	// (the same pattern repeats for later pairs)

	a0 += a1;						// combine the partial sums for the dot product final result in a0
	r0 = a0;						// copy the accumulator into r0 so its upper half can be stored
	W[p0] = r0.h;					// store upper 16 bits of r0 to Result using a 16-bit store (W)
	
	restore_user_regs;				// restore dregs, pregs, dags, and loop regs
									// using macro from macros.h

done:	jump done;					// spin here forever once the result has been stored

//*************************************************************************
// User Stack
//*************************************************************************
// Storage for the stack shared by supervisor and user mode. The startup code
// sets SP to stack_start + stack_length*4, which is the end of this array if
// each .var element is 4 bytes - TODO confirm against the assembler manual.
.section stack;
.align 4;
.var stack_start[stack_length];		// stack_length elements; the length is arbitrary

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -