// vecdotprod.asm
//*************************************************************************
// Vector Dot Product
//*************************************************************************
// File Name: Vector Dot Product.asm
//
// Date Modified: 11/10/00 RFG
//
// Purpose: Subroutine that implements a Vector Dot Product given two vectors.
//
// Equation: SUM += A(i) * B(i)
//
//
//
//*************************************************************************
// Include Files
//*************************************************************************
#include "macros.h"
//*************************************************************************
// Macro Definitions
//*************************************************************************
// --- Sizes ---------------------------------------------------------------
#define stack_length  256          // Element count of stack_start[] (arbitrary)

// --- Memory-mapped register addresses ------------------------------------
#define EVT0          0xFFE02000   // Event 0; used as the Event Vector Table base
#define DMEM_CONTROL  0xFFE00004   // Data memory control

// --- Bit values ----------------------------------------------------------
#define CCEN          0x00000002   // SYSCFG: enable cycle counter (=1)
#define ENDM          0x00000001   // DMEM_CONTROL: enable Data Memory L1
#define ASRAM_BSRAM   0x00000000   // DMEM_CONTROL: Bank A set as SRAM, Bank B set as SRAM
#define EVSW          0x00000008   // Exception interrupt
//*************************************************************************
// Program Code
//*************************************************************************
// Index of the exception entry in the Event Vector Table. Exceptions vector
// through EVT3 (EVT0 + 0x0C). EVSW (0x8) is the bit value for that event, not
// its table index, so the former "4 * EVSW" offset (0x20) addressed EVT8.
#define EVT_EXCEPTION_INDEX 3

.section program;
// -------------------------------------------------------------------------
// supervisor_mode_start
//   Start-up code that runs in supervisor mode. It:
//     1. points SP and USP at the end of stack_start[],
//     2. clears and enables the cycle counter,
//     3. installs _evsw_handler as the exception handler in the EVT,
//     4. enables L1 data memory with Bank A / Bank B as SRAM,
//     5. drops to user mode at user_mode_start via RTI.
//   Uses: sp, usp, r0, p0, p1, cycles, cycles2, reti (plus whatever the
//         bitset_SR / bitset_MMR macros from macros.h use).
// -------------------------------------------------------------------------
supervisor_mode_start:
    // Stack: SP starts just past the last element of stack_start[].
    sp.l = stack_start + stack_length*4;        // low 16 bits of the stack top
    sp.h = stack_start + stack_length*4;        // high 16 bits of the stack top
    usp = sp;                                   // user and supervisor mode share one stack

    // Cycle counter: zero both halves, then switch it on.
    r0 = 0;
    cycles = r0;
    cycles2 = r0;
    bitset_SR(SYSCFG, CCEN);                    // macro from macros.h; SYSCFG is only
                                                // accessible in supervisor mode

    // Exception vector: EVT0 is the base of the Event Vector Table.
    p0.l = EVT0 & 0xffff;                       // table base, low 16 bits
    p0.h = EVT0 >> 16;                          // table base, high 16 bits
    p1.l = _evsw_handler;                       // handler address, low 16 bits
    p1.h = _evsw_handler;                       // handler address, high 16 bits
    [p0 + (4 * EVT_EXCEPTION_INDEX)] = p1;      // each table entry is 4 bytes wide

    // L1 data memory: enable it with Bank A and Bank B both configured as SRAM.
    bitset_MMR(DMEM_CONTROL, ENDM | ASRAM_BSRAM);   // macro from macros.h

    // Leave supervisor mode: RTI jumps to the address in RETI and, because it
    // clears the interrupt, execution continues in user mode.
    r0.l = user_mode_start;                     // user code address, low 16 bits
    r0.h = user_mode_start;                     // user code address, high 16 bits
    reti = r0;
    rti;
// -------------------------------------------------------------------------
// _evsw_handler
//   Software exception handler whose address the start-up code writes into
//   the Event Vector Table. It never returns: any exception parks the
//   processor in an idle loop.
// -------------------------------------------------------------------------
_evsw_handler:
exception:
    idle;                   // If any exception occurs, exit.
    ssync;                  // Wait for idle to occur
    jump exception;         // if execution continues past idle, go idle again
// -------------------------------------------------------------------------
// user_mode_start / vec_dot_prod
//   User-mode code: accumulates SUM += A(i) * B(i) over VecA and VecB and
//   stores the upper 16 bits of the 32-bit sum to Result.
//
//   Register roles:
//     i0 -> VecA, i1 -> VecB   (each 32-bit load brings in two 16-bit elements)
//     p0 -> Result
//     p1  = LENGTH(VecA)
//     a0  = partial sum of the low-half products  (A[0]*B[0], A[2]*B[2], ...)
//     a1  = partial sum of the high-half products (A[1]*B[1], A[3]*B[3], ...)
//
//   Notes:
//     - The loop count is p1 >> 1, i.e. the element count halved (rounded down),
//       because every pass consumes two elements from each vector.
//     - Operands are fetched one pass ahead, so the loads issued in the last
//       pass read the word that follows each vector; that data is never used.
// -------------------------------------------------------------------------
user_mode_start:                    // Code running in user mode starts here.
    save_user_regs;                 // macro from macros.h: save dregs, pregs, dags,
                                    // and loop regs

    // At power up, all DAG circular functionality should be disabled.
    l3 = 0;
    l2 = 0;
    l1 = 0;
    l0 = 0;

    // Output pointer.
    p0.h = Result;                  // high 16 bits of the address of Result
    p0.l = Result;                  // low 16 bits of the address of Result

    // Input pointers.
    i1.h = VecB;                    // high 16 bits of the address of VecB
    i1.l = VecB;                    // low 16 bits of the address of VecB
    i0.h = VecA;                    // high 16 bits of the address of VecA
    i0.l = VecA;                    // low 16 bits of the address of VecA

vec_dot_prod:
    p1 = LENGTH(VecA)(z);           // p1 = length of the vectors
    a1 = 0;                         // clear both accumulators
    a0 = 0;

    // Prime the pipeline: r0.h = A[1], r0.l = A[0]; r1.h = B[1], r1.l = B[0].
    r0 = [i0++] || r1 = [i1++];

    // Single-instruction hardware loop (start and end are both mac_loop),
    // loop counter = p1/2.
    lsetup (mac_loop, mac_loop) lc0 = p1 >> 1;
mac_loop:
    // a1 += A[odd] * B[odd], a0 += A[even] * B[even]; in parallel, fetch the
    // next element pair of each vector into r0 and r1 for the following pass.
    a1 += r0.h * r1.h, a0 += r0.l * r1.l || r0 = [i0++] || r1 = [i1++];

    a0 += a1;                       // combine the partial sums: final result in a0
    r0 = a0;
    W[p0] = r0.h;                   // 16-bit store (W) of the high half to Result

    restore_user_regs;              // macro from macros.h: restore dregs, pregs,
                                    // dags, and loop regs
done:
    jump done;                      // spin here forever once the result is stored
//*************************************************************************
// User Stack
//*************************************************************************
// Stack storage shared by supervisor and user mode. The start-up code sets
// sp and usp to stack_start + stack_length*4.
.section stack;
.align 4;
.var stack_start[stack_length]; // Arbitrary stack length
// End of file