// File: hpel.asm
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : hpel.asm
Label Name : __hpel
Version : 1.0
Change History :
Version Date Author Comments
1.0 04/12/2001 Vijay Original
Description : This routine does the half pixel computation for the motion
estimation.
Assumption : The routine assumes that WINWIDTH is a multiple of 4.
Prototype : int _hpel(unsigned char *best_match, int min_SAD,
unsigned char *target, int WINWIDTH);
best_match -> Address of the best matching reference block
min_SAD -> The minimum SAD corresponding to the best
matching block.
target -> Address of the target macro block (16x16)
WINWIDTH -> Width of the reference window (WINWIDTH)
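The output is the half-pel position, returned to the calling
routine in a single word: the upper 16 bits hold the vertical
offset and the lower 16 bits hold the horizontal offset (each
-1, 0 or +1). A return value of 0 means that no half-pel
candidate had a SAD lower than min_SAD.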
Registers used : A0, A1, R0-R7, I0-I3, M0, M1, M3, L0-L3, P0, P2-P5, LC0.
Performance:
Code size : 768 bytes
Cycle count for half pixelation : 1695 cycles
*******************************************************************************/
// Half pixel computation
.section L1_code;
.align 8;
.global __hpel;
// __hpel: half-pixel refinement around the best integer-pel match.
//
// Inputs as consumed below:
//   R0 = address of the best matching reference block
//   R1 = SAD of that best match (min_SAD)
//   R2 = address of the target block
//   [SP + 44] (after the saves below) = WINWIDTH, the reference window width
// Output:
//   R0 = R4. R4 starts at 0 and is overwritten with R7 whenever a candidate's
//   SAD is strictly lower than the best so far, so 0 means "no improvement".
//   R7 carries the candidate offset: R7.H = vertical, R7.L = horizontal.
//
// Values kept live across the CALLs to the SAD helpers:
//   P5 = best match address, P4 = target address, P3 = WINWIDTH,
//   R5 = lowest SAD so far, R4 = best (V,H) so far, R7 = current (V,H).
//
// Scratch: 344 bytes are reserved on the stack and reused as the output
// buffer of each of the three interpolation passes.
__hpel:
// Save the registers this routine keeps state in (R4-R7, P3-P5) and RETS,
// since the routine itself issues CALLs.
[--SP] = (R7:4, P5:3);
[--SP] = RETS;
// Length registers zeroed — presumably to guarantee linear (non-circular)
// addressing for I0-I3; confirm against the processor reference.
L0 = 0;
L1 = 0;
L2 = 0;
L3 = 0;
P5 = R0; // Address of the best matching block
R5 = R1; // SAD corresponding to the best match
P4 = R2; // Address of the target block
// Eight words (32 bytes) were pushed above; the remaining 12 bytes of the
// offset are presumably the caller-reserved argument area — confirm
// against the C run-time model.
P3 = [SP + 44]; // Width of the reference window
P0 = 344;
SP -= P0;
I3 = SP; // Temporary buffer in stack
R4 = 0;
/******************** INTERPOLATE DIAGONAL BLOCKS ********************/
// Two source rows are read in parallel: I1 starts at (best match - 1) and
// I2 one window row above it. The loop runs 17 times and stores five words
// per iteration through I3, i.e. 17 buffer rows of 20 bytes (340 bytes).
//
// Modifier roles inside the loop:
//   M0 =  7, M3 = -3 : paired post-modifies; each M0/M3 pair nets +4 bytes.
//   M1 = WINWIDTH - 17 : used once per iteration on I1 and I2.
// I0 is never used for a load here; it is only stepped by M3 and back.
// NOTE(review): presumably BYTEOP2P takes its byte alignment from I0, so
// "I0 += M3" selects the neighbouring byte alignment for the RNDH pass and
// "I0 -= M3" restores it for the RNDL pass — confirm against the
// instruction reference.
M0 = 7;
M3 = -3 (X);
R0 = P5; // Address of the best match
R0 += -1;
I0 = R0;
I1 = R0;
R2 = P3; // WINWIDTH
R1 = R0 - R2 (S);
I2 = R1; // Address of best match - (WINWIDTH+1)
R2 += -17;
M1 = R2;
P2 = 17;
LSETUP(AVG4_ST, AVG4_END) LC0 = P2;
// Loop prologue: prime R1:0 / R3:2 and compute the first RNDL result so the
// first store inside the loop already has valid data.
DISALGNEXCPT || R0 = [I1++] || R2 = [I2++];
DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || R0 = [I1++M0];
AVG4_ST:
// Each group below combines an RNDH result (R6) with the previous RNDL
// result (R7) into one output word, stores it, and starts the next RNDL.
// The (R) forms alternate with the plain forms as the register pairs are
// refilled one register at a time.
DISALGNEXCPT || I0 += M3 || R2 = [I2++M0];
R6 = BYTEOP2P(R1:0,R3:2) (RNDH) || R0 = [I1++M3] || R2 = [I2++M3];
R7 = R6 + R7 (NS) || I0 -= M3;
R7 = BYTEOP2P(R1:0,R3:2) (RNDL, R) || [I3++] = R7 || R1 = [I1++M0];
DISALGNEXCPT || I0 += M3 || R3 = [I2++M0];
R6 = BYTEOP2P(R1:0,R3:2) (RNDH, R) || R1 = [I1++M3] || R3 = [I2++M3];
R7 = R6 + R7 (NS) || I0 -= M3;
R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || [I3++] = R7 || R0 = [I1++M0];
DISALGNEXCPT || I0 += M3 || R2 = [I2++M0];
R6 = BYTEOP2P(R1:0,R3:2) (RNDH) || R0 = [I1++M3] || R2 = [I2++M3];
R7 = R6 + R7 (NS) || I0 -= M3;
R7 = BYTEOP2P(R1:0,R3:2) (RNDL, R) || [I3++] = R7 || R1 = [I1++M0];
DISALGNEXCPT || I0 += M3 || R3 = [I2++M0];
R6 = BYTEOP2P(R1:0,R3:2) (RNDH, R) || R1 = [I1++M3] || R3 = [I2++M3];
R7 = R6 + R7 (NS) || I0 -= M3;
// Last word of the row: the M1 post-modifies move I1 and I2 on to the
// next source row.
R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || [I3++] = R7 || R0 = [I1++M1];
DISALGNEXCPT || I0 += M3 || R2 = [I2++M1];
R6 = BYTEOP2P(R1:0,R3:2) (RNDH) || R0 = [I1++] || R2 = [I2++];
R7 = R6 + R7 (NS) || I0 -= M3;
DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
AVG4_END:
// Stores the fifth word of this row and pre-computes the first RNDL of the
// next row (that value is simply unused after the final iteration).
R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || [I3++] = R7 || R0 = [I1++M0];
/**************** CALCULATE SAD FOR DIAGONAL BLOCKS *******************/
// The four diagonal candidates are read from the same buffer at different
// offsets: +1 byte is one column to the right, +20 bytes is one buffer row
// down. After each SAD, "CC = R0 < R5" is a strict compare, so a candidate
// only replaces the current best when it is strictly better.
R7 = -1; // (V,H) -> R7.H = -1, R7.L = -1
R0 = P4;
R1 = SP;
CALL _compute_sad;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.L = 1; // (V,H) -> R7.H = -1, R7.L = 1
R0 = P4;
R1 = SP;
R1 += 1;
CALL _compute_sad;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.H = 1; // (V,H) -> R7.H = 1, R7.L = 1
R0 = P4;
R1 = SP;
R1 += 21;
CALL _compute_sad;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.L = -1; // (V,H) -> R7.H = 1, R7.L = -1
R0 = P4;
R1 = SP;
R1 += 20;
CALL _compute_sad;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
/******************** INTERPOLATE LEFT/RIGHT BLOCKS ********************/
// Horizontal pass: I0 walks the best match rows and I1 the same rows one
// byte to the left; BYTEOP1P combines the two streams. The loop runs 16
// times and stores five words (20 bytes) per row into the stack buffer.
// Per iteration each pointer takes five 4-byte steps plus one M1 step, so
// M1 = WINWIDTH - 20 makes the net advance exactly one window row.
R0 = P5; // Address of the best match
I0 = R0; // Address of best match
R0 += -1;
I1 = R0; // Address of best match - 1
I3 = SP; // Output buffer
P2 = 16 (Z);
R1 = P3; // WINWIDTH
R1 += -20;
M1 = R1;
MNOP;
LSETUP(AVG2_LR_ST, AVG2_LR_END) LC0 = P2;
// Loop prologue: first word of each stream.
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
AVG2_LR_ST:
// Each BYTEOP1P result in R6 is stored by the instruction that follows it,
// while the loads keep refilling the register pairs.
DISALGNEXCPT || R1 = [I0++] || R3 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R1 = [I0++];
DISALGNEXCPT || [I3++] = R6 || R3 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++];
// The M1 post-modifies below jump both pointers to the next source row.
R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R1 = [I0++M1];
DISALGNEXCPT || [I3++] = R6 || R3 =[I1++M1];
R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++];
AVG2_LR_END:
DISALGNEXCPT || [I3++] = R6 || R2 =[I1++];
/**************** CALCULATE SAD FOR LEFT/RIGHT BLOCKS *******************/
// Buffer byte k pairs source columns k and k-1, so the left candidate
// starts at SP and the right candidate one byte further on.
R7 = 1; // (V,H) -> R7.H = 0, R7.L = 1
R0 = P4;
R1 = SP;
R1 += 1;
CALL _compute_sad;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.L = -1; // (V,H) -> R7.H = 0, R7.L = -1
R0 = P4;
R1 = SP;
CALL _compute_sad;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
/******************** INTERPOLATE TOP/BOTTOM BLOCKS ********************/
// Vertical pass: I0 walks the best match rows and I1 the rows one window
// line above. The loop runs 17 times and stores four words (16 bytes) per
// row. Per iteration each pointer takes four 4-byte steps plus one M1 step,
// so M1 = WINWIDTH - 16 makes the net advance exactly one window row.
R1 = P3; // WINWIDTH
R0 = P5; // Address of the best match
I0 = R0; // Address of best match
R0 = R0 - R1(S) || NOP;
I1 = R0; // Address of best match - WINWIDTH
I3 = SP; // Output buffer
P2 = 17;
R1 += -16;
M1 = R1;
LSETUP(AVG2_TB_ST, AVG2_TB_END) LC0 = P2;
// Loop prologue: first word of each stream.
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
AVG2_TB_ST:
DISALGNEXCPT || R1 = [I0++] || R3 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R1 = [I0++];
DISALGNEXCPT || [I3++] = R6 || R3 =[I1++];
// Loads the fifth source word of this row and steps both pointers to the
// next row via M1.
R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M1] || R2 =[I1++M1];
R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++];
AVG2_TB_END:
DISALGNEXCPT || [I3++] = R6 || R2 =[I1++];
/**************** CALCULATE SAD FOR TOP/BOTTOM BLOCKS *******************/
// Buffer rows are 16 bytes here, and both candidates start at whole-row
// offsets (SP and SP + 16), so the helper without DISALGNEXCPT is used.
// NOTE(review): this relies on SP being word aligned — confirm.
R7.H = -1; // (V,H) -> R7.H = -1, R7.L = 0
R7.L = 0;
R0 = P4;
R1 = SP;
CALL _compute_sad_aligned;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.H = 1; // (V,H) -> R7.H = 1, R7.L = 0
R0 = P4;
R1 = SP;
R1 += 16;
CALL _compute_sad_aligned;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
/********************************************************************/
// Epilogue: return the best offset, release the 344-byte scratch buffer and
// restore the saved registers in reverse order of the prologue.
R0 = R4; // Return horizontal and vertical half pel
P0 = 344;
SP = SP + P0;
RETS = [SP++];
(R7:4, P5:3) = [SP++];
RTS;
.align 8;
// _compute_sad: sum of absolute differences between the target block and an
// interpolated block stored with a 20-byte row pitch.
//
// In:  R0 = address of the target block
//      R1 = address of the interpolated block (callers pass byte offsets
//           such as +1 and +21, so this address need not be word aligned)
// Out: R0 = SAD
// Writes: I0, I1, P0, LC0, A0, A1, R0-R3.
//
// The loop runs 16 times. Per iteration I0 advances four words (16 bytes)
// and I1 advances five words (20 bytes).
_compute_sad:
I0 = R0; // Address of the target
I1 = R1; // Address of the interpolated block
P0 = 16 (Z);
A1 = A0 = 0;
LSETUP (MAD_START1, MAD_END1) LC0=P0;
// Loop prologue: first word of each stream.
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
MAD_START1:
// Extra I1 load so that R3:2 holds two consecutive words before the first
// SAA. NOTE(review): presumably needed because SAA extracts its four bytes
// from the register pair according to I1's alignment — confirm.
DISALGNEXCPT || R3 = [I1++];
SAA (R1:0,R3:2) || R1 = [I0++] || R2 = [I1++];
// Compute absolute difference and acc
// The plain and (R) forms alternate as the pairs are refilled one register
// at a time.
SAA (R1:0,R3:2) (R) || R0 = [I0++] || R3 = [I1++];
SAA (R1:0,R3:2) || R1 = [I0++] || R2 = [I1++];
MAD_END1:
// Also loads the first word of the next row from both streams.
SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
// Fold the four 16-bit accumulator halves into a single total.
R3=A1.L+A1.H,R2=A0.L+A0.H;
R0 = R2 + R3 (S); // Add the accumulated values in both MACs
RTS;
.align 8;
// _compute_sad_aligned: sum of absolute differences between the target block
// and an interpolated block stored with a 16-byte row pitch.
//
// In:  R0 = address of the target block
//      R1 = address of the interpolated block
// Out: R0 = SAD
// Writes: I0, I1, P0, LC0, A0, A1, R0-R3.
//
// The loop runs 16 times; both I0 and I1 advance four words (16 bytes) per
// iteration. Unlike _compute_sad, no DISALGNEXCPT is issued on the loads.
// NOTE(review): this presumably requires both addresses to be word aligned
// — confirm against the callers.
_compute_sad_aligned:
I0 = R0; // Address of the target
I1 = R1; // Address of the interpolated block
P0 = 16;
LSETUP (MAD_START, MAD_END) LC0=P0;
// Clear both accumulators and load the first word of each stream.
A1=A0=0 || R0 = [I0++] || R2 = [I1++];
// Initialize accumulators
MAD_START:
// Four SAA operations per row; each one also loads the word used by the
// next. The plain and (R) forms alternate with the register being refilled.
SAA (R1:0,R3:2) || R1 = [I0++] || R3 = [I1++];
// Compute absolute difference and acc.
SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
SAA (R1:0,R3:2) || R1 = [I0++] || R3 = [I1++];
MAD_END:SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
// Fold the four 16-bit accumulator halves into a single total.
R3=A1.L+A1.H,R2=A0.L+A0.H;
R0 = R2 + R3 (S); // Add the accumulated values in both MACs
RTS;
__hpel.end:
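/*
Web code-viewer residue (not part of the original source), translated:
Keyboard shortcuts
Copy code : Ctrl + C
Search code : Ctrl + F
Full-screen mode : F11
Toggle theme : Ctrl + Shift + D
Show shortcuts : ?
Increase font size : Ctrl + =
Decrease font size : Ctrl + -
*/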