/* File: idct_mips.c  (code-viewer page header; the "font size" control label is not part of the source) */
/*****************************************************************************
*
* This code has been developed by Project Mayo. This software is an
* implementation of a part of one or more MPEG-4 Video tools as
* specified in ISO/IEC 14496-2 standard. Those intending to use this
* software module in hardware or software products are advised that its
* use may infringe existing patents or copyrights, and any such use
* would be at such party's own risk. The original developer of this
* software module and his/her company, and subsequent editors and their
* companies (including Project Mayo), will have no liability for use of
* this software or modifications or derivatives thereof.
*
*****************************************************************************
* *
* This program is free software ; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* $Id: idct_mips.c 5 2004-07-15 13:20:46Z picard $
*
*****************************************************************************
*
* Authors:
*
* Pedro Mateu (Pm)
*
****************************************************************************/
#include "../stdafx.h"
#include "softidct.h"
#if defined(MIPS64) || defined(MIPS32)
#if defined(MIPSVR41XX)
#define MACC(out,ina,inb) \
"macc $0," #ina "," #inb ";" \
"mflo " #out ";"
// "macc " #out "," #ina "," #inb ";"
#else
#define MACC(out,ina,inb) \
".set noat;" \
"mflo $1;" \
"mult " #ina "," #inb ";" \
"mflo " #out ";" \
"addu " #out "," #out ",$1;"
#endif
/*
 * IDCT_Row8 - 8-point inverse DCT of one row of coefficients, written out as
 * eight saturated bytes.
 *
 * Blk:  eight coefficients, read as signed halfwords at byte offsets 0..14.
 * Dest: receives the eight result bytes at offsets 0..7.
 * Src:  when non-zero, the eight bytes at Src[0..7] are added to the results
 *       before the overflow check; when zero the results are stored as-is.
 *
 * The whole body is hand-written MIPS assembly. It never uses the C parameter
 * names: it expects Blk in $4, Dest in $5 and Src in $6 on entry, uses
 * $2,$3,$5-$15,$24,$25 as scratch without declaring them to the compiler, and
 * on the shortcut path returns with "jr $31" from inside the asm.
 * NOTE(review): this presumably relies on the compiler emitting no stack
 * frame or callee-saved spills for this function - confirm for the toolchain.
 *
 * Register roles in the butterfly (names follow the original comments):
 *   $5=x0 $6=x1 $7=x2 $8=x3 $9=x4 $10=x5 $11=x6 $12=x7 $13=x8
 *
 * The labels (add_eq, idct_row_a, check_overflow, no_saturar, fin) are plain
 * assembler labels, so this function must be emitted exactly once.
 */
void IDCT_Row8(idct_block_t *Blk, uint8_t *Dest, const uint8_t *Src)
{
// Park Dest/Src in $24/$25: $5 and $6 are reused for x0 and x1 below.
__asm("move $24,$5;"
"move $25,$6;"
"lh $5,0($4);" //x0
"lh $9,2($4);" //x4
"lh $8,4($4);" //x3
"lh $12,6($4);" //x7
"lh $6,8($4);" //x1
"lh $11,10($4);" //x6
"lh $7,12($4);" //x2
"lh $10,14($4);" //x5
"sll $6,$6,8;" // x1 <<= 8
// OR together every input except x0: zero means only x0 contributes.
"or $2,$7,$6;"
"or $2,$2,$8;"
"or $2,$2,$9;"
"or $2,$2,$10;"
"or $2,$2,$11;"
"or $2,$2,$12;" // shortcut test value
"bne $2,$0,idct_row_a;" // some value other than x0 is non-zero: no shortcut, run the full transform
// Shortcut: every output equals (x0 + 32) >> 6.
"addi $5,$5,32;"
"sra $11,$5,6;"
"bne $25,$0,add_eq;" // Src present: add it to the constant per byte
// No Src: clamp the single value, replicate it into all four bytes of a
// word and store that word twice.
// NOTE(review): for a negative value the shift count below is 31 and the
// shift is logical, which appears to leave 1 rather than 0 - confirm.
"srl $15,$11,16;" // upper halfword is non-zero when the value is negative
"srlv $11,$11,$15;" // shift the value down by that amount (no-op when it is 0)
"sll $15,$11,23;" // move bit 8 into the sign position (value above 255)
"sra $15,$15,31;" // all-ones mask when bit 8 was set, else 0
"or $11,$11,$15;" // low byte becomes 0xFF when the mask is set
"sll $14,$11,8;"
"or $11,$11,$14;" // byte duplicated into bits 8..15
"sll $14,$11,16;"
"or $11,$11,$14;" // halfword duplicated into bits 16..31
"sw $11,0($24);" // store the replicated value to Dest[0..3]
"sw $11,4($24);" // and to Dest[4..7]
#ifdef MIPSVR41XX
// Only when bit 3 of the Dest address is set: issue cache op 25 ("hit
// writeback" per the original comment) on Dest-8; otherwise jump to the end.
"andi $7,$24,0x8;"
"beq $7,$0, fin;"
".set noreorder;"
"cache 25,-8($24);" // hit writeback
".set reorder;"
#endif
"jr $31;"); // return directly to the caller
// Shortcut with Src: result[i] = Src[i] + ((x0 + 32) >> 6), placed in the
// same registers the full path uses for its eight outputs.
__asm( "add_eq:"
"lbu $15,0($25);" // load the eight existing bytes from Src
"lbu $6,1($25);"
"lbu $7,2($25);"
"lbu $9,3($25);"
"lbu $13,4($25);"
"lbu $5,5($25);"
"lbu $8,6($25);"
"lbu $12,7($25);"
"add $15,$11,$15;"
"add $6,$11,$6;"
"add $7,$11,$7;"
"add $9,$11,$9;"
"add $13,$11,$13;"
"add $5,$11,$5;"
"add $8,$11,$8;"
"add $12,$11,$12;"
// Check if there is an overflow and store
"b check_overflow;");
// First stage. Each "mult" loads LO with the shared product and the MACC
// that follows adds its own term to it.
__asm ("idct_row_a:add $13,$9,$10;" //x8=x4+x5
"li $14,565;" //W7
"mult $14,$13;" //W7*x8
"li $15,2276;" //W1_minus_W7
MACC($9,$15,$9) //x4 = (W1-W7)*x4 + W7*x8
"sll $5,$5,8;" // x0 <<= 8
"mult $14,$13;" //W7*x8
"li $15,-3406;" //-W1_plus_W7
MACC($10,$15,$10) //x5 = -((W1+W7)*x5) + W7*x8
"li $14,2408;" //W3
"add $13,$11,$12;" // x8 = x6+x7
"mult $14,$13;" //W3*(x6+x7)
"li $15,-799;" //-W3_minus_W5
MACC($11,$15,$11) //x6 = -((W3-W5)*x6) + W3*(x6+x7)
"add $5,$5,8192;" // x0 += 8192 (half of 1<<14, rounds the final >>14)
"mult $14,$13;" //W3*(x6+x7)
"li $14,-4017;" // -4017 multiplies x7 in the MACC below
MACC($12,$12,$14)); //x7 = x7*(-4017) + W3*(x6+x7)
// Second stage.
__asm ( "add $13,$5,$6;" //x8 = x0+x1
"sub $5,$5,$6;" //x0 -=x1
"add $6,$7,$8;" //x1=(x2+x3)
"li $15,1108;" //W6
"mult $15,$6;" //W6*(x3+x2)
"li $14,-3784;" //-W2_plus_W6
MACC($7,$14,$7) //x2 = W6*(x3+x2) - (W2+W6)*x2
"mult $15,$6;" //W6*(x3+x2)
"li $14,1568;" //W2_minus_W6
MACC($8,$14,$8) //x3 = W6*(x3+x2) + (W2-W6)*x3
// Round and scale the four odd-part terms: (v + 4) >> 3.
"addi $9,$9,4;"
"addi $11,$11,4;"
"addi $10,$10,4;"
"addi $12,$12,4;"
"sra $9,$9,3;"
"sra $10,$10,3;"
"sra $11,$11,3;"
"sra $12,$12,3;"
"add $6,$9,$11;" //x1=x4+x6
"sub $9,$9,$11;" //x4=x4-x6
"add $11,$10,$12;" //x6=x5+x7
"sub $10,$10,$12;" //x5=x5-x7
);
// Third stage.
__asm ( "addi $7,$7,4;" // x2 and x3 get the same (v + 4) >> 3 scaling
"addi $8,$8,4;"
"sra $7,$7,3;"
"sra $8,$8,3;"
"add $12,$13,$8;" //x7=x8+x3
"sub $13,$13,$8;" //x8-=x3
"add $8,$5,$7;" //x3=x0+x2
"sub $5,$5,$7;" //x0-=x2
"li $14,181;" // multiplier for the two (181*v + 128) >> 8 terms below
"add $7,$9,$10;" //x4+x5
"mult $14,$7;" // 181*(x4+x5)
"mflo $7;"
"addi $7,$7,128;"
"sra $7,$7,8;" // x2 = (181*(x4+x5) + 128) >> 8
"sub $9,$9,$10;" // x4-x5
"mult $14,$9;" // 181*(x4-x5)
"mflo $9;"
"addi $9,$9,128;"
"sra $9,$9,8;"); // x4 = (181*(x4-x5) + 128) >> 8
//Fourth Stage: eight outputs, each shifted right by 14.
// Output order by Dest offset: $15,$6,$7,$9,$13,$5,$8,$12.
__asm ( //x7+x1 -> output 0
"add $15,$12,$6;"
"sra $15,$15,14;"
//x7-x1 -> output 7
"sub $12,$12,$6;"
"sra $12,$12,14;"
//x3+x2 -> output 1
"add $6,$8,$7;"
"sra $6,$6,14;"
//x3-x2 -> output 6
"sub $8,$8,$7;"
"sra $8,$8,14;"
//x0+x4 -> output 2
"add $7,$5,$9;"
"sra $7,$7,14;"
//x0-x4 -> output 5
"sub $5,$5,$9;"
"sra $5,$5,14;"
//x8+x6 -> output 3
"add $9,$13,$11;"
"sra $9,$9,14;"
//x8-x6 -> output 4
"sub $13,$13,$11;"
"sra $13,$13,14;"
// If we don't have to add older values, check if there is an overflow and store
"beq $25,$0,check_overflow;");
// Src present: add Src[0..7] to the eight outputs, two bytes at a time.
__asm ( "lbu $2,0($25);"
"lbu $3,1($25);"
"add $15,$15,$2;"
"add $6,$6,$3;"
"lbu $2,2($25);"
"lbu $3,3($25);"
"add $7,$7,$2;"
"add $9,$9,$3;"
"lbu $2,4($25);"
"lbu $3,5($25);"
"add $13,$13,$2;"
"add $5,$5,$3;"
"lbu $2,6($25);"
"lbu $3,7($25);"
"add $8,$8,$2;"
"add $12,$12,$3;");
__asm( // Check for overflow: OR all eight values; any bit above bit 7 set
// means at least one value is outside 0..255.
"check_overflow:"
"or $14,$15,$12;"
"or $14,$14,$6;"
"or $14,$14,$8;"
"or $14,$14,$7;"
"or $14,$14,$5;"
"or $14,$14,$9;"
"or $14,$14,$13;"
"srl $14,$14,8;"
// If there is an overflow, we must saturate all the values
"beq $14,$0,no_saturar;"
);
// Saturate each of the eight values with the same five-instruction pattern:
//   srl/srlv : a non-zero upper halfword (negative value) shifts the value down
//   sll/sra  : bit 8 set (value above 255) produces an all-ones mask
//   or       : the mask forces the low byte to 0xFF
// Only the low byte is stored afterwards (sb).
// NOTE(review): for a negative value the srlv count is 31 and the shift is
// logical, which appears to leave 1 rather than 0 - confirm.
__asm ( "srl $14,$15,16;" // output 0: negative check
"srlv $15,$15,$14;" // shift down when negative
"sll $14,$15,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $15,$15,$14;" // apply mask
"srl $14,$12,16;" // output 7: negative check
"srlv $12,$12,$14;" // shift down when negative
"sll $14,$12,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $12,$12,$14;" // apply mask
"srl $14,$6,16;" // output 1: negative check
"srlv $6,$6,$14;" // shift down when negative
"sll $14,$6,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $6,$6,$14;" // apply mask
"srl $14,$8,16;" // output 6: negative check
"srlv $8,$8,$14;" // shift down when negative
"sll $14,$8,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $8,$8,$14;" // apply mask
"srl $14,$7,16;" // output 2: negative check
"srlv $7,$7,$14;" // shift down when negative
"sll $14,$7,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $7,$7,$14;" // apply mask
"srl $14,$5,16;" // output 5: negative check
"srlv $5,$5,$14;" // shift down when negative
"sll $14,$5,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $5,$5,$14;" // apply mask
"srl $14,$9,16;" // output 3: negative check
"srlv $9,$9,$14;" // shift down when negative
"sll $14,$9,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $9,$9,$14;" // apply mask
"srl $14,$13,16;" // output 4: negative check
"srlv $13,$13,$14;" // shift down when negative
"sll $14,$13,23;" // bit 8 -> sign bit
"sra $14,$14,31;" // mask = all ones when above 255
"or $13,$13,$14;" // apply mask
// Store the low byte of each result to Dest[0..7].
"no_saturar:"
"sb $15,0($24);"
"sb $6,1($24);"
"sb $7,2($24);"
"sb $9,3($24);"
"sb $13,4($24);"
"sb $5,5($24);"
"sb $8,6($24);"
"sb $12,7($24);"
#ifdef MIPSVR41XX
// Only when bit 3 of the Dest address is set: issue cache op 25 ("hit
// writeback" per the original comment) on Dest-8. "fin" is also the exit
// target of the shortcut path above.
"andi $7,$24,0x8;"
"beq $7,$0, fin;"
".set noreorder;"
"cache 25,-8($24);" // hit writeback
".set reorder;"
"fin:"
#endif
);
}
/*
 * IDCT_Col8 - in-place 8-point inverse DCT of one column of coefficients.
 *
 * Blk: the eight inputs are read as signed halfwords at byte offsets
 *      0,16,32,...,112 from Blk and the eight results are written back as
 *      halfwords to the same offsets.
 *      NOTE(review): the 16-byte stride presumably corresponds to an 8x8
 *      block of 16-bit entries - confirm against idct_block_t.
 *
 * The body is hand-written MIPS assembly that expects Blk in $4 on entry,
 * uses $2,$5-$15,$24 as scratch without declaring them to the compiler, and
 * on the shortcut path returns with "jr $31" from inside the asm.
 *
 * Register roles (names follow the original comments):
 *   $5=x0 $6=x1 $7=x2 $8=x3 $9=x4 $10=x5 $11=x6 $12=x7 $13=x8
 *
 * The labels (idct_estandar, final) are plain assembler labels, so this
 * function must be emitted exactly once.
 */
void IDCT_Col8(idct_block_t *Blk)
{
__asm ( "lh $5,0($4);"//x0
"lh $9,16($4);"//x4
"lh $8,32($4);"//x3
"lh $12,48($4);"//x7
"lh $6,64($4);"//x1
"lh $11,80($4);"//x6
"lh $7,96($4);"//x2
"lh $10,112($4);"//x5
// OR together every input except x0: zero means only x0 contributes.
"or $2,$7,$6;"
"or $2,$2,$8;"
"or $2,$2,$9;"
"or $2,$2,$10;"
"or $2,$2,$11;"
"or $2,$2,$12;"// shortcut test value
"sll $6,$6,11;"// x1<<11
"bne $2,$0,idct_estandar;"// not zero: the shortcut cannot be taken
// Shortcut: every output equals x0 << 3.
"sll $5,$5,3;"//x0<<3
"beq $5,$0,final;" // result is zero as well: skip the eight stores
"sh $5,0($4);"// store x0<<3 to all eight positions
"sh $5,16($4);"
"sh $5,32($4);"
"sh $5,48($4);"
"sh $5,64($4);"
"sh $5,80($4);"
"sh $5,96($4);"
"sh $5,112($4);"
"final:jr $31;"); // return directly to the caller
// First stage. Each "mult" loads LO with the shared product and the MACC
// that follows adds its own term to it.
__asm ("idct_estandar:add $13,$9,$10;"//x8=x4+x5
"li $14,565;"//W7
"mult $14,$13;"//W7*x8
"li $15,2276;"//W1_minus_W7
MACC($9,$15,$9) //x4 = (W1-W7)*x4 + W7*x8
"sll $5,$5,11;"// x0<<11
"mult $14,$13;"//W7*x8
"li $15,-3406;" //-W1_plus_W7
MACC($10,$15,$10) //x5 = -((W1+W7)*x5) + W7*x8
"li $14,2408;"//W3
"add $13,$11,$12;"// x8 = x6+x7
"mult $14,$13;"//W3*(x6+x7)
"li $15,-799;"//-W3_minus_W5
MACC($11,$15,$11) //x6 = -((W3-W5)*x6) + W3*(x6+x7)
"add $5,$5,128;"//x0+=128 (half of 1<<8, rounds the final >>8)
"mult $14,$13;"//W3*(x6+x7)
"li $14,-4017;"// -4017 multiplies x7 in the MACC below
MACC($12,$12,$14) //x7 = x7*(-4017) + W3*(x6+x7)
);
// Second stage.
__asm ( "add $13,$5,$6;"//x8 = x0+x1
"sub $5,$5,$6;"//x0 -=x1
"add $6,$7,$8;"//x1=(x2+x3)
"li $15,1108;"//W6
"mult $15,$6;"//W6*(x3+x2)
"li $14,-3784;" //-W2_plus_W6
MACC($7,$14,$7) //x2 = W6*(x3+x2) - (W2+W6)*x2
"mult $15,$6;"//W6*(x3+x2)
"li $14,1568;"//W2_minus_W6
MACC($8,$14,$8) //x3 = W6*(x3+x2) + (W2-W6)*x3
"add $6,$9,$11;"//x1=x4+x6
"sub $9,$9,$11;"//x4=x4-x6
"add $11,$10,$12;"//x6=x5+x7
"sub $10,$10,$12;"//x5=x5-x7
);
// Third stage.
__asm ( "add $12,$13,$8;"//x7=x8+x3
"sub $13,$13,$8;"//x8-=x3
"add $8,$5,$7;"//x3=x0+x2
"sub $5,$5,$7;"//x0-=x2
"li $14,181;"// multiplier for the two (181*v + 128) >> 8 terms below
"add $7,$9,$10;"//x4+x5
"mult $14,$7;"// 181*(x4+x5)
"mflo $7;"
"addi $7,$7,128;"
"sra $7,$7,8;"// x2 = (181*(x4+x5) + 128) >> 8
"sub $9,$9,$10;"// x4-x5
"mult $14,$9;"// 181*(x4-x5)
"mflo $9;"
"addi $9,$9,128;"
"sra $9,$9,8;");// x4 = (181*(x4-x5) + 128) >> 8
//Fourth Stage: each output is a sum or difference shifted right by 8 and
// stored back as a halfword; $24 is the scratch register.
__asm ( "add $24,$12,$6;"//x7+x1
"sra $24,$24,8;"
"sh $24,0($4);"
"add $24,$7,$8;"//x3+x2
"sra $24,$24,8;"
"sh $24,16($4);"
"add $24,$5,$9;"//x0+x4
"sra $24,$24,8;"
"sh $24,32($4);"
"add $24,$13,$11;"//x8+x6
"sra $24,$24,8;"
"sh $24,48($4);"
"sub $24,$13,$11;"//x8-x6
"sra $24,$24,8;"
"sh $24,64($4);"
"sub $24,$5,$9;"//x0-x4
"sra $24,$24,8;"
"sh $24,80($4);"
"sub $24,$8,$7;"//x3-x2
"sra $24,$24,8;"
"sh $24,96($4);"
"sub $24,$12,$6;"//x7-x1
"sra $24,$24,8;"
"sh $24,112($4);");
}
#endif
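/*
 * Code-viewer residue (not part of the original source), translated:
 *   Keyboard shortcuts
 *     Copy code            Ctrl + C
 *     Search code          Ctrl + F
 *     Full-screen mode     F11
 *     Toggle theme         Ctrl + Shift + D
 *     Show shortcuts       ?
 *     Increase font size   Ctrl + =
 *     Decrease font size   Ctrl + -
 */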