📄 mp4_idct.c
字号:
/********************************************************************************
* *
* This code has been developed by Project Mayo. This software is an *
* implementation of a part of one or more MPEG-4 Video tools as *
* specified in ISO/IEC 14496-2 standard. Those intending to use this *
* software module in hardware or software products are advised that its *
* use may infringe existing patents or copyrights, and any such use *
* would be at such party's own risk. The original developer of this *
* software module and his/her company, and subsequent editors and their *
* companies (including Project Mayo), will have no liability for use of *
* this software or modifications or derivatives thereof. *
* *
********************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* The GPL can be found at: http://www.gnu.org/copyleft/gpl.html *
* *
* Authors: *
* *
* Andrea Graziani (Ag): *
* - Original source code (Open Divx Decoder 0.4a). *
* *
* Pedro Mateu (Pm): *
* - Modified and optimized code + MIPS ASM. *
* *
********************************************************************************/
#include "global.h"
#if !defined(MIPS_ASM_MACC) || (_WIN32_WCE >= 300)
#include "mp4_idct.h"
// 2D Inverse Discrete Cosine Transform (iDCT)
/*
 * IDCT_S - run the full 8x8 inverse transform on one coefficient block.
 *
 * DCT_Block : 64 coefficients, 8 per row; modified in place by the column pass.
 * DestU8    : first output byte of the top row of the destination.
 * Stride    : distance, in bytes, between two consecutive destination rows.
 * Mode      : forwarded unchanged to IDCT_Row_S for every row.
 *
 * All eight columns are processed first (IDCT_Col), then each of the eight
 * rows is transformed and written to the destination (IDCT_Row_S).
 */
void IDCT_S (idct_block_t *DCT_Block, uint8_t *DestU8, int Stride, unsigned char Mode)
{
    int i;

    /* Column pass: column i starts at DCT_Block + i. */
    for (i = 0; i < 8; i++) {
        IDCT_Col(DCT_Block + i);
    }

    /* Row pass: row i starts at DCT_Block + 8*i and lands i strides below DestU8. */
    for (i = 0; i < 8; i++) {
        IDCT_Row_S(DCT_Block + 8 * i, DestU8 + i * Stride, Mode);
    }
}
#ifndef MIPS_ASM_MACC
/*
 * Clamp an integer expression to the range 0..255.
 * The argument and the whole expansion are parenthesized so the macro can be
 * used safely inside larger expressions. The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define _SAT(a) ((a) > 255 ? 255 : ((a) < 0 ? 0 : (a)))
/*
 * IDCT_Col - column pass of the 8x8 inverse DCT for one column, in place.
 *
 * Blk points at the first element of the column. The eight coefficients are
 * read from Blk[0], Blk[8], ..., Blk[56] and the eight results are written
 * back to the same locations.
 *
 * Three shortcuts skip part of the butterfly when coefficients are zero:
 *   1. everything except Blk[0] is zero: every output becomes Blk[0] * 8
 *      (nothing is written at all when Blk[0] is zero too);
 *   2. Blk[32] is the only non-zero coefficient besides Blk[0];
 *   3. Blk[8], Blk[24], Blk[40] and Blk[56] are all zero.
 *
 * Coefficients are scaled with multiplications rather than left shifts,
 * because left-shifting a negative signed value is undefined behaviour in C.
 * The zero tests are kept in int so they do not rely on idct_block_t being
 * exactly 16 bits wide.
 */
static void IDCT_Col(idct_block_t *Blk)
{
    int x0, x1, x2, x3, x4, x5, x6, x7, x8;
    int even_rest, odd_all;

    x0 = Blk[0];
    x4 = Blk[8];
    x3 = Blk[16];
    x7 = Blk[24];
    x1 = Blk[32] * 2048;        /* same value as the former << 11 */
    x6 = Blk[40];
    x2 = Blk[48];
    x5 = Blk[56];

    even_rest = x2 | x3;            /* non-zero if Blk[16] or Blk[48] is set */
    odd_all = x4 | x5 | x6 | x7;    /* non-zero if any odd-index input is set */

    /* Shortcut 1: only Blk[0] may be non-zero. */
    if (!(x1 | even_rest | odd_all)) {
        if (x0) {
            Blk[0] = Blk[8] = Blk[16] = Blk[24] = Blk[32] = Blk[40] = Blk[48] = Blk[56] = x0 * 8;
        }
        return;
    }

    /* Common to all remaining paths: scale the first input and add the rounding term. */
    x0 = x0 * 2048 + 128;

    /* Shortcut 2: only Blk[0] and Blk[32] contribute. */
    if (!(even_rest | odd_all)) {
        x8 = (x0 + x1) >> 8;
        x0 = (x0 - x1) >> 8;
        Blk[0] = x8;
        Blk[8] = x0;
        Blk[16] = x0;
        Blk[24] = x8;
        Blk[32] = x8;
        Blk[40] = x0;
        Blk[48] = x0;
        Blk[56] = x8;
        return;
    }

    /* Shortcut 3: all odd-index inputs are zero, so the output is mirrored. */
    if (!odd_all) {
        x8 = x0 + x1;
        x0 -= x1;
        x1 = W6 * (x3 + x2);
        x2 = x1 - (W2_plus_W6) * x2;
        x3 = x1 + (W2_minus_W6) * x3;
        x7 = x8 + x3;
        x8 -= x3;
        x3 = x0 + x2;
        x0 -= x2;
        Blk[0] = x7 >> 8;
        Blk[8] = x3 >> 8;
        Blk[16] = x0 >> 8;
        Blk[24] = x8 >> 8;
        Blk[32] = x8 >> 8;
        Blk[40] = x0 >> 8;
        Blk[48] = x3 >> 8;
        Blk[56] = x7 >> 8;
        return;
    }

    /* General case, first stage: odd-index inputs. */
    x8 = W7 * (x4 + x5);
    x4 = x8 + (W1_minus_W7) * x4;
    x5 = x8 - (W1_plus_W7) * x5;
    x8 = W3 * (x6 + x7);
    x6 = x8 - (W3_minus_W5) * x6;
    x7 = x8 - (W3_plus_W5) * x7;

    /* Second stage: even-index inputs, then combine the odd terms. */
    x8 = x0 + x1;
    x0 -= x1;
    x1 = W6 * (x3 + x2);
    x2 = x1 - (W2_plus_W6) * x2;
    x3 = x1 + (W2_minus_W6) * x3;
    x1 = x4 + x6;
    x4 -= x6;
    x6 = x5 + x7;
    x5 -= x7;

    /* Third stage. */
    x7 = x8 + x3;
    x8 -= x3;
    x3 = x0 + x2;
    x0 -= x2;
    x2 = (181 * (x4 + x5) + 128) >> 8;
    x4 = (181 * (x4 - x5) + 128) >> 8;

    /* Final stage: write the eight results back into the column. */
    Blk[0] = (x7 + x1) >> 8;
    Blk[8] = (x3 + x2) >> 8;
    Blk[16] = (x0 + x4) >> 8;
    Blk[24] = (x8 + x6) >> 8;
    Blk[32] = (x8 - x6) >> 8;
    Blk[40] = (x0 - x4) >> 8;
    Blk[48] = (x3 - x2) >> 8;
    Blk[56] = (x7 - x1) >> 8;
}
/*
 * IDCT_Row_S - row pass of the 8x8 inverse DCT for one row, with output to
 * 8-bit samples.
 *
 * Blk    : the eight values of the row, Blk[0]..Blk[7] (read only here).
 * DestU8 : eight destination bytes, DestU8[0]..DestU8[7].
 * Mode   : non-zero -> the transformed values are added to the bytes already
 *          in DestU8; zero -> the transformed values overwrite DestU8.
 *
 * Every stored value is limited to 0..255. When Blk[1]..Blk[7] are all zero,
 * a shortcut uses the single value (Blk[0] + 32) >> 6 for all eight outputs.
 *
 * Inputs are scaled with multiplications rather than left shifts, because
 * left-shifting a negative signed value is undefined behaviour in C.
 */
static void IDCT_Row_S (idct_block_t *Blk, uint8_t *DestU8, unsigned char Mode)
{
    int x0, x1, x2, x3, x4, x5, x6, x7, x8;

    x4 = Blk[1];
    x3 = Blk[2];
    x7 = Blk[3];
    x1 = Blk[4] * 256;          /* same value as the former << 8 */
    x6 = Blk[5];
    x2 = Blk[6];
    x5 = Blk[7];

    /* Shortcut: only Blk[0] may be non-zero, so all outputs share one value. */
    if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
        x0 = (Blk[0] + 32) >> 6;

        if (!Mode) {
            DestU8[0] = DestU8[1] = DestU8[2] = DestU8[3] = DestU8[4] = DestU8[5] = DestU8[6] = DestU8[7] = _SAT(x0);
            return;
        }

        x1 = x0 + DestU8[0];
        x2 = x0 + DestU8[1];
        x3 = x0 + DestU8[2];
        x4 = x0 + DestU8[3];
        x5 = x0 + DestU8[4];
        x6 = x0 + DestU8[5];
        x7 = x0 + DestU8[6];
        x8 = x0 + DestU8[7];

        /* Any bit above bit 7 means at least one sum is negative or > 255. */
        x0 = (x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8) >> 8;
        if (!x0) {
            DestU8[0] = x1;
            DestU8[1] = x2;
            DestU8[2] = x3;
            DestU8[3] = x4;
            DestU8[4] = x5;
            DestU8[5] = x6;
            DestU8[6] = x7;
            DestU8[7] = x8;
        }
        else {
            DestU8[0] = _SAT(x1);
            DestU8[1] = _SAT(x2);
            DestU8[2] = _SAT(x3);
            DestU8[3] = _SAT(x4);
            DestU8[4] = _SAT(x5);
            DestU8[5] = _SAT(x6);
            DestU8[6] = _SAT(x7);
            DestU8[7] = _SAT(x8);
        }
        return;
    }

    /* General case: scale the first input and add the rounding term for the final >> 14. */
    x0 = Blk[0] * 256 + 8192;

    /* First stage: odd-index inputs. */
    x8 = W7 * (x4 + x5) + 4;
    x4 = (x8 + (W1_minus_W7) * x4) >> 3;
    x5 = (x8 - (W1_plus_W7) * x5) >> 3;
    x8 = W3 * (x6 + x7) + 4;
    x6 = (x8 - (W3_minus_W5) * x6) >> 3;
    x7 = (x8 - (W3_plus_W5) * x7) >> 3;

    /* Second stage: even-index inputs, then combine the odd terms. */
    x8 = x0 + x1;
    x0 -= x1;
    x1 = W6 * (x3 + x2) + 4;
    x2 = (x1 - (W2_plus_W6) * x2) >> 3;
    x3 = (x1 + (W2_minus_W6) * x3) >> 3;
    x1 = x4 + x6;
    x4 -= x6;
    x6 = x5 + x7;
    x5 -= x7;

    /* Third stage. */
    x7 = x8 + x3;
    x8 -= x3;
    x3 = x0 + x2;
    x0 -= x2;
    x2 = (181 * (x4 + x5) + 128) >> 8;
    x4 = (181 * (x4 - x5) + 128) >> 8;

    /*
     * Final stage. The statement order matters because each variable is
     * overwritten only after its last use as an input.
     * Results: x5,x7,x3,x0,x6,x4,x2,x1 belong to DestU8[0..7] in that order.
     */
    x5 = (x7 + x1) >> 14;
    x1 = (x7 - x1) >> 14;
    x7 = (x3 + x2) >> 14;
    x2 = (x3 - x2) >> 14;
    x3 = (x0 + x4) >> 14;
    x4 = (x0 - x4) >> 14;
    x0 = (x8 + x6) >> 14;
    x6 = (x8 - x6) >> 14;

    if (Mode) {
        /* Additive mode: accumulate onto the bytes already in the destination. */
        x5 += DestU8[0];
        x1 += DestU8[7];
        x7 += DestU8[1];
        x2 += DestU8[6];
        x3 += DestU8[2];
        x4 += DestU8[5];
        x0 += DestU8[3];
        x6 += DestU8[4];
    }

    /* Any bit above bit 7 means at least one value is negative or > 255. */
    x8 = (x5 | x1 | x7 | x2 | x3 | x4 | x0 | x6) >> 8;
    if (!x8) {
        DestU8[0] = x5;
        DestU8[1] = x7;
        DestU8[2] = x3;
        DestU8[3] = x0;
        DestU8[4] = x6;
        DestU8[5] = x4;
        DestU8[6] = x2;
        DestU8[7] = x1;
    }
    else {
        DestU8[0] = _SAT(x5);
        DestU8[1] = _SAT(x7);
        DestU8[2] = _SAT(x3);
        DestU8[3] = _SAT(x0);
        DestU8[4] = _SAT(x6);
        DestU8[5] = _SAT(x4);
        DestU8[6] = _SAT(x2);
        DestU8[7] = _SAT(x1);
    }
}
#else
// MIPS ASM - MACC VERSION
// ----------------------------------
//
// iDCT row and col for multiply and add capable MIPS devices
static void IDCT_Row_S (idct_block_t *Blk, uint8_t *DestU8, unsigned char Mode)
{
__asm("add $24,$5,$0;"
"add $25,$6,$0;"
"lh $5,0($4);" //x0
"lh $9,2($4);" //x4
"lh $8,4($4);" //x3
"lh $12,6($4);" //x7
"lh $6,8($4);" //x1
"lh $11,10($4);" //x6
"lh $7,12($4);" //x2
"lh $10,14($4);" //x5
"sll $6,$6,8;" // x1<<
"or $2,$7,$6;"
"or $2,$2,$8;"
"or $2,$2,$9;"
"or $2,$2,$10;"
"or $2,$2,$11;"
"or $2,$2,$12;" //shortcut
"bne $2,$0,idct_row_a;" //if some values aren磘 0, we can磘 take the shortcut
"addi $5,$5,32;"
"sra $11,$5,6;"
"bne $25,$0,add_eq;"
"srl $15,$11,16;" //if less than 0, then 0
"srlv $11,$11,$15;"
"sll $15,$11,23;" //if bigger than 255:255
"sra $15,$15,31;"
"or $11,$11,$15;"
"sll $14,$11,8;"
"or $11,$11,$14;"
"sll $14,$11,16;"
"or $11,$11,$14;"
"sw $11,0($24);" //guardamos x0
"sw $11,4($24);"
"jr $31;");
__asm( "add_eq:"
"lbu $15,0($24);" //we load older values
"lbu $6,1($24);"
"lbu $7,2($24);"
"lbu $9,3($24);"
"lbu $13,4($24);"
"lbu $5,5($24);"
"lbu $8,6($24);"
"lbu $12,7($24);"
"add $15,$11,$15;"
"add $6,$11,$6;"
"add $7,$11,$7;"
"add $9,$11,$9;"
"add $13,$11,$13;"
"add $5,$11,$5;"
"add $8,$11,$8;"
"add $12,$11,$12;"
// Check if there is an overflow and store
"b check_overflow;");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -