📄 mp4_idct.c
字号:
__asm ("idct_row_a:add $13,$9,$10;" //x8=x4+x5
"li $14,565;" //W7
"mult $14,$13;" //W7*x8
"li $15,2276;" //W1_minus_W7
"macc $0,$15,$9;" //x4=x(W1-W7)*x4+W7*8
"mflo $9;"
"sll $5,$5,8;" // x0<<8)
"mult $14,$13;" //W7*x8
"li $15,-3406;" //-W1_plus_W7
"macc $0,$15,$10;" //-((W1+W7)*x5)+W7*x8
"mflo $10;"
"li $14,2408;" //W3
"add $13,$11,$12;" // x6+x7
"mult $14,$13;" //W3*(x6+x7)
"li $15,-799;" //-W3_minus_W5
"macc $0,$15,$11;" //-((W3-W5)*x6)+W3*(x6+x7)
"mflo $11;"
"add $5,$5,8192;" //x0+=128
"mult $14,$13;"
"li $14,-4017;"
"macc $0,$12,$14;"
"mflo $12;");
__asm ( "add $13,$5,$6;"
"sub $5,$5,$6;" //x0 -=x1
"add $6,$7,$8;" //x1=(x2+x3)
"li $15,1108;" //W6
"mult $15,$6;" //W6*(x3+x2)
"li $14,-3784;" //-W2_plus_W6
"macc $0,$14,$7;" //x1+(-W2_plus_W6*x2)
"mflo $7;"
"mult $15,$6;" //W6*(x3+x2)
"li $14,1568;" //W2_minus_W6
"macc $0,$14,$8;" //W2_minus_W6*x3
"mflo $8;"
"addi $9,$9,4;"
"addi $11,$11,4;"
"addi $10,$10,4;"
"addi $12,$12,4;"
"sra $9,$9,3;"
"sra $10,$10,3;"
"sra $11,$11,3;"
"sra $12,$12,3;"
"add $6,$9,$11;" //x1=x4+x6
"sub $9,$9,$11;" //x4=x4-x6
"add $11,$10,$12;" //x6=x5+x7
"sub $10,$10,$12;" //x5=x5-x7
);
__asm ( "addi $7,$7,4;"
"addi $8,$8,4;"
"sra $7,$7,3;"
"sra $8,$8,3;"
"add $12,$13,$8;" //x7=x8+x3
"sub $13,$13,$8;" //x8-=x3
"add $8,$5,$7;" //x3=x0+x2
"sub $5,$5,$7;" //x0-=x2
"li $14,181;"
"add $7,$9,$10;" //x4+x5
"mult $14,$7;" //
"mflo $7;"
"addi $7,$7,128;"
"sra $7,$7,8;"
"sub $9,$9,$10;"
"mult $14,$9;"
"mflo $9;"
"addi $9,$9,128;"
"sra $9,$9,8;");
//Fourth Stage
__asm ( //x7+x1
"add $15,$12,$6;"
"sra $15,$15,14;"
//x7-x1
"sub $12,$12,$6;"
"sra $12,$12,14;"
//x3+x2
"add $6,$8,$7;"
"sra $6,$6,14;"
//x3-x2
"sub $8,$8,$7;"
"sra $8,$8,14;"
//x0+x4
"add $7,$5,$9;"
"sra $7,$7,14;"
//x0-x4
"sub $5,$5,$9;"
"sra $5,$5,14;"
//x8+x6
"add $9,$13,$11;"
"sra $9,$9,14;"
//x8-x6
"sub $13,$13,$11;"
"sra $13,$13,14;"
// If we don't have to add older values, check if there is an overflow and store
"beq $25,$0,check_overflow;");
__asm ( "lbu $2,0($24);"
"lbu $3,1($24);"
"add $15,$15,$2;"
"add $6,$6,$3;"
"lbu $2,2($24);"
"lbu $3,3($24);"
"add $7,$7,$2;"
"add $9,$9,$3;"
"lbu $2,4($24);"
"lbu $3,5($24);"
"add $13,$13,$2;"
"add $5,$5,$3;"
"lbu $2,6($24);"
"lbu $3,7($24);"
"add $8,$8,$2;"
"add $12,$12,$3;");
__asm( // Check for overflow
"check_overflow:"
"or $14,$15,$12;"
"or $14,$14,$6;"
"or $14,$14,$8;"
"or $14,$14,$7;"
"or $14,$14,$5;"
"or $14,$14,$9;"
"or $14,$14,$13;"
"srl $14,$14,8;"
// If there is an overflow, we must saturate all the values
"bne $14,$0,saturar;"
);
__asm ( "no_saturar:"
"sb $15,0($24);"
"sb $6,1($24);"
"sb $7,2($24);"
"sb $9,3($24);"
"sb $13,4($24);"
"sb $5,5($24);"
"sb $8,6($24);"
"sb $12,7($24);"
"jr $31;");
__asm ( "saturar:"
"srl $14,$15,16;" // if less than 0 -> 0
"srlv $15,$15,$14;" //
"sll $14,$15,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $15,$15,$14;" //
"srl $14,$12,16;" // if less than 0 -> 0
"srlv $12,$12,$14;" //
"sll $14,$12,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $12,$12,$14;" //
"srl $14,$6,16;" // if less than 0 -> 0
"srlv $6,$6,$14;" //
"sll $14,$6,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $6,$6,$14;" //
"srl $14,$8,16;" // if less than 0 -> 0
"srlv $8,$8,$14;" //
"sll $14,$8,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $8,$8,$14;" //
"srl $14,$7,16;" // if less than 0 -> 0
"srlv $7,$7,$14;" //
"sll $14,$7,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $7,$7,$14;" //
"srl $14,$5,16;" // if less than 0 -> 0
"srlv $5,$5,$14;" //
"sll $14,$5,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $5,$5,$14;" //
"srl $14,$9,16;" // if less than 0 -> 0
"srlv $9,$9,$14;" //
"sll $14,$9,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $9,$9,$14;" //
"srl $14,$13,16;" // if less than 0 -> 0
"srlv $13,$13,$14;" //
"sll $14,$13,23;" // if bigger than 255 -> 255
"sra $14,$14,31;" //
"or $13,$13,$14;" //
"sb $15,0($24);"
"sb $6,1($24);"
"sb $7,2($24);"
"sb $9,3($24);"
"sb $13,4($24);"
"sb $5,5($24);"
"sb $8,6($24);"
"sb $12,7($24);");
}
// IDCT_Col: in-place 1-D inverse DCT pass written as MIPS inline assembly.
//
// Reads eight signed halfwords from Blk at byte offsets 0,16,...,112 (a
// stride of 8 halfwords) and writes eight halfwords back to the same
// offsets. Presumably Blk is an 8x8 block of 16-bit coefficients and each
// call handles one column - confirm against idct_block_t and the caller.
//
// Register usage inside the asm:
//   $4        base address used for every load/store
//   $5..$12   working values x0,x1,x2,x3,x4,x5,x6,x7 (in that register order)
//   $13       temporary x8
//   $14,$15   multiplier constants
//   $2        OR of x1..x7 for the all-zero test
//   $24       scratch for the final stores
//
// Fast path: when x1..x7 are all zero, every output equals x0<<3; if that is
// also zero nothing is stored. Either way the fast path returns with its own
// "jr $31".
//
// NOTE(review): the asm statements carry no operands or clobber lists, so
// they assume Blk is already in $4 (presumably the first-argument register
// of the target ABI) and that the compiler keeps nothing live in the
// registers listed above or in HI/LO - confirm for the build configuration.
// NOTE(review): the bare "jr $31" skips any compiler-generated epilogue;
// this is only safe if the function is compiled without a stack frame - confirm.
// NOTE(review): "macc" is not a base MIPS instruction; it is presumably the
// NEC VR-series multiply-accumulate (LO += rs*rt), which is what the
// existing comments imply - confirm for the target assembler.
// NOTE(review): the branches have no explicit delay-slot instructions, so the
// code presumably relies on the assembler's reorder mode - confirm.
static void IDCT_Col(idct_block_t *Blk)
{
// Load the eight inputs (signed 16-bit loads).
__asm ( "lh $5,0($4);"// x0 <- offset 0
"lh $9,16($4);"// x4 <- offset 16
"lh $8,32($4);"// x3 <- offset 32
"lh $12,48($4);"// x7 <- offset 48
"lh $6,64($4);"// x1 <- offset 64
"lh $11,80($4);"// x6 <- offset 80
"lh $7,96($4);"// x2 <- offset 96
"lh $10,112($4);"// x5 <- offset 112
// $2 = x1|x2|...|x7, which is zero only when all seven are zero.
"or $2,$7,$6;"
"or $2,$2,$8;"
"or $2,$2,$9;"
"or $2,$2,$10;"
"or $2,$2,$11;"
"or $2,$2,$12;"// $2 now decides whether the shortcut applies
"sll $6,$6,11;"// x1 <<= 11 (done before the branch; only the full path uses it)
"bne $2,$0,idct_estandar;"// some input besides x0 is non-zero: cannot take the shortcut
"sll $5,$5,3;"// shortcut: x0 <<= 3
"beq $5,$0,final;"// result is zero: skip the stores
"sh $5,0($4);"// store x0<<3 to all eight output positions
"sh $5,16($4);"
"sh $5,32($4);"
"sh $5,48($4);"
"sh $5,64($4);"
"sh $5,80($4);"
"sh $5,96($4);"
"sh $5,112($4);"
"final:jr $31;");// return directly to the caller from the fast path
// First stage of the full transform (odd part rotations).
__asm ("idct_estandar:add $13,$9,$10;"// x8 = x4 + x5
"li $14,565;"// W7
"mult $14,$13;"// LO = W7*x8
"li $15,2276;"// W1_minus_W7
"macc $0,$15,$9;"// LO += (W1-W7)*x4
"mflo $9;"// x4 = W7*x8 + (W1-W7)*x4
"sll $5,$5,11;"// x0 <<= 11
"mult $14,$13;"// LO = W7*x8 again (x8 still holds the old x4+x5)
"li $15,-3406;" // -W1_plus_W7
"macc $0,$15,$10;"// LO += -(W1+W7)*x5
"mflo $10;"// x5 = W7*x8 - (W1+W7)*x5
"li $14,2408;"// W3
"add $13,$11,$12;"// x8 = x6 + x7
"mult $14,$13;"// LO = W3*(x6+x7)
"li $15,-799;"// -W3_minus_W5
"macc $0,$15,$11;"// LO += -(W3-W5)*x6
"mflo $11;"// x6 = W3*(x6+x7) - (W3-W5)*x6
"add $5,$5,128;"// x0 += 128 (presumably rounding for the final >>8)
"mult $14,$13;"// LO = W3*(x6+x7) again ($14 still holds W3)
"li $14,-4017;"// presumably -(W3+W5), by analogy with the constants above
"macc $0,$12,$14;"// LO += -4017*x7
"mflo $12;"// x7 = W3*(x6+x7) - 4017*x7
);
// Second stage.
__asm ( "add $13,$5,$6;"// x8 = x0 + x1
"sub $5,$5,$6;"// x0 -= x1
"add $6,$7,$8;"// x1 = x2 + x3 (sum only; multiplied by W6 below)
"li $15,1108;"// W6
"mult $15,$6;"// LO = W6*(x3+x2)
"li $14,-3784;" // -W2_plus_W6
"macc $0,$14,$7;" // LO += -(W2+W6)*x2
"mflo $7;"// x2 = W6*(x3+x2) - (W2+W6)*x2
"mult $15,$6;"// LO = W6*(x3+x2) again
"li $14,1568;"// W2_minus_W6
"macc $0,$14,$8;"// LO += (W2-W6)*x3
"mflo $8;"// x3 = W6*(x3+x2) + (W2-W6)*x3
"add $6,$9,$11;"// x1 = x4 + x6
"sub $9,$9,$11;"// x4 = x4 - x6
"add $11,$10,$12;"// x6 = x5 + x7
"sub $10,$10,$12;"// x5 = x5 - x7
);
// Third stage.
__asm ( "add $12,$13,$8;"// x7 = x8 + x3
"sub $13,$13,$8;"// x8 -= x3
"add $8,$5,$7;"// x3 = x0 + x2
"sub $5,$5,$7;"// x0 -= x2
"li $14,181;"// scale factor; 181/256 is about 0.707
"add $7,$9,$10;"// x4 + x5
"mult $14,$7;"// 181*(x4+x5)
"mflo $7;"
"addi $7,$7,128;"// round
"sra $7,$7,8;"// x2 = (181*(x4+x5) + 128) >> 8
"sub $9,$9,$10;"// x4 - x5
"mult $14,$9;"// 181*(x4-x5)
"mflo $9;"
"addi $9,$9,128;"// round
"sra $9,$9,8;");// x4 = (181*(x4-x5) + 128) >> 8
// Fourth stage: form the eight outputs, shift right by 8 and store them as
// halfwords. Sums go to offsets 0..48, the matching differences to 64..112.
__asm ( "add $24,$12,$6;"// x7 + x1
"sra $24,$24,8;"
"sh $24,0($4);"// offset 0 = (x7+x1) >> 8
"add $24,$7,$8;"// x3 + x2
"sra $24,$24,8;"
"sh $24,16($4);"// offset 16 = (x3+x2) >> 8
"add $24,$5,$9;"// x0 + x4
"sra $24,$24,8;"
"sh $24,32($4);"// offset 32 = (x0+x4) >> 8
"add $24,$13,$11;"// x8 + x6
"sra $24,$24,8;"
"sh $24,48($4);"// offset 48 = (x8+x6) >> 8
"sub $24,$13,$11;"// x8 - x6
"sra $24,$24,8;"
"sh $24,64($4);"// offset 64 = (x8-x6) >> 8
"sub $24,$5,$9;"// x0 - x4
"sra $24,$24,8;"
"sh $24,80($4);"// offset 80 = (x0-x4) >> 8
"sub $24,$8,$7;"// x3 - x2
"sra $24,$24,8;"
"sh $24,96($4);"// offset 96 = (x3-x2) >> 8
"sub $24,$12,$6;"// x7 - x1
"sra $24,$24,8;"
"sh $24,112($4);");// offset 112 = (x7-x1) >> 8
}
#endif
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -