📄 yuv2rgb.c
字号:
"add $9,$10,$12;"
"sra $8,$9,31;"
"srlv $9,$9,$8;");
__asm( "andi $9,$9,0x00F8;"
"lbu $10,1($4);"//cargamos aqui para no tener interbloqueo
"sll $9,$9,8;"
"or $24,$24,$9;"
//primer punto ,ahora procesamos segundo.
//r2
"add $9,$10,$11;"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00F8;"
"srl $19,$9,3;"
//g2
"addi $9,$10,-16;"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00FC;"
"sll $9,$9,3;"
"or $19,$19,$9;"
//b2
"add $9,$10,$12;"
"lbu $10,0($15);"//cargamos aqui para no tener interbloqueo
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00F8;"
"sll $9,$9,8;"
"or $19,$19,$9;"
//presentamos el punto
);
__asm(//ahora procesamos la segunda l韓ea
//f2r1
"add $9,$10,$11;"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00F8;"
"sll $9,$9,13;"
"or $24,$24,$9;"
//f2g1
"addi $9,$10,-16;"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00FC;"
"sll $9,$9,19;"
"or $24,$24,$9;"
//f2b1
"add $9,$10,$12;"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00F8;"
"lbu $10,1($15);"//cargamos aqui para no tener interbloqueo
"sll $9,$9,24;"
"or $24,$24,$9;"
);
//primer punto ,ahora procesamos segundo.
//f2r2
__asm( "add $9,$10,$11;"
"lbu $11,1($6);"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00F8;"
"sll $9,$9,13;"
"or $19,$19,$9;"
//f2g2
"addi $9,$10,-16;"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00FC;"
"sll $9,$9,19;"
"or $19,$19,$9;"
//f2b2
"add $9,$10,$12;"
"lbu $12,1($7);"
"sra $8,$9,31;"
"srlv $9,$9,$8;"
"andi $9,$9,0x00F8;"
"sll $9,$9,24;"
"lbu $10,2($4);"
"or $19,$19,$9;"
//presentamos el punto
"sw $24,0($25);"
"sub $8,$25,$17;"
"sw $19,0($8);"
);
//incrementamos todos los punteros y volvemos al bucle
__asm(
"addi $4,$4,+2;"
"addi $15,$15,+2;"
"addi $6,$6,+1;"
"addi $7,$7,+1;"
"addi $3,$3,-1;"
"sub $25,$25,$17;"
"sub $25,$25,$17;"
"bgtz $3,bucle_xR;"
"add $4,$15,$16;"
"addu $6,$6,$14;"
"addu $7,$7,$14;"
//cargamos aqui para no tener interbloqueo
"lbu $11,0($6);"
"lbu $10,0($4);"
"lbu $12,0($7);"
"mul $8,$13,$17;"
"addu $25,$25,$8;" //puntero_video+desplazamiento
"addiu $25,$25,+4;"
"addi $2,$2,-1;"
"bgtz $2,bucle_yR;"
//restauramos registros, la pila y finalizamos
"lw $16,12(sp);"
"lw $17,16(sp);"
"lw $18,20(sp);"
"lw $19,24(sp);"
"addiu sp,sp,+36;"
);}
/*
 * yuv2rgb_565Z - planar YCbCr (4:2:0 layout implied by the pointer
 * arithmetic below) to RGB565, with pixel doubling: each x iteration
 * emits one 32-bit word (an interpolated pixel plus the current pixel)
 * and every word is stored to two output lines ($25 and $17), i.e.
 * 2x horizontal and 2x vertical zoom ("4xzoom" per the history notes).
 *
 * Entirely hand-written MIPS inline assembly. Register use assumes the
 * o32 calling convention: puc_y=$4, stride_y=$5, puc_u=$6, puc_v=$7;
 * the remaining arguments are loaded from the stack after the 8-byte
 * adjustment (puc_out -> $25, width_y -> $3, height_y -> $2,
 * stride_dest -> $9). stride_uv is never loaded - the code assumes
 * the chroma stride equals stride_y/2 ($8). Dither is unused here.
 *
 * Recurring clamp idiom: "sra $t,$v,31" yields 0 for non-negative $v
 * and -1 for negative; "srlv" then shifts by (amount & 31) = 31,
 * collapsing negative color sums to (near) zero before the andi mask.
 *
 * NOTE(review): conditional branches here appear to rely on the
 * assembler's reorder mode to fill delay slots - TODO confirm the
 * build uses reorder mode, otherwise the delay-slot instructions
 * execute on both paths.
 */
void yuv2rgb_565Z(uint8_t *puc_y, int stride_y,
uint8_t *puc_u, uint8_t *puc_v, int stride_uv,
uint8_t *puc_out, int width_y, int height_y, int stride_dest, int Dither)
{
// yuv2rgb 4xzoom.32bits
// v0.2->some optimizations+horizontal crop
// v0.3->precise bilinear+some optimizations
// v0.4a->removed precise filtering+save word opt.
// v0.5a->fixed big bug. Precise bilinear only on VR4122.
__asm ( "addiu sp,sp,-8;" // reserve spill space for callee-saved $16/$17
"sw $16,0(sp);"
"sw $17,4(sp);"
// load the stack-passed arguments (offsets include the 8-byte adjust above)
"lw $9,40(sp);" // $9  = stride_dest
"lw $2,36(sp);" // $2  = height_y
"lw $3,32(sp);" // $3  = width_y
"lw $25,28(sp);" // $25 = puc_out
// horizontal crop: clip the source width to 120 and center the window
"addi $14,$3,-120;"
"blez $14,no_recorte_x;" // "no_recorte_x" = "no x crop"
"li $3,120;"
"srl $14,$14,1;" // excess/2 -> luma left margin
"add $4,$4,$14;"
"srl $14,$14,1;" // excess/4 -> chroma left margin (half resolution)
"add $6,$6,$14;"
"add $7,$7,$14;"
"no_recorte_x:"
// vertical crop: clip the source height to 160 and center the window
"addi $14,$2,-160;"
"blez $14,no_recorte_y;" // "no_recorte_y" = "no y crop"
"li $2,160;"
"srl $14,$14,1;" // rows to skip = excess/2
"mul $14,$14,$5;" // luma byte offset = rows * stride_y
"add $4,$4,$14;"
"srl $14,$14,2;" // chroma offset = luma offset/4 (half rows, half stride)
"add $6,$6,$14;"
"add $7,$7,$14;"
// stride differences:
"no_recorte_y:srl $8,$5,1;" // $8 = stride_y/2, assumed chroma line stride
"sub $5,$5,$3;" // $5 = stride_y - width: per-row luma pointer advance
//
"sll $9,$9,1;" // stride_dest doubled (presumably pixels -> bytes; TODO confirm units)
// set up the pointer for the duplicated (second) output line
"sll $14,$3,1;"
"add $17,$25,$9;"
"add $17,$17,$14;" // $17 = puc_out + 2*stride_dest + 2*width
//
// row loop ("bucle_y1" = "y loop 1"): prime chroma and the row's first pixel
"bucle_y1:lbu $12,0($7);" // $12 = *puc_v
"lbu $11,0($6);" // $11 = *puc_u
"lbu $10,0($4);" // $10 = *puc_y
"add $16,$3,$0;" // $16 = column counter = width
"addi $11,$11,-128;" // center the chroma samples
"addi $12,$12,-128;"
"addi $10,$10,-16;" // remove the luma black level
// red of the row's first pixel
"add $14,$10,$11;"
"sra $15,$14,31;" // clamp-negatives idiom (see header comment)
"srlv $15,$14,$15;"
"srl $13,$15,3;" // 5 red bits -> $13 (pixel accumulator)
// green of the row's first pixel
// NOTE(review): there is no "addi $14,$10,-16" here, so green re-clamps
// the red sum ($14 still holds the Y+chroma value from above) instead of
// a fresh luma term; every other variant in this file recomputes it -
// verify whether this is intentional or a dropped instruction.
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"andi $14,$15,0x00FC;"
"sll $14,$14,3;" // 6 green bits into position
"or $13,$13,$14;"
// blue of the row's first pixel
"add $14,$10,$12;"
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"andi $14,$15,0x00F8;"
"sll $14,$14,8;" // 5 blue bits into position
"or $13,$13,$14;"
"lbu $10,1($4);" // prefetch the next luma sample
"addi $4,$4,+1;"
// "addi $16,$16,-1;" // (disabled: the counter is decremented inside the x loop)
"addi $10,$10,-16;" // pre-bias the prefetched luma
);
// column loop ("bucle_x1" = "x loop 1"): convert one pixel into $24
__asm( "bucle_x1:add $14,$10,$11;" // red
"sra $15,$14,31;"
"srlv $24,$14,$15;"
"srl $24,$24,3;"
//g3
"sra $15,$10,31;" // green: clamp the biased luma alone
"srlv $15,$10,$15;"
"andi $15,$15,0x00FC;"
"sll $14,$15,3;"
"or $24,$24,$14;"
//b3
"add $14,$10,$12;" // blue
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"andi $15,$15,0x00F8;"
"sll $14,$15,8;"
"or $24,$24,$14;"
#if (_WIN32_WCE < 300)
// compute the interpolated pixel (cheap average; may drop the low bit)
"srl $14,$13,1;" // previous pixel / 2
"srl $15,$24,1;" // current pixel / 2
"andi $14,$14,0x7BEF;" // mask bits that crossed RGB565 field boundaries
"andi $15,$15,0x7BEF;"
"addu $13,$14,$15;" // sum of halves = average
#else
// compute the interpolated pixel (exact average)
"andi $10,$13,0x0821;" // low bit of each 565 field of the previous pixel
"and $10,$10,$24;" // carry bits that both halvings would drop
"srl $14,$13,1;" // previous pixel / 2
"srl $15,$24,1;" // current pixel / 2
"andi $14,$14,0x7BEF;" // mask cross-field bits
"andi $15,$15,0x7BEF;"
"addu $13,$14,$15;" // sum of halves...
"addu $13,$13,$10;" // ...plus the recovered carry = exact rounding
#endif
// store the pixel pair (interpolated | current) to both output lines
"sll $15,$24,16;"
"or $13,$13,$15;"
"sw $13,0($25);"
"sw $13,0($17);"
// the current pixel becomes the "previous" pixel of the next iteration
"add $13,$24,$0;"
// advance pointers and counters
"lbu $10,0($4);" // load the next puc_y sample
"addi $4,$4,+1;"
"addi $16,$16,-1;"
"sll $14,$16,31;" // bit 0 of the column counter -> sign position
"addi $25,$25,+4;"
"addi $17,$17,+4;"
"addi $10,$10,-16;" // pre-bias the new luma sample
"beq $14,$0,par_x;" // advance chroma only every second pixel ("par_x" = "even x")
"addi $6,$6,+1;"
"addi $7,$7,+1;"
"lbu $12,0($7);"
"lbu $11,0($6);"
"addi $12,$12,-128;"
"addi $11,$11,-128;"
"par_x:bgtz $16,bucle_x1;");
__asm( // end of row: apply the line strides
"sll $14,$2,31;" // bit 0 of the row counter -> sign position
"add $4,$4,$5;" // puc_y += stride_y - width
"addi $4,$4,-1;" // compensate the extra +1 done in the row prologue
"srl $15,$3,1;"
"sub $6,$6,$15;" // rewind chroma pointers by the width/2 consumed this row
"sub $7,$7,$15;"
"beq $14,$0,par_y;" // advance the chroma line only every other row ("par_y" = "even y")
"add $6,$6,$8;" // $8 = stride_y/2 (assumed chroma stride)
"add $7,$7,$8;"
// advance stride_dest: reposition both output-line pointers
"par_y:addi $2,$2,-1;"
"sll $14,$3,1;"
"sub $17,$17,$14;" // rewind by part of the bytes written this row
"add $25,$17,$9;" // recompute the pair of output-line pointers
"add $17,$25,$9;" // for the next source row ($9 = doubled stride_dest)
"add $17,$17,$14;"
"bgtz $2,bucle_y1;"
// restore callee-saved registers, the stack, and return
"lw $16,0(sp);"
"lw $17,4(sp);"
"addiu sp,sp,+8;" );
}
void yuv2rgb_565ZPP(uint8_t *puc_y, int stride_y,
uint8_t *puc_u, uint8_t *puc_v, int stride_uv,
uint8_t *puc_out, int width_y, int height_y, int stride_dest, int Dither)
{
//yuv2rgb 4xzoom BILINEAR FILTERING. 32bit access
// v0.1=>base code of 0.2 linear zoom asm +bilinear filtering
// v0.2=>finally 32bits code
__asm ( "addiu sp,sp,-36;"
"sw $16,0(sp);"
"sw $17,4(sp);"
"sw $18,8(sp);"
"sw $19,12(sp);"
"sw $20,16(sp);"
"sw $21,20(sp);"
//cargamos todas las variables
"lw $9,68(sp);"
"lw $2,64(sp);"
"lw $3,60(sp);"
"lw $25,56(sp);"
//recortamos video x_crop
"addi $14,$3,-120;"
"blez $14,no_recorte_xzpp;"
"li $3,120;"
"srl $14,$14,1;"
"add $4,$4,$14;"
"srl $14,$14,1;"
"add $6,$6,$14;"
"add $7,$7,$14;"
"no_recorte_xzpp:"
//recortamos video y_crop
"addi $14,$2,-160;"
"blez $14,no_recorte_yzpp;"
"li $2,160;"
"srl $14,$14,1;"
"mul $14,$14,$5;"
"add $4,$4,$14;"
"srl $14,$14,2;"
"add $6,$6,$14;"
"add $7,$7,$14;"
//diferencias de strides:
"no_recorte_yzpp:srl $8,$5,1;"
"add $17,$5,$4;"//y tomamos el primer puntero
"sub $5,$5,$3;"
//
"sll $9,$9,1;"
//preparando segunda linea
"sll $14,$3,1;"
"add $21,$25,$9;"
"add $21,$21,$14;"
//
"bucle_y2:lbu $12,0($7);"
"lbu $11,0($6);"
"lbu $10,0($4);"
"lbu $18,0($17);"//tomamos puc_y de la siguiente linea
"add $16,$3,$0;"
"addi $11,$11,-144;"
"addi $12,$12,-144;");
//r1
__asm( "add $14,$10,$11;"
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"srl $13,$15,3;"
//g1
"addi $14,$10,-16;"
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"andi $14,$15,0x00FC;"
"sll $14,$14,3;"
"or $13,$13,$14;"
//b1
"add $14,$10,$12;"
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"andi $14,$15,0x00F8;"
"sll $14,$14,8;"
"or $13,$13,$14;"
//r2
"srl $10,$10,1;"
"srl $18,$18,1;"
"add $18,$18,$10;"
"add $14,$18,$11;"
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"srl $19,$15,3;"
//g2
"addi $14,$18,-16;"
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"andi $14,$15,0x00FC;"
"sll $14,$14,3;"
"or $19,$19,$14;"
//b2
"add $14,$18,$12;"
"sra $15,$14,31;"
"srlv $15,$14,$15;"
"andi $14,$15,0x00F8;"
"sll $14,$14,8;"
"or $19,$19,$14;"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -