📄 mem_transfer.c
字号:
else if(temp==2)
{
for(j =0; j < 8; j++)
{
/*get three word*/
src0 = *(ptr_ref++);
src1 = *(ptr_ref++);
src2 = *(ptr_ref--);
/*make up two word*/
ref0 = FUNSHIFT2(src1,src0);
ref1 = FUNSHIFT2(src2,src1);
ref16_0 = MERGELSB(0,ref0);
ref16_1 = MERGEMSB(0,ref0);
ref16_2 = MERGELSB(0,ref1);
ref16_3 = MERGEMSB(0,ref1);
cur0 = *(ptr_cur++);
cur1 = *(ptr_cur--);
cur16_0 = MERGELSB(0,cur0);
cur16_1 = MERGEMSB(0,cur0);
cur16_2 = MERGELSB(0,cur1);
cur16_3 = MERGEMSB(0,cur1);
*(ptr_cur++ ) = ref0;
*(ptr_cur)= ref1;
*(ptr_dct++) = DSPIDUALSUB(cur16_0,ref16_0);
*(ptr_dct++) = DSPIDUALSUB(cur16_1,ref16_1);
*(ptr_dct++) = DSPIDUALSUB(cur16_2,ref16_2);
*(ptr_dct++) = DSPIDUALSUB(cur16_3,ref16_3);
ptr_ref += cur_ref_stride;
ptr_cur += cur_ref_stride;
/*ptr_dct += 4;*/
}
}
else if(temp==3)
{
for(j =0; j < 8; j++)
{
/*get three word*/
src0 = *(ptr_ref++);
src1 = *(ptr_ref++);
src2 = *(ptr_ref--);
/*make up two word*/
ref0 = FUNSHIFT1(src1,src0);
ref1 = FUNSHIFT1(src2,src1);
ref16_0 = MERGELSB(0,ref0);
ref16_1 = MERGEMSB(0,ref0);
ref16_2 = MERGELSB(0,ref1);
ref16_3 = MERGEMSB(0,ref1);
cur0 = *(ptr_cur++);
cur1 = *(ptr_cur--);
cur16_0 = MERGELSB(0,cur0);
cur16_1 = MERGEMSB(0,cur0);
cur16_2 = MERGELSB(0,cur1);
cur16_3 = MERGEMSB(0,cur1);
*(ptr_cur++) = ref0;
*(ptr_cur ) = ref1;
*(ptr_dct++) = DSPIDUALSUB(cur16_0,ref16_0);
*(ptr_dct++) = DSPIDUALSUB(cur16_1,ref16_1);
*(ptr_dct++) = DSPIDUALSUB(cur16_2,ref16_2);
*(ptr_dct++) = DSPIDUALSUB(cur16_3,ref16_3);
ptr_ref += cur_ref_stride;
ptr_cur += cur_ref_stride;
/*ptr_dct += 4;*/
}
}
}
#endif
#ifdef optimize_8to16sub_1
/*
 * Unrolled 8x8 "subtract and replace" transfer.
 *
 * For every row of the 8x8 block this reads 8 bytes from cur and 8 bytes
 * from ref, overwrites the cur bytes with the ref bytes, and stores the
 * 16-bit differences (cur - ref, using the cur values read before the
 * overwrite) into dct.  Rows of cur/ref are stride bytes apart; rows of
 * dct are 8 elements apart.
 */
void
transfer_8to16sub_c(int16_t * const dct,    /*<--  receives the 8x8 block of cur - ref differences */
                    uint8_t * const cur,    /*<--> in: current pixels / out: overwritten with ref */
                    const uint8_t * ref,    /*<--  reference pixels */
                    const uint32_t stride   /*<--  distance in bytes between rows of cur and ref */
    )
{
    uint32_t row;

    for (row = 0; row < 8; row++) {
        uint8_t *cur_row = cur + row * stride;
        const uint8_t *ref_row = ref + row * stride;
        int16_t *dct_row = dct + row * 8;

        /* Load the whole row of both inputs before anything is stored. */
        const uint8_t a0 = cur_row[0];
        const uint8_t a1 = cur_row[1];
        const uint8_t a2 = cur_row[2];
        const uint8_t a3 = cur_row[3];
        const uint8_t a4 = cur_row[4];
        const uint8_t a5 = cur_row[5];
        const uint8_t a6 = cur_row[6];
        const uint8_t a7 = cur_row[7];

        const uint8_t b0 = ref_row[0];
        const uint8_t b1 = ref_row[1];
        const uint8_t b2 = ref_row[2];
        const uint8_t b3 = ref_row[3];
        const uint8_t b4 = ref_row[4];
        const uint8_t b5 = ref_row[5];
        const uint8_t b6 = ref_row[6];
        const uint8_t b7 = ref_row[7];

        /* Replace the current row with the reference row. */
        cur_row[0] = b0;
        cur_row[1] = b1;
        cur_row[2] = b2;
        cur_row[3] = b3;
        cur_row[4] = b4;
        cur_row[5] = b5;
        cur_row[6] = b6;
        cur_row[7] = b7;

        /* Differences fit in [-255, 255], so the int16_t store is exact. */
        dct_row[0] = (int16_t) (a0 - b0);
        dct_row[1] = (int16_t) (a1 - b1);
        dct_row[2] = (int16_t) (a2 - b2);
        dct_row[3] = (int16_t) (a3 - b3);
        dct_row[4] = (int16_t) (a4 - b4);
        dct_row[5] = (int16_t) (a5 - b5);
        dct_row[6] = (int16_t) (a6 - b6);
        dct_row[7] = (int16_t) (a7 - b7);
    }
}
#endif
/*!
************************************************************************
* \brief
* This function performs the 16->8 bit transfer with this series of operations:
* SRC (16bit) = SRC
* DST (8bit) = max(min(DST+SRC, 255), 0)
************************************************************************
*/
#ifdef _TRIMEDIA
#define optimize_16to8add_2
#endif
#ifndef _TRIMEDIA
/*
 * Portable 8x8 "add and saturate" transfer:
 *
 *     dst[j * stride + i] = clamp(dst[j * stride + i] + src[j * 8 + i], 0, 255)
 *
 * for i, j in 0..7.  src is a dense 8x8 block of 16-bit values; dst rows
 * are stride bytes apart.
 *
 * The sum is held in a 32-bit integer.  Narrowing it to int16_t before the
 * clamp (as an earlier revision did) makes sums above 32767, e.g.
 * 255 + 32767, wrap to a negative value on common targets and clamp to 0
 * instead of 255.
 */
void
transfer_16to8add_c(uint8_t * const dst,        /*<--> destination pixels, updated in place */
                    const int16_t * const src,  /*<--  8x8 block of 16-bit values to add */
                    uint32_t stride             /*<--  distance in bytes between dst rows */
    )
{
    uint32_t i, j;

    for (j = 0; j < 8; j++) {
        for (i = 0; i < 8; i++) {
            /* Range is [-32768, 33022]; always representable in int32_t. */
            int32_t pixel = (int32_t) dst[j * stride + i] + (int32_t) src[j * 8 + i];

            if (pixel < 0) {
                pixel = 0;
            } else if (pixel > 255) {
                pixel = 255;
            }
            dst[j * stride + i] = (uint8_t) pixel;
        }
    }
}
#endif
#ifdef optimize_16to8add_2
/*
 * Variant of transfer_16to8add_c compiled when optimize_16to8add_2 is
 * defined.
 *
 * For each of 8 rows it loads two 32-bit words from dst (8 destination
 * bytes) and four 32-bit words from src (8 int16_t values), combines them
 * with DSPIDUALADD and DUALUCLIPI(..., 255), repacks the results and
 * stores two 32-bit words back to the same dst row.
 *
 * NOTE(review): MERGELSB, MERGEMSB, DSPIDUALADD, DUALUCLIPI, PACK16LSB,
 * PACK16MSB and FUNSHIFT1 are defined outside this block; the comments
 * below about what they compute are assumptions to be confirmed against
 * their definitions.
 * NOTE(review): dst and src are accessed through uint32_t pointers, so
 * this presumably relies on both being 4-byte aligned and on stride being
 * a multiple of 4 -- confirm with the callers.
 * NOTE(review): i, pixel, src-independent locals such as i and pixel are
 * declared but never used in this body.
 */
void
transfer_16to8add_c(uint8_t * const dst, /*<--> the destination buffer*/
const int16_t * const src, /*<-- the source buffer*/
uint32_t stride /*<-- stride*/
)
{
uint32_t i, j;
/*
uint32_t * restrict ptr_dst = (uint32_t *)dst;
const uint32_t * restrict ptr_src = (uint32_t *)src;
*/
/* word-wise views of the two buffers */
uint32_t * ptr_dst = (uint32_t *)dst;
const uint32_t * ptr_src = (uint32_t *)src;
uint32_t src0,src1,src2,src3;
uint32_t dst0,dst1;
uint32_t dst16_0,dst16_1,dst16_2,dst16_3;
int32_t pixel0,pixel1,pixel2,pixel3;
uint32_t u_pixel0,u_pixel1,u_pixel2,u_pixel3;
uint32_t dst32_0,dst32_1,dst32_2,dst32_3;
uint32_t temp0,temp1;
uint32_t stride1;
int16_t pixel;
/* row step in 32-bit words, minus the one word ptr_dst has already
   advanced by the time it is applied at the bottom of the loop */
stride1 = (stride>>2)-1;
for (j = 0; j < 8; j++) {
/* read the two destination words of this row; the ++/-- pair leaves
   ptr_dst pointing at the first word again */
dst0 = *(ptr_dst++);
dst1 = *(ptr_dst--);
/* presumably widens the destination bytes to 16-bit lanes by merging
   them with zero bytes -- confirm against the MERGE* definitions */
dst16_0 = MERGELSB(0,dst0);
dst16_1 = MERGEMSB(0,dst0);
dst16_2 = MERGELSB(0,dst1);
dst16_3 = MERGEMSB(0,dst1);
/* four source words = eight int16_t values; ptr_src simply runs on to
   the next row because src rows are contiguous */
src0 = *(ptr_src++);
src1 = *(ptr_src++);
src2 = *(ptr_src++);
src3 = *(ptr_src++);
/* presumably a pairwise 16-bit add of destination and source lanes */
pixel0 = DSPIDUALADD(dst16_0,src0);
pixel1 = DSPIDUALADD(dst16_1,src1);
pixel2 = DSPIDUALADD(dst16_2,src2);
pixel3 = DSPIDUALADD(dst16_3,src3);
/* presumably clips each 16-bit lane to the range 0..255 */
u_pixel0 = DUALUCLIPI(pixel0,255);
u_pixel1 = DUALUCLIPI(pixel1,255);
u_pixel2 = DUALUCLIPI(pixel2,255);
u_pixel3 = DUALUCLIPI(pixel3,255);
/* repack the first four clipped values into one word and store it,
   advancing ptr_dst to the second word of the row */
dst32_0 = PACK16LSB(u_pixel1,u_pixel0);
dst32_1 = PACK16MSB(u_pixel1,u_pixel0);
temp0 = FUNSHIFT1 (dst32_1,0);
*(ptr_dst++) = dst32_0 + temp0;
/* same for the last four values; ptr_dst is not advanced here */
dst32_2 = PACK16LSB(u_pixel3,u_pixel2);
dst32_3 = PACK16MSB(u_pixel3,u_pixel2);
temp1 = FUNSHIFT1 (dst32_3,0);
*(ptr_dst) = dst32_2 + temp1;
/* move from the second word of this row to the first word of the next */
ptr_dst += stride1;
}
}
#endif
/*unroll the inner loop */
#ifdef optimize_16to8add_1
/*
 * Variant of transfer_16to8add_c compiled when optimize_16to8add_1 is
 * defined: the inner (column) loop is written out by hand.
 *
 * For each of 8 rows, the 8 destination bytes are added to the matching
 * 8 source values, each sum is passed through UCLIPI(sum, 255), and the
 * results are stored back to the destination row.  All sums of a row are
 * formed before any byte of that row is written.
 *
 * NOTE(review): UCLIPI is defined outside this block; presumably it clips
 * its first argument to the range 0..255 -- confirm against its definition.
 */
void
transfer_16to8add_c(uint8_t * const dst,        /*<--> destination pixels, updated in place */
                    const int16_t * const src,  /*<--  8x8 block of 16-bit values to add */
                    uint32_t stride             /*<--  distance in bytes between dst rows */
    )
{
    uint32_t row;
    int16_t sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
    uint16_t clip0, clip1, clip2, clip3, clip4, clip5, clip6, clip7;

    for (row = 0; row < 8; row++) {
        uint8_t *out = dst + row * stride;
        const int16_t *in = src + row * 8;

        sum0 = (int16_t) out[0] + in[0];
        sum1 = (int16_t) out[1] + in[1];
        sum2 = (int16_t) out[2] + in[2];
        sum3 = (int16_t) out[3] + in[3];
        sum4 = (int16_t) out[4] + in[4];
        sum5 = (int16_t) out[5] + in[5];
        sum6 = (int16_t) out[6] + in[6];
        sum7 = (int16_t) out[7] + in[7];

        clip0 = UCLIPI(sum0, 255);
        clip1 = UCLIPI(sum1, 255);
        clip2 = UCLIPI(sum2, 255);
        clip3 = UCLIPI(sum3, 255);
        clip4 = UCLIPI(sum4, 255);
        clip5 = UCLIPI(sum5, 255);
        clip6 = UCLIPI(sum6, 255);
        clip7 = UCLIPI(sum7, 255);

        out[0] = (uint8_t) clip0;
        out[1] = (uint8_t) clip1;
        out[2] = (uint8_t) clip2;
        out[3] = (uint8_t) clip3;
        out[4] = (uint8_t) clip4;
        out[5] = (uint8_t) clip5;
        out[6] = (uint8_t) clip6;
        out[7] = (uint8_t) clip7;
    }
}
#endif
/*!
************************************************************************
* \brief
* This function performs the 8->8 bit transfer with this series of operations:
* SRC (8bit) = SRC
* DST (8bit) = SRC
************************************************************************
*/
/*#ifdef _TRIMEDIA*/
/*
 * Copies an 8x8 block of bytes from src to dst, one byte at a time.
 * Both buffers use the same row pitch: row j starts at offset j * stride.
 */
void
transfer8x8_copy_c(uint8_t * const dst,         /*<--> receives the 8x8 block */
                   const uint8_t * const src,   /*<--  block to copy from */
                   const uint32_t stride        /*<--  distance in bytes between rows (both buffers) */
    )
{
    uint32_t row;

    for (row = 0; row < 8; row++) {
        uint8_t *out = dst + row * stride;
        const uint8_t *in = src + row * stride;
        uint32_t col = 0;

        while (col < 8) {
            out[col] = in[col];
            col++;
        }
    }
}
/*#endif*/
/*#define optimize_8x8_copy_1*/
#ifdef optimize_8x8_copy_1
/*
 * Variant of transfer8x8_copy_c compiled when optimize_8x8_copy_1 is
 * defined.  Copies an 8x8 block of bytes with two walking pointers: eight
 * post-incremented byte copies per row, then both pointers skip the
 * remaining (stride - 8) bytes to reach the next row.
 */
void
transfer8x8_copy_c(uint8_t * const dst,         /*<--> receives the 8x8 block */
                   const uint8_t * const src,   /*<--  block to copy from */
                   const uint32_t stride        /*<--  distance in bytes between rows (both buffers) */
    )
{
    uint32_t row;
    const uint8_t *in = src;            /* read cursor */
    uint8_t *out = dst;                 /* write cursor */
    const uint32_t row_gap = stride - 8;    /* bytes between end of one row and start of the next */

#pragma TCS_unroll=2
    for (row = 0; row < 8; row++) {
        *out++ = *in++;
        *out++ = *in++;
        *out++ = *in++;
        *out++ = *in++;
        *out++ = *in++;
        *out++ = *in++;
        *out++ = *in++;
        *out++ = *in++;
        out += row_gap;
        in += row_gap;
    }
}
#endif
#ifdef optimize_8x8_copy_2
/*
 * Variant of transfer8x8_copy_c compiled when optimize_8x8_copy_2 is
 * defined.  Copies an 8x8 block of bytes using fixed offsets 0..7 from a
 * pair of row pointers, which then both advance by stride.
 */
void
transfer8x8_copy_c(uint8_t * const dst,         /*<--> receives the 8x8 block */
                   const uint8_t * const src,   /*<--  block to copy from */
                   const uint32_t stride        /*<--  distance in bytes between rows (both buffers) */
    )
{
    uint32_t row;
    const uint8_t *in_row = src;    /* start of the current source row */
    uint8_t *out_row = dst;         /* start of the current destination row */

#pragma TCS_unroll=2
    for (row = 0; row < 8; row++) {
        out_row[0] = in_row[0];
        out_row[1] = in_row[1];
        out_row[2] = in_row[2];
        out_row[3] = in_row[3];
        out_row[4] = in_row[4];
        out_row[5] = in_row[5];
        out_row[6] = in_row[6];
        out_row[7] = in_row[7];
        out_row += stride;
        in_row += stride;
    }
}
#endif
/* Method 3: make up (assemble) whole words for the copy; to be done later. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -