📄 mem_transfer.c

📁 用MPEG-4对YUV视频文件编码压缩成divx视频文件
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	else if(temp==2)
	{
		for(j =0; j < 8; j++)
		{
		
			/*get three word*/
			src0 = *(ptr_ref++);
			src1 = *(ptr_ref++);
			src2 = *(ptr_ref--);	
			/*make up two word*/
			ref0 = FUNSHIFT2(src1,src0);
			ref1 = FUNSHIFT2(src2,src1);
			ref16_0 = MERGELSB(0,ref0);
			ref16_1 = MERGEMSB(0,ref0);
			ref16_2 = MERGELSB(0,ref1);
			ref16_3 = MERGEMSB(0,ref1);
			cur0 = *(ptr_cur++);
			cur1 = *(ptr_cur--);
			cur16_0 = MERGELSB(0,cur0);
			cur16_1 = MERGEMSB(0,cur0);
			cur16_2 = MERGELSB(0,cur1);
			cur16_3 = MERGEMSB(0,cur1);
			*(ptr_cur++ ) = ref0;
			*(ptr_cur)= ref1;
			*(ptr_dct++)  = DSPIDUALSUB(cur16_0,ref16_0);
			*(ptr_dct++)  = DSPIDUALSUB(cur16_1,ref16_1);
			*(ptr_dct++)  = DSPIDUALSUB(cur16_2,ref16_2);
			*(ptr_dct++)  = DSPIDUALSUB(cur16_3,ref16_3);
			ptr_ref += cur_ref_stride;
			ptr_cur += cur_ref_stride;
			/*ptr_dct += 4;*/
			
			

		}
	}
	else if(temp==3)
	{
		for(j =0; j < 8; j++)
		{
		
			/*get three word*/
			src0 = *(ptr_ref++);
			src1 = *(ptr_ref++);
			src2 = *(ptr_ref--);	
			/*make up two word*/
			ref0 = FUNSHIFT1(src1,src0);
			ref1 = FUNSHIFT1(src2,src1);
			ref16_0 = MERGELSB(0,ref0);
			ref16_1 = MERGEMSB(0,ref0);
			ref16_2 = MERGELSB(0,ref1);
			ref16_3 = MERGEMSB(0,ref1);
			cur0 = *(ptr_cur++);
			cur1 = *(ptr_cur--);
			cur16_0 = MERGELSB(0,cur0);
			cur16_1 = MERGEMSB(0,cur0);
			cur16_2 = MERGELSB(0,cur1);
			cur16_3 = MERGEMSB(0,cur1);
			*(ptr_cur++) = ref0;
			*(ptr_cur  ) = ref1;
			*(ptr_dct++)  = DSPIDUALSUB(cur16_0,ref16_0);
			*(ptr_dct++)  = DSPIDUALSUB(cur16_1,ref16_1);
			*(ptr_dct++)  = DSPIDUALSUB(cur16_2,ref16_2);
			*(ptr_dct++)  = DSPIDUALSUB(cur16_3,ref16_3);
			ptr_ref += cur_ref_stride;
			ptr_cur += cur_ref_stride;
			/*ptr_dct += 4;*/
			
			

		}
	}

}
#endif 
#ifdef  optimize_8to16sub_1
/*
 * Hand-unrolled 8x8 "subtract and replace" transfer.
 *
 * For each of the 64 pixels of the block:
 *   dct = cur - ref   (stored as a signed 16-bit value)
 *   cur = ref
 * One full row of eight pixels is handled per loop iteration.
 */
void
transfer_8to16sub_c(int16_t * const dct,		/*<--> out: 64 differences, 8 per row, rows back to back*/
					uint8_t * const cur,		/*<--> in: current pixels / out: overwritten with ref*/
					const uint8_t * ref,		/*<--  reference pixels, same row layout as cur*/
					const uint32_t stride		/*<--  distance between two rows of cur and of ref*/
					)
{
	uint32_t row;
	uint32_t pix_off = 0;	/* start of the current row inside cur and ref */
	uint32_t dct_off = 0;	/* start of the current row inside dct */

	for (row = 0; row < 8; row++, pix_off += stride, dct_off += 8) {
		/* Read the complete row of both buffers before anything is stored. */
		const uint8_t cur0 = cur[pix_off + 0], ref0 = ref[pix_off + 0];
		const uint8_t cur1 = cur[pix_off + 1], ref1 = ref[pix_off + 1];
		const uint8_t cur2 = cur[pix_off + 2], ref2 = ref[pix_off + 2];
		const uint8_t cur3 = cur[pix_off + 3], ref3 = ref[pix_off + 3];
		const uint8_t cur4 = cur[pix_off + 4], ref4 = ref[pix_off + 4];
		const uint8_t cur5 = cur[pix_off + 5], ref5 = ref[pix_off + 5];
		const uint8_t cur6 = cur[pix_off + 6], ref6 = ref[pix_off + 6];
		const uint8_t cur7 = cur[pix_off + 7], ref7 = ref[pix_off + 7];

		/* The current block is replaced by the reference pixels ... */
		cur[pix_off + 0] = ref0;
		cur[pix_off + 1] = ref1;
		cur[pix_off + 2] = ref2;
		cur[pix_off + 3] = ref3;
		cur[pix_off + 4] = ref4;
		cur[pix_off + 5] = ref5;
		cur[pix_off + 6] = ref6;
		cur[pix_off + 7] = ref7;

		/* ... and the difference (old current - reference) goes to dct. */
		dct[dct_off + 0] = (int16_t) (cur0 - ref0);
		dct[dct_off + 1] = (int16_t) (cur1 - ref1);
		dct[dct_off + 2] = (int16_t) (cur2 - ref2);
		dct[dct_off + 3] = (int16_t) (cur3 - ref3);
		dct[dct_off + 4] = (int16_t) (cur4 - ref4);
		dct[dct_off + 5] = (int16_t) (cur5 - ref5);
		dct[dct_off + 6] = (int16_t) (cur6 - ref6);
		dct[dct_off + 7] = (int16_t) (cur7 - ref7);
	}
}
#endif 


/*!
 ************************************************************************   
 * \brief
 *    This function performs the 16->8 bit transfer using the following operations:
 *    SRC (16bit) = SRC
 *    DST (8bit)  = max(min(DST+SRC, 255), 0)
 ************************************************************************
 */
#ifdef _TRIMEDIA
#define  optimize_16to8add_2 
#endif
         
#ifndef _TRIMEDIA
/*
 * Portable 8x8 "add and clamp" transfer.
 *
 * For each of the 64 pixels of the block:
 *   dst = max(min(dst + src, 255), 0)
 */
void
transfer_16to8add_c(uint8_t * const dst,		/*<--> in: pixels to add to / out: clamped sums*/
					const int16_t * const src,	/*<--  64 values, 8 per row, rows back to back*/
					uint32_t stride				/*<--  distance between two rows of dst*/
					)
{
	uint32_t i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++) {
			/*
			 * Form the sum in 32 bits.  dst + src can reach 255 + 32767,
			 * which does not fit in int16_t; narrowing it before the clamp
			 * would turn a value that must saturate to 255 into a negative
			 * one that is clamped to 0 instead.
			 */
			int32_t pixel = (int32_t) dst[j * stride + i] + (int32_t) src[j * 8 + i];

			if (pixel < 0) {
				pixel = 0;
			} else if (pixel > 255) {
				pixel = 255;
			}
			dst[j * stride + i] = (uint8_t) pixel;
		}
	}
}
#endif

#ifdef  optimize_16to8add_2
/*
 * Word-oriented variant of the 8x8 "add and clamp" transfer, selected by
 * optimize_16to8add_2.  Each loop iteration handles one row: it reads two
 * 32-bit words from dst and four 32-bit words from src, combines them with
 * the MERGELSB/MERGEMSB, DSPIDUALADD, DUALUCLIPI, PACK16LSB/PACK16MSB and
 * FUNSHIFT1 operations, and writes two 32-bit words back to dst.
 *
 * NOTE(review): those operations are not defined in this part of the file;
 * they look like TriMedia custom operations -- confirm their exact lane
 * ordering against the target documentation before changing anything here.
 * NOTE(review): dst and src are accessed through uint32_t pointers and the
 * row step is stride>>2 words, so this presumably requires 32-bit aligned
 * buffers and a stride that is a multiple of 4 -- TODO confirm.
 */
void
transfer_16to8add_c(uint8_t * const dst,		/*<--> the destination buffer (read, then overwritten)*/
					const int16_t * const src,	/*<--  the source buffer, read four words per row*/
					uint32_t stride				/*<--  stride of dst; used as stride>>2 words per row*/
					)
{
	
	uint32_t i, j;	/* i is not used in this variant */
/*	
	Earlier declarations using restrict, kept for reference:
	uint32_t  * restrict ptr_dst = (uint32_t  *)dst;
	const uint32_t * restrict ptr_src = (uint32_t *)src;
*/
	uint32_t  * ptr_dst = (uint32_t  *)dst;		/* walks dst one 32-bit word at a time */
	const uint32_t * ptr_src = (uint32_t *)src;	/* walks src one 32-bit word at a time */

	uint32_t src0,src1,src2,src3;
	uint32_t dst0,dst1;
	uint32_t dst16_0,dst16_1,dst16_2,dst16_3;
	int32_t pixel0,pixel1,pixel2,pixel3;
	uint32_t u_pixel0,u_pixel1,u_pixel2,u_pixel3;
	uint32_t dst32_0,dst32_1,dst32_2,dst32_3;
	uint32_t temp0,temp1;
	uint32_t stride1;
	int16_t pixel;	/* not used in this variant */
	/*
	 * Row step in words, minus one: by the time stride1 is added, ptr_dst
	 * has already been moved one word forward by the first store of the row.
	 */
	stride1 = (stride>>2)-1;

	for (j = 0; j < 8; j++) {
			/* Read the two dst words of this row; the ++/-- pair leaves
			   ptr_dst pointing at the first word again. */
			dst0 = *(ptr_dst++);
			dst1 = *(ptr_dst--);
			/* Merge each dst word with zero: presumably widens the bytes
			   into 16-bit lanes, two per result word -- confirm. */
			dst16_0 = MERGELSB(0,dst0);
			dst16_1 = MERGEMSB(0,dst0);
			dst16_2 = MERGELSB(0,dst1);
			dst16_3 = MERGEMSB(0,dst1);
			/* Four consecutive src words for this row; ptr_src simply keeps
			   advancing, src rows are stored back to back. */
			src0 = *(ptr_src++);
			src1 = *(ptr_src++);
			src2 = *(ptr_src++);
			src3 = *(ptr_src++);
			/* Add the widened dst lanes to the matching src word
			   (presumably a two-lane 16-bit add -- confirm). */
			pixel0 = DSPIDUALADD(dst16_0,src0);
			pixel1 = DSPIDUALADD(dst16_1,src1);
			pixel2 = DSPIDUALADD(dst16_2,src2);
			pixel3 = DSPIDUALADD(dst16_3,src3);
			/* Clip against 255 (presumably clamps each lane to [0, 255]
			   -- confirm). */
			u_pixel0 = DUALUCLIPI(pixel0,255);
			u_pixel1 = DUALUCLIPI(pixel1,255); 
			u_pixel2 = DUALUCLIPI(pixel2,255); 
			u_pixel3 = DUALUCLIPI(pixel3,255); 
			/* Recombine the first two clipped words into one output word and
			   store it; this store advances ptr_dst to the second word. */
			dst32_0 = PACK16LSB(u_pixel1,u_pixel0);
			dst32_1 = PACK16MSB(u_pixel1,u_pixel0);
			temp0   = FUNSHIFT1 (dst32_1,0);
			*(ptr_dst++) = dst32_0 + temp0;
			/* Same for the second output word of the row; no increment
			   here, the row step below accounts for it. */
			dst32_2 = PACK16LSB(u_pixel3,u_pixel2);
			dst32_3 = PACK16MSB(u_pixel3,u_pixel2);
			temp1   = FUNSHIFT1 (dst32_3,0);
			*(ptr_dst)   = dst32_2 + temp1;	
			/* Net movement per row: 1 + stride1 = stride>>2 words. */
			ptr_dst += stride1;		
			
	}
}
#endif
/*unroll the inner loop */

#ifdef  optimize_16to8add_1
/*
 * Hand-unrolled variant of the 8x8 "add and clamp" transfer, selected by
 * optimize_16to8add_1.  One full row of eight pixels is handled per loop
 * iteration: all eight sums dst + src are formed first, then each sum is
 * passed through UCLIPI(sum, 255) and stored back into dst.
 *
 * NOTE(review): UCLIPI is not defined in this part of the file; it is
 * presumably the target's "clip to [0, 255]" operation, matching the
 * if/else clamp of the portable version -- confirm.
 */
void
transfer_16to8add_c(uint8_t * const dst,		/*<--> in: pixels to add to / out: clipped sums*/
					const int16_t * const src,	/*<--  64 values, 8 per row, rows back to back*/
					uint32_t stride				/*<--  distance between two rows of dst*/
					)
{
	uint32_t row;
	uint32_t dst_off = 0;	/* start of the current row inside dst */
	uint32_t src_off = 0;	/* start of the current row inside src */

	for (row = 0; row < 8; row++) {
		/*
		 * The sums are kept in 32 bits.  dst + src can reach 255 + 32767,
		 * which does not fit in int16_t; narrowing before the clip would
		 * hand UCLIPI a wrapped (negative) value for inputs that have to
		 * saturate at 255.
		 */
		int32_t sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;

		sum0 = (int32_t) dst[dst_off + 0] + (int32_t) src[src_off + 0];
		sum1 = (int32_t) dst[dst_off + 1] + (int32_t) src[src_off + 1];
		sum2 = (int32_t) dst[dst_off + 2] + (int32_t) src[src_off + 2];
		sum3 = (int32_t) dst[dst_off + 3] + (int32_t) src[src_off + 3];
		sum4 = (int32_t) dst[dst_off + 4] + (int32_t) src[src_off + 4];
		sum5 = (int32_t) dst[dst_off + 5] + (int32_t) src[src_off + 5];
		sum6 = (int32_t) dst[dst_off + 6] + (int32_t) src[src_off + 6];
		sum7 = (int32_t) dst[dst_off + 7] + (int32_t) src[src_off + 7];

		/* Clip each sum and write it back over the pixel it came from. */
		dst[dst_off + 0] = (uint8_t) UCLIPI(sum0, 255);
		dst[dst_off + 1] = (uint8_t) UCLIPI(sum1, 255);
		dst[dst_off + 2] = (uint8_t) UCLIPI(sum2, 255);
		dst[dst_off + 3] = (uint8_t) UCLIPI(sum3, 255);
		dst[dst_off + 4] = (uint8_t) UCLIPI(sum4, 255);
		dst[dst_off + 5] = (uint8_t) UCLIPI(sum5, 255);
		dst[dst_off + 6] = (uint8_t) UCLIPI(sum6, 255);
		dst[dst_off + 7] = (uint8_t) UCLIPI(sum7, 255);

		dst_off += stride;
		src_off += 8;
	}
}
#endif
/*!
 ************************************************************************   
 * \brief
 *    This function performs the 8->8 bit transfer using the following operations:
 *    SRC (8bit) = SRC
 *    DST (8bit) = SRC
 ************************************************************************
 */

/*#ifdef _TRIMEDIA*/
/*
 * Portable 8x8 block copy: every pixel of the block is copied from src to
 * the same position in dst, one byte at a time, row after row.
 *
 * NOTE(review): this definition is not wrapped in any #ifdef, while the
 * optimize_8x8_copy_1 / optimize_8x8_copy_2 variants further down define
 * the same symbol; enabling either macro would give two definitions.
 */
void
transfer8x8_copy_c(uint8_t * const dst,			/*<--> out: receives the 8x8 block*/
				   const uint8_t * const src,	/*<--  in: block to copy*/
				   const uint32_t stride		/*<--  distance between two rows, same for dst and src*/
				   )
{
	uint32_t row_off = 0;	/* start of the current row inside both buffers */
	uint32_t rows_left;

	for (rows_left = 8; rows_left != 0; rows_left--) {
		uint32_t col = 0;

		while (col < 8) {
			dst[row_off + col] = src[row_off + col];
			col++;
		}
		row_off += stride;
	}
}
/*#endif*/
/*#define optimize_8x8_copy_1*/
#ifdef  optimize_8x8_copy_1

/*
 * Hand-unrolled 8x8 block copy, selected by optimize_8x8_copy_1.
 * Eight bytes are copied per row, then both pointers skip the remaining
 * (stride - 8) bytes to reach the start of the next row.
 */
void
transfer8x8_copy_c(uint8_t * const dst,			/*<--> out: receives the 8x8 block*/
				   const uint8_t * const src,	/*<--  in: block to copy*/
				   const uint32_t stride		/*<--  distance between two rows, same for dst and src*/
				   )
{
	const uint32_t row_gap = stride - 8;	/* bytes between the end of one row and the next */
	const uint8_t *in = src;
	uint8_t *out = dst;
	uint32_t row;

	/* Loop hint kept from the original; presumably asks the TriMedia
	   compiler to unroll this loop twice -- confirm. */
	#pragma TCS_unroll=2
	for (row = 0; row < 8; row++) {
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out[3] = in[3];
		out[4] = in[4];
		out[5] = in[5];
		out[6] = in[6];
		out[7] = in[7];

		/* Step over the eight bytes just copied, then over the gap. */
		out += 8;
		in += 8;
		out += row_gap;
		in += row_gap;
	}
}
#endif
#ifdef  optimize_8x8_copy_2

/*
 * Hand-unrolled 8x8 block copy, selected by optimize_8x8_copy_2.
 * Each row is copied through fixed offsets 0..7 from a pair of row
 * pointers, which then move on by one full stride.
 */
void
transfer8x8_copy_c(uint8_t * const dst,			/*<--> out: receives the 8x8 block*/
				   const uint8_t * const src,	/*<--  in: block to copy*/
				   const uint32_t stride		/*<--  distance between two rows, same for dst and src*/
				   )
{
	const uint8_t *in_row = src;	/* first byte of the current source row */
	uint8_t *out_row = dst;		/* first byte of the current destination row */
	uint32_t row;

	/* Loop hint kept from the original; presumably asks the TriMedia
	   compiler to unroll this loop twice -- confirm. */
	#pragma TCS_unroll=2
	for (row = 0; row < 8; row++, out_row += stride, in_row += stride) {
		out_row[0] = in_row[0];
		out_row[1] = in_row[1];
		out_row[2] = in_row[2];
		out_row[3] = in_row[3];
		out_row[4] = in_row[4];
		out_row[5] = in_row[5];
		out_row[6] = in_row[6];
		out_row[7] = in_row[7];
	}
}
#endif
/* Method 3: assemble whole words and copy word by word -- still to be done. */
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -