⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 encoder.c

📁 基于Linux的ffmpeg decoder
💻 C
📖 第 1 页 / 共 5 页
字号:
	  //pCodec->DMA_COMMAND_local[34] = (uint32_t) 0;
	  pCodec->DMA_COMMAND_local[35] = (uint32_t) 0x4B01010; // make it group ID 1, disable this command
	  
      //pCodec->DMA_COMMAND_local[38+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) (8+1-4) << 24 | 4 << 20;
	  pCodec->DMA_COMMAND_local[39+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x840010;
	  //pCodec->DMA_COMMAND_local[26+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0;
	  pCodec->DMA_COMMAND_local[27+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4B01040; // make it group ID 1, disable this command
	  //pCodec->DMA_COMMAND_local[30+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0;
	  pCodec->DMA_COMMAND_local[31+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4B01010; // make it group ID 1, disable this command
	  //pCodec->DMA_COMMAND_local[34+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0;
	  pCodec->DMA_COMMAND_local[35+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4B01010; // make it group ID 1, disable this command

	  // set the related DMA commands to move the reference block to local memory
	  // since we use DMA double buffer, but for the very first time, we will move the reference image to 
	  // local memory twice by using the upper part of DMA double buffer
	  // so, let's increment it for just 256 bytes
	  pCodec->DMA_COMMAND_local[0] = ((uint32_t) pEnc->reference->reconstruct.y - (256*XDIM/16) | 0x06);
	  // since we use DMA double buffer, so the increment for Y block of reference image is 512 bytes instead of 256 bytes
	  pCodec->DMA_COMMAND_local[0+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->reference->reconstruct.y - (256*XDIM/16) + 512 | 0x07);

      pCodec->DMA_COMMAND_local[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_Y);
      pCodec->DMA_COMMAND_local[1+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_Y+32);      
      // local memory frame width is 16 words, and block width is 4 words.
      // system memory frame width is (XDIM*4) words and block width is 32 words(128 bytes).
	  pCodec->DMA_COMMAND_local[2] = ((16+1-4)<<24) | 4 << 20 | ((XDIM*4)-63)<<6 | 32;
	  pCodec->DMA_COMMAND_local[2+DMA_COMMAND_QUEUE_STRIDE] = ((16+1-4)<<24) | 4 << 20 | ((XDIM*4)-63)<<6 | 32;
	  pCodec->DMA_COMMAND_local[3] = 0x4A50000 | 192; // enable Transfer Done flag mask
                                              // Enable DMA start transferring
                                              // Enable chain transfer
                                              // From 2D System memory to 2D Local memory
                                              // transfer 192 words (768 bytes = 3 Y blocks)
      pCodec->DMA_COMMAND_local[3+DMA_COMMAND_QUEUE_STRIDE] = 0x4A50000 | 192; // enable Transfer Done flag mask
                                              // Enable DMA start transferring
                                              // Enable chain transfer
                                              // From 2D System memory to 2D Local memory
                                              // transfer 192 words (768 bytes = 3 Y blocks)


      // since we use DMA double buffer, but for the very first time, we will move the reference image to 
	  // local memory twice by using the upper part of DMA double buffer
	  // so, let's increment it for just 64 bytes
      pCodec->DMA_COMMAND_local[4] = ((uint32_t) pEnc->reference->reconstruct.u - (64*XDIM/16) | 0x04);
      // since we use DMA double buffer, so the increment for U block of reference image is 128 bytes instead of 64 bytes
      pCodec->DMA_COMMAND_local[4+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->reference->reconstruct.u - (64*XDIM/16) + 128 | 0x05);		//	4 - 7 for Ref U Load Next MB
	  pCodec->DMA_COMMAND_local[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_U);
	  pCodec->DMA_COMMAND_local[5+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_U+16);
	  pCodec->DMA_COMMAND_local[6] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;
	  pCodec->DMA_COMMAND_local[6+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;	  
	  pCodec->DMA_COMMAND_local[7] = 0x4A50000 | 48;										//  chain enable, 
	  pCodec->DMA_COMMAND_local[7+DMA_COMMAND_QUEUE_STRIDE] = 0x4A50000 | 48;				//  chain enable, 
	
      // since we use DMA double buffer, but for the very first time, we will move the reference image to 
	  // local memory twice by using the upper part of DMA double buffer
	  // so, let's increment it for just 64 bytes
	  pCodec->DMA_COMMAND_local[8] = ((uint32_t) pEnc->reference->reconstruct.v - (64*XDIM/16) | 0x04);		//	16 - 19 for Ref V Load Next MB
	  // since we use DMA double buffer, so the increment for V block of reference image is 128 bytes instead of 64 bytes
	  pCodec->DMA_COMMAND_local[8+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->reference->reconstruct.v - (64*XDIM/16) +128 | 0x05);		//	16 - 19 for Ref V Load Next MB
	  

	  pCodec->DMA_COMMAND_local[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_V);
	  pCodec->DMA_COMMAND_local[9+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_V+16);
      pCodec->DMA_COMMAND_local[10] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;
      pCodec->DMA_COMMAND_local[10+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;
      pCodec->DMA_COMMAND_local[11] = 0x4A50000 | 48;					//  chain disable
      pCodec->DMA_COMMAND_local[11+DMA_COMMAND_QUEUE_STRIDE] = 0x4A50000 | 48;					//  chain disable

	  // set the related DMA commands to move the current blocks to local memory
	  // since we use DMA double buffer, so the increment for Y block of current image is 512 bytes instead of 256 bytes
      pCodec->DMA_COMMAND_local[12] = ((uint32_t) pEnc->current1->image.y | 0x07);
      pCodec->DMA_COMMAND_local[12+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->image.y + 256 | 0x07);
	  pCodec->DMA_COMMAND_local[13] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_Y0);
	  pCodec->DMA_COMMAND_local[13+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_Y1);
	  pCodec->DMA_COMMAND_local[15] = (uint32_t) 0x4A00040; // enable Transfer Done flag mask
	                                                // Enable DMA start transferring
	                                                // Enable chain transfer
	                                                // From sequential System memory to sequential Local memory
	                                                // transfer 0x40 words (256 bytes)
	  pCodec->DMA_COMMAND_local[15+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4A00040;
	  
      // since we use DMA double buffer, so the increment for U block of current image is 128 bytes instead of 64 bytes
      pCodec->DMA_COMMAND_local[16] = ((uint32_t) pEnc->current1->image.u | 0x05);
      pCodec->DMA_COMMAND_local[16+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->image.u + 64 | 0x05);
      pCodec->DMA_COMMAND_local[17] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_U0);
      pCodec->DMA_COMMAND_local[17+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_U1);
	  pCodec->DMA_COMMAND_local[19] = (uint32_t) 0x4A00010; // enable Transfer Done flag mask
	                                                // Enable DMA start transferring
	                                                // Enable chain transfer
	                                                // From sequential System memory to sequential Local memory
	                                                // transfer 0x10 words (64 bytes)
	  pCodec->DMA_COMMAND_local[19+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4A00010;
	  
	  // since we use DMA double buffer, so the increment for V block of current image is 128 bytes instead of 64 bytes
	  pCodec->DMA_COMMAND_local[20] = ((uint32_t) pEnc->current1->image.v | 0x05);
	  pCodec->DMA_COMMAND_local[20+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->image.v + 64  | 0x05);
	  pCodec->DMA_COMMAND_local[21] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_V0);
	  pCodec->DMA_COMMAND_local[21+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_V1);
      pCodec->DMA_COMMAND_local[23] = (uint32_t) 0x4A00010; // enable Transfer Done flag mask
	                                                // Enable DMA start transferring
	                                                // Enable chain transfer
	                                                // From sequential System memory to sequential Local memory
	                                                // transfer 0x10 words (64 bytes)
	  pCodec->DMA_COMMAND_local[23+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4A00010;

      // prepare the DMA commands for loading predictor
	  pCodec->DMA_COMMAND_local[36] = (uint32_t) pCodec->pred_value_phy;  //	predictor
	  pCodec->DMA_COMMAND_local[36+	DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) (pCodec->pred_value_phy);				//	predictor
	  pCodec->DMA_COMMAND_local[37] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0);
	  pCodec->DMA_COMMAND_local[37+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0);
	  pCodec->DMA_COMMAND_local[39] = (uint32_t) 0x840010;  // disable Transfer Done flag mask
	                                                // Enable DMA start transferring
	                                                // Disable chain transfer
	                                                // From sequential System memory to 2D Local memory
	                                                // transfer 0x10 words (64 bytes)
	  pCodec->DMA_COMMAND_local[39+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x840010;
	  
	  pCodec->DMA_COMMAND_local[24] = ((uint32_t) pEnc->current1->reconstruct.y + 256) | 0x07;
      pCodec->DMA_COMMAND_local[24+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->reconstruct.y) | 0x07;
      pCodec->DMA_COMMAND_local[28] = ((uint32_t) pEnc->current1->reconstruct.u + 64) | 0x05;
	  pCodec->DMA_COMMAND_local[28+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->reconstruct.u) | 0x05;
	  pCodec->DMA_COMMAND_local[32] = ((uint32_t) pEnc->current1->reconstruct.v + 64) | 0x05;
	  pCodec->DMA_COMMAND_local[32+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->reconstruct.v) | 0x05;
	  
      // begin to move the local DMA commands (from 0th to 0+40(0x28)-1=39th)
      // to the system memory and start DMA procedure..
	  DMA_MOVE(0, 0x4B00028)    // enable Transfer Done flag mask
                                // Enable DMA start transferring
                                // Enable chain transfer
                                // From sequential Local memory to sequential System memory
                                // transfer 0x28(40) words

	RTL_DEBUG_OUT(0x95000000 | (uint32_t) pCodec->pred_value_phy)

	pEnc->current1->fcode = pEnc->mbParam.m_fcode;

	READ_ASADR(*pBits)						//	64 byte
	READ_BALR(x)							//	compressed data in local memory (words)
	READ_VOADR(y)							//	compressed data in local memory (bits)
	x = (x & 0x3c);
	y = y & 0xff;
	bit_header = y + x*8;							//	total bits in local memory

	pEnc->current1->coding_type = P_VOP;
	if (pEnc->mbParam.h263) {
		pEnc->mbParam.m_rounding_type = 0;
		pEnc->current1->rounding_type = pEnc->mbParam.m_rounding_type;
		BitstreamWriteShortHeader(&pEnc->mbParam, pEnc->current1, 1,pEnc->pCodec);
	} else {
		pEnc->mbParam.m_rounding_type = 1 - pEnc->mbParam.m_rounding_type;
		pEnc->current1->rounding_type = pEnc->mbParam.m_rounding_type;
		if (vol_header)
			BitstreamWriteVolHeader(&pEnc->mbParam, pEnc->current1,pEnc->pCodec);
		BitstreamWriteVopHeader(&pEnc->mbParam, pEnc->current1, 1,pEnc->pCodec);
	}
	
	
    // check DMA is done
	POLL_MARKER_S
	while((pmdma->Status & 0x1) == 0);
	POLL_MARKER_E
	
	// we just want to move the reference image again, so we disable the chain
	pCodec->DMA_COMMAND_local[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_Y + 16);
	pCodec->DMA_COMMAND_local[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_U + 8);
	pCodec->DMA_COMMAND_local[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_V + 8);
    pCodec->DMA_COMMAND_local[11] = 0x850000 | 48;

	// begin to move the reference image to local memory again (from 0th to 0+12(0xC)-1=11th)
    // by using the upper part of DMA command buffer
	DMA_MOVE(0, 0x4B0000C)  // enable Transfer Done flag mask
                            // Enable DMA start transferring
                            // Enable chain transfer
                            // From sequential Local memory to sequential System memory
                            // transfer 0xC(12) words 


    // the reason why we set the MCCADDR register just here for once is because during
    // P-frame encoding , unlike I-frame encoding , while ME engine is activated, the
    // ME engine will copy the current blocks to the address that was set by MCCADDR
    // register by the way.
	SET_MCCADDR(CUR_Y2)			
	SET_QAR(DZQAR)	    // quantization table

	RTL_DEBUG_OUT(0x91000000)

	pCodec->even_odd_1 = 0;
	pCodec->even_odd_I = 0;
	pCodec->acdc_status = 7;

    pCodec->triple_buffer_selector = 0;

	
	pMB = pEnc->current1->mbs;
	pMB->quant = pEnc->current1->quant;
	// initialize Raddr and Raddr23 in word address
	pCodec->Raddr = (((uint32_t) REF_Y + 64*16) >> 2) & 0xfff;
	pCodec->Raddr23 = (((uint32_t) REF_Y + 64*(16+8)) >> 2) & 0xfff; //for block 2,3
	
	#ifdef DUMP_PMV_RESULT                
    fprintf(pmv_result_file,"Macroblock %d (x=%d,y=%d)\n",0,0,0);
    #endif
    

    bIntra = MotionEstimation_block0(pMB,pEnc);

	
	#ifdef DUMP_ME_RESULT
      fprintf(me_result_file,"Macroblock %d (x=%d,y=%d) :",0,0,0);      

	  if(pEnc->mbParam.enable_4mv)
        fprintf(me_result_file,"0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x\n",(pMB->mv16x_0&0x07f), (pMB->mv16y_0&0x07f), (pMB->mv16x_1&0x07f), (pMB->mv16y_1&0x07f), (pMB->mv16x_2&0x07f), (pMB->mv16y_2&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f));
      else
        // to be compatible with C Model encoder's output format
        fprintf(me_result_file,"0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x\n",(pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f));
    #endif

	RTL_DEBUG_OUT(0x92000000)
	
	x = 1;
	data_64b = 2;
  	for (y = 0; y < pEnc->mbParam.mb_height; y++) {
		for ( ; x < pEnc->mbParam.mb_width; x++) {
			pMB->quant = pEnc->current1->quant;
            if (pEnc->mbParam.resyn==1)
			{
			  // for resync marker, since we should set the bound for PMV (performed
			  // by hardware) once the resync marker is enabled, so we set the bit 3 of MECTL 
			  // register to notify the hardware
			  if(x==0) pCodec->ME_COMMAND|=8;
  		        if (!pEnc->mbParam.h263)
				{
					if ((y!=0) & (x==1))
					{
					    // In core_version_1 ,the hardware register (CPSTS) did not provide the
				        // bit to check whether the VLC engine is done or not. Without the bit
				        // potentially, the following codes in core_version_1 will cause bitstream
				        // buffer contention if we want to insert the resync marker to bitstream
				        // buffer while VLC is not done and still accessing the same bitstream 
				        // buffer at the same time. Therefore, core_version_2 hardware has 
				        // provided another bit on bit15 of CPSTS register to let software check 
				        // the VLC done status in order to avoid bitstream buffer contention.
				        
					    // check whether the VLC is done or not
                        // CPSTS register is at address 0x10028
                        int32_t cpsts;
 	                    do {
	                      READ_CPSTS(cpsts)
	                    } while (!(cpsts&0x08000));
	                    
						BitstreamPadAlways(pCodec);
						BitstreamPutBits(VIDO_RESYN_MARKER, 17,pCodec);
						BitstreamPutBits((x-1) + y*pEnc->mbParam.mb_width, log2bin(pEnc->mbParam.mb_width *  pEnc->mbParam.mb_height - 1),pCodec);
						BitstreamPutBits(pMB->quant, 5,pCodec);
						BitstreamPutBit(0,pCodec);
					}
				}
				else
				{
					if ((y!=0) & (x==1))
					{
					    // In core_version_1 ,the hardware register (CPSTS) did not provide the
				        // bit to check whether the VLC engine is done or not. Without the bit
				        // potentially, the following codes in core_version_1 will cause bitstream
				        // buffer contention if we want to insert the resync marker to bitstream
				        // buffer while VLC is not done and still accessing the same bitstream 
				        // buffer at the same time. Therefore, core_version_2 hardware has 
				        // provided another bit on bit15 of CPSTS register to let software check 
				        // the VLC done status in order to avoid bitstream buffer contention.
                        
					    // check whether the VLC is done or not
                        // CPSTS register is at address 0x10028
                        int32_t cpsts;
 	                    do {
	                      READ_CPSTS(cpsts)
	                    } while (!(cpsts&0x08000));
                          
						BitstreamPutBits(VIDO_RESYN_MARKER, 17,pCodec);
						BitstreamPutBits(y, 5,pCodec);
						BitstreamPutBits(0, 2,pCodec);		// ID
						BitstreamPutBits(pMB->quant, 5,pCodec);
					}
				}
			}
			pMB_mc = pMB;
			pMB++;
			pCodec->even_odd_1 ^= 1;
			pCodec->triple_buffer_selector = (++pCodec->triple_buffer_selector) % 3;
            
			RTL_DEBUG_OUT(0x91000000 | y << 12 | x)
			
			#ifdef DUMP_PMV_RESULT                
            fprintf(pmv_result_file,"Macroblock %d (x=%d,y=%d)\n",x+y*pEnc->mbParam.mb_width,x,y);
            #endif
            bIntra = MotionEstimation(pMB, pMB_mc, x, y, &pEnc->mbParam, pEnc, data_64b);
			#ifdef DUMP_ME_RESU

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -