📄 encode_frame_sc.sc

📁 在 SPI DSP 平台上优化好的 Motion JPEG 代码
💻 SC
📖 第 1 页 / 共 2 页
字号:
上一页 12
        spi_printf ("Error: Malloc of prev_blk_bits failed. \n");        SPI_ASSERT(1);    }    if ((kernel_output_bitcount = (unsigned int *) spi_malloc (iterations_per_frame * SPI_LANES * sizeof (unsigned int))) == NULL)    {        spi_printf ("Error: Malloc of kernel_output_bitcount failed. \n");        SPI_ASSERT(1);    }    for (i = 0; i < SPI_LANES; i++)    {        bitstream_offset[i]	= i * p_comp->width_in_blocks * BLOCK_BIT_BUFFER_SIZE;    }    init[0] = init[1] = init[2] = 0;    ////////////////////////////////////////////////////////////////////////////////////////////////////    //      Indice Generation    ////////////////////////////////////////////////////////////////////////////////////////////////////    //    // Data in the form of 8x8 blocks is to be loaded into 'block_strm' from the input image / input bitstream.  The     // input image is visualized as rows of 8x8 blocks and each lane independently processes one such row of 8x8 blocks.    // Indices are calculated such that every row in the input image would have an index and STRIP_SIZE * BLOCK_WIDTH    // number of pixels are loaded from each index.   The indices are ordered such that an entire STRIP_SIZE row of     // 8x8 blocks get loaded into each lane.   Number of blocks processed in one kernel call = STRIP_SIZE * SPI_LANES,    // where each lane processes one row of 8x8 blocks in the image.    // Hence (BLOCK_HEIGHT * SPI_LANES) indices are calculated for one kernel call.   Since the location of the indices    // is constant w.r.t. location of the base pointer, only the offset inside the image is changed for each kernel call.    // Since the height of the image may not always be a multiple of SPI_LANES # 8x8 blocks, another set of indices is     // needed for the last iteration, when there are no more rows_of_8x8_blocks remaining in the input image that can be     // loaded into different lanes.   In this case the indices into the unused lanes are set to 0.    
// When the input image height is not a multiple of BLOCK_HEIGHT, to avoid re-allocation & padding of the input image,     // previous row indice is reused, which serves the purpose of padding in the last iteration indices.    count = 0;    for (i = 0; i < BLOCK_HEIGHT; i++)    {        index = i * p_comp->scaled_width;        for (k =0; k < SPI_LANES; k++)        {            p_input_index[count] = index + k * BLOCK_HEIGHT * p_comp->scaled_width;            if (p_input_index[count] >= p_comp->scaled_width * p_comp->scaled_height)		// In case the image is small            {                p_input_index[count] = 0;            }            cur_height_loc = (((iterations_per_frame - 1) * SPI_LANES) * BLOCK_HEIGHT);            cur_height_loc += ((k * BLOCK_HEIGHT) + i);             // Calculates the last iteration location in terms of rows in input image.             if (cur_height_loc >= p_comp->actual_height)	        // Set index offsets for the last iteration            {                if (cur_height_loc < p_comp->scaled_height)         // Special condition when input image height is not a multiple of 8,                {                                                   // reuse previous row index (padding).                    p_input_index[count + INDICES_PER_KERNEL] = p_input_index[count + INDICES_PER_KERNEL - SPI_LANES];                }                else                                                // Location is outside image boundary, set to zero.                
{                    p_input_index[count + INDICES_PER_KERNEL] = 0;                }            }            else                                                    // Location is within image boundary, reuse calculated index            {                p_input_index[count + INDICES_PER_KERNEL] = p_input_index[count];            }            count++;        }    }    ////////////////////////////////////////////////////////////////////////////////////////////////////    //      AC & DC Huffman Table Generation    ////////////////////////////////////////////////////////////////////////////////////////////////////    //     // DC huffman table (12 words) and AC huffman table (256 words) are organized such that the higher    // 16 bits have the code length and the lower 16 bits have the corresponding code word.    for (i = 0; i < DERIVED_DC_TABLE_LENGTH; i++)    {        dc_huffman_table_k[i] =             ((unsigned int) p_comp->d_dc_huff_tbl.code_length[i] << 16) | ((unsigned int)p_comp->d_dc_huff_tbl.code_word[i]);    }    for (i = 0; i < DERIVED_AC_TABLE_LENGTH; i++)    {        ac_huffman_table_k[i] =             ((unsigned int) p_comp->d_ac_huff_tbl.code_length[i] << 16) | ((unsigned int)p_comp->d_ac_huff_tbl.code_word[i]);    }    spi_flush_entire_data_cache();    ////////////////////////////////////////////////////////////////////////////////////////////////////    //      LOAD : Data Load    ////////////////////////////////////////////////////////////////////////////////////////////////////        // Data loads that need to take place, only once per input frame    spi_load (dc_huff_table_strm, &dc_huffman_table_k[0], 0, DERIVED_DC_TABLE_LENGTH, 1, 1, 0);  // Load DC huffman table into each lane (transpose=0)    spi_load (ac_huff_table_strm, &ac_huffman_table_k[0], 0, DERIVED_AC_TABLE_LENGTH, 1, 1, 0);  // Load AC huffman table into each lane (transpose=0)    spi_load (divisor_strm, p_quant_divisor, 0, 32, 1, 1, 0);                                    // Load 
Quantization Divisors into each lane (transpose=0)    spi_load (bitstream_offset_strm, &bitstream_offset, 0, SPI_LANES, 1, 1, 1);                  // Initialse the pointers to bitstream.     spi_load (index_strm, p_input_index, 0, INDICES_PER_KERNEL, 1, 1, 1);                        // Load the first set of indices generated.    p_src           = p_input;    cur_strip_size  = (p_comp->width_in_blocks > STRIP_SIZE) ? STRIP_SIZE :  p_comp->width_in_blocks;    last_strip_size = p_comp->width_in_blocks -  ((iterations_per_row - 1) * STRIP_SIZE);   // Remaining valid number of 8x8 blocks in the current row of 8x8 blocks    for (i = 0; i < iterations_per_frame; i++)    {        p_bitstream      = (unsigned int *) p_comp->p_mem_buffer + i * p_comp->width_in_blocks * SPI_LANES * BLOCK_BIT_BUFFER_SIZE_W;        strip_size       = cur_strip_size;        last_iter_in_row = 0;        // Data loads & stores that need to take place prior to kernel launch.        spi_load (prev_block_data, &init, 0, 3, 1, 1, 0);  // Initialise all the three substreams to 0        if (i == (iterations_per_frame - 1))               // Check if current iteration is the on the last few rows of 8x8 blocks of the input image        {            spi_load (index_strm, p_input_index, INDICES_PER_KERNEL, INDICES_PER_KERNEL, 1, 1, 1);  // If so, then load the second set of indices generated.        
}        if (i != 0)        {            // Store the number of bits created by each lane            // This is used to convert the output bitstream from the kernel into a bit-buffer structure            spi_store (                next_bitstream_offset_strm,				// output stream                  kernel_output_bitcount,					// output buffer                ((i-1) * SPI_LANES),					// offset                SPI_LANES,								// count                1,										// group                1,										// stride                1										// transpose                );	        }        spi_load (next_bitstream_offset_strm, &bitstream_offset, 0, SPI_LANES, 1, 1, 1);  // Reset the values in the bitstream         for (j = 0; j < iterations_per_row; j++)        {            if (j == (iterations_per_row - 1))  // Check if the current iteration is last iteration on the row of 8x8 blocks.            {                last_iter_in_row = 0xFFFFFFF;   // 0xFFFFFFF is easier for DPU to understand than "0x1"                 strip_size = last_strip_size;   // Update strip_size to the vaild number of 8x8 blocks only            }            // Update 'p_src_img_offset' to point the location in the input image where the next valid data needs to be picked.            // 'p_src_img_offset' is made to point to the start of the window from which the next valid STRIP_SIZE number of            // 8x8 blocks are to fetched from.            p_src_img_offset = p_src + (((i * SPI_LANES * BLOCK_HEIGHT * p_comp->scaled_width) + (j * STRIP_SIZE *BLOCK_WIDTH)));            // Input image needs to be loaded into the each lanes
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -