📄 jpege_vlc_kc.sc

📁 motion Jpeg 在SPI DSP平台优化好的代码
💻 SC
📖 第 1 页 / 共 3 页
字号:
    abs_coef		= spi_vabd32i (diff_dc_coef, 0);    code_mag		= spi_vselect32(spi_vle32i((vec int32x1)0, diff_dc_coef), diff_dc_coef, (diff_dc_coef - 1));	// code_mag = (diff_dc_coef >= 0) ? diff_dc_coef : (diff_dc_coef - 1);    // Find number of bits needed for magnitude of the coefficient.    num_bits		= 32 - spi_vffone32 (abs_coef);    // Write huffman coded categorie    spi_array_read(dc_huffman_table,huffman_entry,num_bits);	// huffman_entry = dc_huffman_table[num_bits];    code_length		= spi_vshuffledi_hi (half_word_hi_lo, huffman_entry, tmp);     // top 16 bits    code_word		= spi_vshuffledi_lo (half_word_hi_lo, huffman_entry, tmp);     // bottom 16 bits    //cur_word_position = start_index  | (no_prev_blk_bits << 16)  ;  // This line implemented as shuffle for optimization    cur_word_position	= spi_vshuffleu(0x05040100, start_index, no_prev_blk_bits);         cur_word			= prev_blk_bits ;    // Merge code_word of length code_length & code_mag of length num_bits into one word to be written to the bitstream    code_word			= (code_word << num_bits) | (code_mag & ((vec uint32x1)0xFFFFFFFF >> ((vec uint32x1)32 - num_bits)));    code_length			= code_length + num_bits;    write_bits (bitstream, cur_word_position, cur_word_position, cur_word, cur_word, code_word, code_length, false);}inline void kernel huffman_encode ( vec		int32x1		zz_0(in), vec		int32x1		zz_63(in), stream		int32x1		run_level(array_io), vec		uint32x1	run_level_size(in), stream		uint32x1	ac_huffman_table(array_in), stream		uint32x1	bitstream(array_io), stream		int32x1		prev_block_data_strm(array_io),	 vec		uint32x1	escape_code_length(in), vec		uint32x1	escape_code_word(in), vec		uint32x1	eob_code_length(in), vec		uint32x1	eob_code_word(in), vec		uint32x1	cur_word_in(in), vec		uint32x1	cur_word_position_in(in), vec		uint32x1	last_word(out), vec		uint32x1	last_word_pos(out) ) // Description:  //     Each lane operates on one 8x8 block. //     There are no more than 64 run-level pairs per block. //     All the run -level pairs are Huffman coded & then written to the bitstream. //     Each word to be written to the bistream is first chacked for "FF" sequence & //     stuffed with "00" as an emulation prevention measure      // ////////////////////////////////////////////////////////////////{    vec uint32x1	code_mag;               // code of the magnitute    vec uint32x1	num_bits;    vec uint32x1	huffman_entry;    vec uint32x1	code_length;    vec uint32x1	code_word;    vec uint32x1	run_level_i;    vec int32x1		run, level, updated_run;    vec uint32x1	abs_level;    vec uint32x1	table_offset;    vec uint32x1	cur_word_position;    vec uint32x1	cur_word;    vec uint32x1	utmpv, utmpv1;    vec int32x1		tmp;    vec int32x1		i;    vec uint32x1	half_word_hi_lo;    half_word_hi_lo		= 0xb9b93120;    cur_word			= cur_word_in;    cur_word_position	= cur_word_position_in;    // ---- Handle ac_coefficients -----    // Loop over run_level until coding for all the lanes is done    i	= 0;    tmp = 0;    utmpv1 = 0;    spi_array_read (run_level, run_level_i, i);					//run_level_i = run_level[0];    utmpv = spi_vselect32(spi_vle32u(run_level_size, i), 0, 1); //utmpv = (run_level_size == 0) ? 0 : 1;    while ( spi_vrorl(utmpv) != 0 )    {#if defined (SWP)#pragma pipeline #endif        // Lanes for which run-level encoding has finished, initialse the run & level to 0        run_level_i	= spi_vselect32(spi_vle32u(run_level_size, i), 0, run_level_i);        run			= spi_vshuffledi_hi (half_word_hi_lo, run_level_i, tmp);     // top 16 bits        level		= spi_vshuffledi_lo (half_word_hi_lo, run_level_i, tmp);     // bottom 16 bits        // If run > 16 in previous iteration, update the run with recalculated one        run			=  spi_vselect32(utmpv1, updated_run, run);        // check if run > 15        utmpv1		= spi_vlt32u(15, run);        // store the modified run to use it in next iteration        updated_run	= run - 16;        // if run > 15, we'll be inserting an escape code, hence change the run to 0 so that it points to a valid location in huffman table        run			=  spi_vselect32(utmpv1, 0, run);        utmpv		= spi_veq32(level, 0);	//level == 0        abs_level	= spi_vabd32i (level, 0);        code_mag	= spi_vselect32(spi_vle32i(0, level), level, level - 1);	// code_mag = (level >= 0) ? level : level - 1;        num_bits	= spi_vselect32(utmpv, 0, (32 - spi_vffone32 (abs_level)));	// num_bits = utmpv ? 0 : (32 - spi_vffone32 (abs_level));        table_offset = run << 4;        table_offset = table_offset + num_bits;        spi_array_read(ac_huffman_table, huffman_entry, table_offset);        code_length = spi_vselect32(utmpv, 0, spi_vshuffledi_hi (half_word_hi_lo, huffman_entry, tmp));	//code_length = (level == 0) ? 0 : huffman_entry >> 16;        code_word	= spi_vshuffledi_lo (half_word_hi_lo, huffman_entry, tmp);     // bottom 16 bits	//code_word   = huffman_entry & MASK_16BIT        // Merge code_word of length code_length & code_mag of length num_bits into one word to be written to the bitstream        code_word	= (code_word << num_bits) | (code_mag & ((vec uint32x1)0xFFFFFFFF >> ((vec uint32x1)32 - num_bits)));        code_length = code_length + num_bits;        // If run > 15 we beed to write the escape code word        code_length = spi_vselect32(utmpv1, escape_code_length, code_length);        code_word	= spi_vselect32(utmpv1, escape_code_word, code_word);        write_bits (bitstream, cur_word_position, cur_word_position, cur_word, cur_word, code_word, code_length, false);        // Don't increment the pointer if run > 15          i			=  spi_vselect32(utmpv1, i, i + 1);        spi_array_read (run_level, run_level_i, i);        utmpv		= spi_vselect32(spi_vle32u(run_level_size, i), 0, 1);	//utmpv = (run_level_size == i) ? 0 : 1;    }    // If the last coef is zero, emit an end-of-block code     utmpv		= zz_63;    code_length = spi_vselect32(spi_veq32(utmpv, 0), eob_code_length, 0);    write_bits (bitstream, cur_word_position, cur_word_position, cur_word, cur_word, eob_code_word, code_length, false);    // Save the dc_coef of the prev block to be used as the init_dc_coef for thr next block    //cur_dc_coef = zz_0;	    spi_array_write (prev_block_data_strm, zz_0, 0);    // Store the incomplete word & no of bits in that to be patched up with the next block    spi_array_write (prev_block_data_strm, cur_word, 1);    spi_array_write (prev_block_data_strm, (cur_word_position >> 16), 2);    last_word		= cur_word;    last_word_pos	= cur_word_position;   	}inline void kernel calc_run_level( stream		uint32x1	run_level(array_io), vec		int32x1		level(in), vec		int32x1		run_in(in), vec		int32x1		run_out(out), vec		uint32x1	num_non_zero_in(in), vec		uint32x1	num_non_zero_out(out), vec		uint32x1	pack(in) ) // Description:  //     If level = 0, run is inceremented. //     If level != 0, run-level pair is written to the stream "run_level" //     & the index into the stream num_non_zero_in is incremented. // ////////////////////////////////////////////////////////////////{    vec uint32x1 tmp0;    // Pack run and level into one 32 bit word where run is top 16 bit, level to bottom 16bits    tmp0                = spi_vshufflei (pack, run_in, level);	    spi_array_write (run_level, tmp0, num_non_zero_in);    tmp0                = spi_veq32(level, (vec uint32x1)0);    run_out             = spi_vselect32(tmp0, (run_in + (vec int32x1)1), (vec uint32x1)0);    num_non_zero_out    = spi_vselect32(tmp0, num_non_zero_in, (num_non_zero_in + (vec uint32x1)1));}kernel void jpege_vlc_kc ( stream int16x2 coefs (seq_in),							// Transformed & quantized co-efficients as input stream uint32x1 dc_huffman_table(array_in),			// Each huffman table entry is a 32 bit word.                                                        // The high 16 bit is the code length, the low 16 bit is the code word                                                        // dc_huffman_table has a total of 12 entries.  stream uint32x1 ac_huffman_table(array_in),			// ac_huffman table has a total of 160 entries	 stream uint32x1 bitstream (array_io),					// Ouput bitstream stream int32x1 prev_block_data_strm(array_io),			// This stream consists of prev dc coeff, prev incomplete word,                                                         // & no. of bits in prev incomplete word stream uint32x1 prev_bitstream_offset_strm(array_io),	// This bitstream points to the last word position of the previous block stream uint32x1 next_bitstream_offset_strm(array_io),	// This bitstream is updated to pint to the last word position of current block stream uint32x1 run_level(array_io),					// Used as temporary storage for runs & levels of each block                                                        // The high 16 bit is the run, the low 16 bit is the level uint32x1 last_iter_in_row(in)					        // This flag is true if we are encoding the last strip is a row ) // Description: Each lane operates on STRIP_SIZE number of 8x8 blocks. // //   1. Transformed & quantized input coefficients are first stored in zig-zag scan order //	  2. runs & levels are calculated //   3. This is followed by huffamn encoding of differential DC coeffient & all the runs & levels in the block //	  4. Each word to be written to the bitstream is searched for "FF" bit pattern & stuffed with "00" : Emulation prevention procedure // ////////////////////////////////////////////////////////////////{    vec int16x2 c0c8,  c16c24, c32c40, c48c56;    vec int16x2 c1c9,  c17c25, c33c41, c49c57;    vec int16x2 c2c10, c18c26, c34c42, c50c58;    vec int16x2 c3c11, c19c27, c35c43, c51c59;    vec int16x2 c4c12, c20c28, c36c44, c52c60;    vec int16x2 c5c13, c21c29, c37c45, c53c61;    vec int16x2 c6c14, c22c30, c38c46, c54c62;    vec int16x2 c7c15, c23c31, c39c47, c55c63;    vec int32x1	zz_0,	zz_1,	zz_2,	zz_3,	zz_4,	zz_5,	zz_6,	zz_7,	zz_8,	zz_9;    vec int32x1	zz_10,	zz_11,	zz_12,	zz_13,	zz_14,	zz_15,	zz_16,	zz_17,	zz_18,	zz_19;    vec int32x1	zz_20,	zz_21,	zz_22,	zz_23,	zz_24,	zz_25,	zz_26,	zz_27,	zz_28,	zz_29;    vec int32x1	zz_30,	zz_31,	zz_32,	zz_33,	zz_34,	zz_35,	zz_36,	zz_37,	zz_38,	zz_39;    vec int32x1	zz_40,	zz_41,	zz_42,	zz_43,	zz_44,	zz_45,	zz_46,	zz_47,	zz_48,	zz_49;    vec int32x1	zz_50,	zz_51,	zz_52,	zz_53,	zz_54,	zz_55,	zz_56,	zz_57,	zz_58,	zz_59;    vec int32x1	zz_60,	zz_61,	zz_62,	zz_63;    vec uint32x1 hi_lo_bytes;    vec uint32x1 half_word_hi_lo;    vec int32x1 tmp0;    vec uint32x1 utmpv;    vec uint32x1 num_non_zero;    vec int32x1 run;    vec uint32x1 pack;    vec uint32x1 write_index, store_index;    vec uint32x1 last_word, last_word_pos, last_word_bits;    vec uint32x1 cur_word, cur_word_position;    vec uint32x1 data, num_bits;    vec uint32x1 reset_marker;    vec uint32x1 escape_code_word;    vec uint32x1 escape_code_length;    vec uint32x1 eob_code_word;    vec uint32x1 eob_code_length;    hi_lo_bytes =0x9B9B1302;    half_word_hi_lo = 0xb9b93120;    write_index = 0;    tmp0 = 0;    // Read the escape code word upfront since it is required for coding all the blocks    spi_array_read(ac_huffman_table, utmpv, 0xf0);    escape_code_length	= spi_vshuffledi_hi (half_word_hi_lo, utmpv, tmp0);     // top 16 bits    escape_code_word	= spi_vshuffledi_lo (half_word_hi_lo, utmpv, tmp0);     // bottom 16 bits    // Read the end of block code word upfront since it is required for coding all the blocks    spi_array_read(ac_huffman_table, utmpv, 0);    eob_code_length		= spi_vshuffledi_hi (half_word_hi_lo, utmpv, tmp0);     // top 16 bits    eob_code_word		= spi_vshuffledi_lo (half_word_hi_lo, utmpv, tmp0);     // bottom 16 bits    while (!spi_eos(coefs))    {        // each loop handles STRIP_SIZE number of blocks per lane.         // read coefficients of one block. These coefficients are in transposed order of the original block after dct.        spi_read (coefs, c0c8);  spi_read (coefs, c16c24); spi_read (coefs, c32c40); spi_read (coefs, c48c56);        spi_read (coefs, c1c9);  spi_read (coefs, c17c25); spi_read (coefs, c33c41); spi_read (coefs, c49c57);        spi_read (coefs, c2c10); spi_read (coefs, c18c26); spi_read (coefs, c34c42); spi_read (coefs, c50c58);        spi_read (coefs, c3c11); spi_read (coefs, c19c27); spi_read (coefs, c35c43); spi_read (coefs, c51c59);        spi_read (coefs, c4c12); spi_read (coefs, c20c28); spi_read (coefs, c36c44); spi_read (coefs, c52c60);        spi_read (coefs, c5c13); spi_read (coefs, c21c29); spi_read (coefs, c37c45); spi_read (coefs, c53c61);        spi_read (coefs, c6c14); spi_read (coefs, c22c30); spi_read (coefs, c38c46); spi_read (coefs, c54c62);        spi_read (coefs, c7c15); spi_read (coefs, c23c31); spi_read (coefs, c39c47); spi_read (coefs, c55c63);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -