⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 transpose_kc.sc

📁 motion Jpeg 在SPI DSP平台优化好的代码
💻 SC
字号:
////////////////////////////////////////////////////////////////////////////////
//
//      Title:          transpose_kc.sc (KernelC code for 2-D 8x8 transpose)
//
//      Notice:         COPYRIGHT (C) STREAM PROCESSORS, INC. 2005-2007
//                      THIS PROGRAM IS PROVIDED UNDER THE TERMS OF THE SPI
//                      END-USER LICENSE AGREEMENT (EULA). THE PROGRAM MAY ONLY
//                      BE USED IN A MANNER EXPLICITLY SPECIFIED IN THE EULA,
//                      WHICH INCLUDES LIMITATIONS ON COPYING, MODIFYING,
//                      REDISTRIBUTION AND WARANTIES. UNAUTHORIZED USE OF THIS
//                      PROGRAM IS STRICTLY PROHIBITED. YOU MAY OBTAIN A COPY OF
//                      THE EULA FROM WWW.STREAMPROCESSORS.COM.
//
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
//      #includes
////////////////////////////////////////////////////////////////////////////////
#include "spi_common.h"

/////////////////////////////////////////////////////////////////
inline void kernel transpose_2x2_intralane(
    // inputs: in0 is the upper row of the 2x2 block, in1 the lower row
    // (this is how transpose_4x4_intralane passes them).
    vec int16x2   in0(in),   vec int16x2  in1(in),
    // outputs: upper and lower row of the transposed block.
    vec int16x2   out0(out), vec int16x2  out1(out)
)
//
// Description: Transpose a 2x2 block of 16-bit values stored in
//              two int16x2s within the same lane.  One transpose
//              occurs on each lane.
//
//              out0 is the "lo" result and out1 the "hi" result of the
//              same shuffle of (in0, in1); both calls use the same
//              control word.
//
// Returns:     Nothing.
//
/////////////////////////////////////////////////////////////////
{
    // NOTE(review): 0x75643120 is the shuffle control word handed to the
    // spi_vshuffledi_lo/_hi intrinsics. Its encoding is defined by those
    // intrinsics and is not visible in this file -- confirm against the SPI
    // intrinsic reference before changing it.
    out0 = (vec int16x2)spi_vshuffledi_lo ((vec uint32x1)0x75643120, (vec int32x1)in0, (vec int32x1)in1);
    out1 = (vec int16x2)spi_vshuffledi_hi ((vec uint32x1)0x75643120, (vec int32x1)in0, (vec int32x1)in1);
}

/////////////////////////////////////////////////////////////////
inline void kernel transpose_4x4_intralane(
    // inputs (in0,in1 is the first row, in6--in7 is the last row).
    // Within the first row, for example, the LS 16-bits of in0 is the
    // first value in the row, and the MS 16-bits of in1 is the last
    // value on the row.
    vec int16x2 in0(in), vec int16x2 in1(in),
    vec int16x2 in2(in), vec int16x2 in3(in),
    vec int16x2 in4(in), vec int16x2 in5(in),
    vec int16x2 in6(in), vec int16x2 in7(in),
    // outputs, laid out the same as the input
    vec int16x2 out0(out), vec int16x2 out1(out),
    vec int16x2 out2(out), vec int16x2 out3(out),
    vec int16x2 out4(out), vec int16x2 out5(out),
    vec int16x2 out6(out), vec int16x2 out7(out)
)
//
// Description: Transpose a 4x4 block of 16-bit values stored in
//              8 int16x2s within the same lane.  One transpose
//              occurs on each lane.
//
//              The block is handled as four 2x2 quadrants.  Each
//              quadrant is transposed by transpose_2x2_intralane; the
//              two diagonal quadrants stay in place and the two
//              off-diagonal quadrants swap positions in the output.
//
// Returns:     Nothing.
//
/////////////////////////////////////////////////////////////////
{
    // Do the top-left input 2x2 block (stays top-left in the output)
    transpose_2x2_intralane(in0, in2, out0, out2);

    // Do the top-right input 2x2 block (lands bottom-left in the output)
    transpose_2x2_intralane(in1, in3, out4, out6);

    // Do the bottom-left input 2x2 block (lands top-right in the output)
    transpose_2x2_intralane(in4, in6, out1, out3);

    // Do the bottom-right input 2x2 block (stays bottom-right in the output)
    transpose_2x2_intralane(in5, in7, out5, out7);
}

/////////////////////////////////////////////////////////////////
inline void kernel transpose_8x8_intralane(
    // inputs (in0--in3 is the first row, in28--in31 is the last row).
    // Within the first row, for example, the LS 16-bits of in0 is the
    // first value in the row, and the MS 16-bits of in3 is the last
    // value on the row.
    vec int16x2 in0(in),  vec int16x2 in1(in),  vec int16x2 in2(in),  vec int16x2 in3(in),
    vec int16x2 in4(in),  vec int16x2 in5(in),  vec int16x2 in6(in),  vec int16x2 in7(in),
    vec int16x2 in8(in),  vec int16x2 in9(in),  vec int16x2 in10(in), vec int16x2 in11(in),
    vec int16x2 in12(in), vec int16x2 in13(in), vec int16x2 in14(in), vec int16x2 in15(in),
    vec int16x2 in16(in), vec int16x2 in17(in), vec int16x2 in18(in), vec int16x2 in19(in),
    vec int16x2 in20(in), vec int16x2 in21(in), vec int16x2 in22(in), vec int16x2 in23(in),
    vec int16x2 in24(in), vec int16x2 in25(in), vec int16x2 in26(in), vec int16x2 in27(in),
    vec int16x2 in28(in), vec int16x2 in29(in), vec int16x2 in30(in), vec int16x2 in31(in),
    // outputs, laid out the same as the input
    vec int16x2 out0(out),  vec int16x2 out1(out),  vec int16x2 out2(out),  vec int16x2 out3(out),
    vec int16x2 out4(out),  vec int16x2 out5(out),  vec int16x2 out6(out),  vec int16x2 out7(out),
    vec int16x2 out8(out),  vec int16x2 out9(out),  vec int16x2 out10(out), vec int16x2 out11(out),
    vec int16x2 out12(out), vec int16x2 out13(out), vec int16x2 out14(out), vec int16x2 out15(out),
    vec int16x2 out16(out), vec int16x2 out17(out), vec int16x2 out18(out), vec int16x2 out19(out),
    vec int16x2 out20(out), vec int16x2 out21(out), vec int16x2 out22(out), vec int16x2 out23(out),
    vec int16x2 out24(out), vec int16x2 out25(out), vec int16x2 out26(out), vec int16x2 out27(out),
    vec int16x2 out28(out), vec int16x2 out29(out), vec int16x2 out30(out), vec int16x2 out31(out)
)
//
// Description: Transpose a 8x8 block of 16-bit values stored in
//              32 int16x2s within the same lane.  One transpose
//              occurs on each lane.
//
//              The block is handled as four 4x4 quadrants.  Each
//              quadrant is transposed by transpose_4x4_intralane; the
//              two diagonal quadrants stay in place and the two
//              off-diagonal quadrants swap positions in the output.
//              Each row is four int16x2s wide, so a quadrant takes two
//              adjacent int16x2s from each of four consecutive rows.
//
// Returns:     Nothing.
//
/////////////////////////////////////////////////////////////////
{
    // Do the top-left input 4x4 block (stays top-left in the output)
    transpose_4x4_intralane(in0, in1, in4, in5, in8, in9, in12, in13,
                            out0, out1, out4, out5, out8, out9, out12, out13);

    // Do the top-right input 4x4 block (lands bottom-left in the output)
    transpose_4x4_intralane(in2, in3, in6, in7, in10, in11, in14, in15,
                            out16, out17, out20, out21, out24, out25, out28, out29);

    // Do the bottom-left input 4x4 block (lands top-right in the output)
    transpose_4x4_intralane(in16, in17, in20, in21, in24, in25, in28, in29,
                            out2, out3, out6, out7, out10, out11, out14, out15);

    // Do the bottom-right input 4x4 block (stays bottom-right in the output)
    transpose_4x4_intralane(in18, in19, in22, in23, in26, in27, in30, in31,
                            out18, out19, out22, out23, out26, out27, out30, out31);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -