⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mpeg_kc.i

📁 H.264完整的C语言代码和DCT的代码
💻 I
📖 第 1 页 / 共 2 页
字号:
#line 1 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_kernelc.hpp"








#line 2 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"
#line 1 "D:\\working\\im_apps\\h264\\mpeg.hpp"




#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_types.hpp"













#line 6 "D:\\working\\im_apps\\h264\\mpeg.hpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_deftypes.hpp"




#line 7 "D:\\working\\im_apps\\h264\\mpeg.hpp"



// Declaration of kernel idxGen (its definition is not part of this listing).
//   indices   : ostream<uint> - output stream of unsigned ints.
//   uc_size   : uc<int>&      - int parameter passed by reference.
//   uc_params : uc<int>&      - int parameter passed by reference.
// NOTE(review): the name suggests this generates index streams; the meaning
// of uc_size / uc_params is not shown here - confirm against the kernel body.
#decl idxGen(ostream<uint>   indices,
              uc<int>&        uc_size,
              uc<int>&        uc_params);
;


// Declaration of kernel icolor (its definition is not part of this listing).
//   datain  : istream<ubyte4> - input stream of ubyte4 words.
//   Yout    : ostream<half2>  - output stream of half2 words.
//   CrCbout : ostream<half2>  - output stream of half2 words.
// NOTE(review): parameter names suggest a colour conversion into luma (Y) and
// chroma (Cr/Cb) streams - confirm against the kernel body. Differs from
// pcolor only in the element type of Yout (half2 here, ubyte4 there).
#decl icolor(istream<ubyte4> datain,
              ostream<half2> Yout,
              ostream<half2> CrCbout);
;


// Declaration of kernel pcolor (its definition is not part of this listing).
//   datain  : istream<ubyte4> - input stream of ubyte4 words.
//   Yout    : ostream<ubyte4> - output stream of ubyte4 words.
//   CrCbout : ostream<half2>  - output stream of half2 words.
// NOTE(review): same parameter list as icolor except that Yout carries ubyte4
// instead of half2; presumably the colour-conversion variant used for another
// frame type - confirm against the kernel body.
#decl pcolor(istream<ubyte4> datain,
              ostream<ubyte4> Yout,
              ostream<half2> CrCbout);
;


// Declaration of kernel dct (its definition is not part of this listing).
//   datain             : istream<half2>  - input stream of half2 words.
//   consts             : istream<uhalf2> - input stream of unsigned constants.
//   out                : ostream<half2>  - output stream of half2 words.
//   uc_quantizer_scale : uc<uhalf2>&     - uhalf2 parameter passed by reference.
// NOTE(review): presumably a forward DCT with quantisation - confirm against
// the kernel body. uc_quantizer_scale is unsigned (uhalf2) here but signed
// (half2) in the idct declaration.
#decl dct(istream<half2> datain,
           istream<uhalf2> consts,
           ostream<half2> out,
           uc<uhalf2>& uc_quantizer_scale);
;


// Declaration of kernel idct (its definition is not part of this listing).
//   datain             : istream<half2>  - input stream of half2 words.
//   consts             : istream<uhalf2> - input stream of unsigned constants.
//   out                : ostream<half2>  - output stream of half2 words.
//   uc_quantizer_scale : uc<half2>&      - half2 parameter passed by reference.
// NOTE(review): presumably the inverse of dct - confirm against the kernel
// body. uc_quantizer_scale is signed (half2) here but unsigned (uhalf2) in
// the dct declaration; verify that the difference is intentional.
#decl idct(istream<half2> datain,
            istream<uhalf2> consts,
            ostream<half2> out,
            uc<half2>& uc_quantizer_scale);
;


// Declaration of kernel rle (its definition is not part of this listing).
//   Yin         : istream<half2>  - input stream of half2 words.
//   CrCbin      : istream<half2>  - input stream of half2 words.
//   indices     : istream<int>    - input stream of ints.
//   motion      : cistream<half2> - half2 input stream (cistream variant).
//   out         : costream<half2> - half2 output stream (costream variant).
//   pframe      : uc<uint>&       - uint parameter passed by reference.
//   quant_scale : uc<uint>&       - uint parameter passed by reference.
// NOTE(review): cistream/costream are presumably the conditional stream types
// of KernelC, and the name suggests run-length encoding - confirm both against
// the KernelC documentation and the kernel body.
#decl rle(istream<half2> Yin,
           istream<half2> CrCbin,
           istream<int> indices,
           cistream<half2> motion,
           costream<half2> out,
           uc<uint>& pframe,
           uc<uint>& quant_scale);
;


// Declaration of kernel difference (its definition is not part of this
// listing).
//   curryblks : istream<ubyte4> - input stream of ubyte4 words.
//   currcblks : istream<half2>  - input stream of half2 words.
//   refyblks  : istream<ubyte4> - input stream of ubyte4 words.
//   refcblks  : istream<half2>  - input stream of half2 words.
//   diffyblks : ostream<half2>  - output stream of half2 words.
//   diffcblks : ostream<half2>  - output stream of half2 words.
// NOTE(review): names suggest current-minus-reference residuals for luma (y)
// and chroma (c) blocks - confirm against the kernel body.
#decl difference(istream<ubyte4> curryblks,
                  istream<half2> currcblks,
                  istream<ubyte4> refyblks,
                  istream<half2> refcblks,
                  ostream<half2> diffyblks,
                  ostream<half2> diffcblks);
;


// Declaration of kernel correlate (its definition is not part of this
// listing).
//   diffyblks   : istream<half2>  - input stream of half2 words.
//   diffcblks   : istream<half2>  - input stream of half2 words.
//   refyblks    : istream<ubyte4> - input stream of ubyte4 words.
//   refcblks    : istream<half2>  - input stream of half2 words.
//   newrefyblks : ostream<ubyte4> - output stream of ubyte4 words.
//   newrefcblks : ostream<half2>  - output stream of half2 words.
//   uc_pframe   : uc<uint>&       - uint parameter passed by reference.
// NOTE(review): the input/output types mirror those of `difference` in
// reverse; presumably this rebuilds reference blocks from residuals - confirm
// against the kernel body.
#decl correlate(istream<half2> diffyblks,
                 istream<half2> diffcblks,
                 istream<ubyte4> refyblks,
                 istream<half2> refcblks,
                 ostream<ubyte4> newrefyblks,
                 ostream<half2> newrefcblks,
                 uc<uint>& uc_pframe);
;


// Declaration of kernel MV2idx (its definition is not part of this listing).
//   motion      : cistream<half2> - half2 input stream (cistream variant).
//   yindices    : ostream<uint>   - output stream of unsigned ints.
//   crcbindices : ostream<uint>   - output stream of unsigned ints.
//   uc_offsets  : uc<int>&        - int parameter passed by reference.
//   uc_mblks    : uc<int>&        - int parameter passed by reference.
//   uc_mb_width : uc<int>&        - int parameter passed by reference.
// NOTE(review): the name suggests it converts motion vectors into index
// streams for luma and chroma data - confirm against the kernel body.
#decl MV2idx(cistream<half2> motion,      
              ostream<uint>   yindices,    
              ostream<uint>   crcbindices, 
              uc<int>&        uc_offsets,  
              uc<int>&        uc_mblks,    
              uc<int>&        uc_mb_width);
;


// Declaration of kernel blocksearch (its definition is not part of this
// listing).
//   row0, row1, row2 : istream<ubyte4> - three input streams of ubyte4 words.
//   mblocks          : istream<ubyte4> - input stream of ubyte4 words.
//   motions          : costream<half2> - half2 output stream (costream variant).
//   location         : uc<int>&        - int parameter passed by reference.
// NOTE(review): presumably a motion search of macroblocks against three rows
// of reference data - confirm against the kernel body.
#decl blocksearch(istream<ubyte4> row0,
                   istream<ubyte4> row1,
                   istream<ubyte4> row2,
                   istream<ubyte4> mblocks,
                   costream<half2> motions,
                   uc<int>& location);
;


// Declaration of kernel mb_encode; the parameter list matches the kernel
// definition that appears later in this file.
//   datain    : istream<byte4> - input stream of byte4 words (note: signed
//                                byte4, where the other kernels take ubyte4).
//   consts    : istream<half2> - input stream of constants; the definition
//                                reads every value as half2 and converts it.
//   color_out : ostream<half2> - output stream of half2 words.
//   dct_out   : ostream<half2> - output stream of half2 words.
//   out       : costream<int>  - int output stream (costream variant).
#decl mb_encode(istream<byte4> datain,
                 istream<half2> consts,
                 ostream<half2> color_out,
                 ostream<half2> dct_out,
                 costream<int> out);
;





// Declaration of kernel me_fast (its definition is not part of this listing).
//   row0, row1, row2 : istream<ubyte4> - three input streams of ubyte4 words.
//   mblocks          : istream<ubyte4> - input stream of ubyte4 words.
//   motions_out      : costream<half2> - half2 output stream (costream variant).
//   refyblks         : ostream<ubyte4> - output stream of ubyte4 words.
//   crcbindices      : ostream<uint>   - output stream of unsigned ints.
//   uc_margin, uc_offsets, uc_mblks, uc_mb_width
//                    : uc<int>&        - int parameters passed by reference.
// NOTE(review): the parameter list is that of blocksearch's stream inputs plus
// MV2idx-style outputs; presumably a fused motion-estimation kernel - confirm
// against the kernel body.
#decl me_fast(istream<ubyte4> row0,
               istream<ubyte4> row1,
               istream<ubyte4> row2,
               istream<ubyte4> mblocks,
               costream<half2> motions_out,
               ostream<ubyte4> refyblks,
               ostream<uint>   crcbindices,
               uc<int>& uc_margin,
               uc<int>& uc_offsets,
               uc<int>& uc_mblks,
               uc<int>& uc_mb_width);
;


// Declaration of kernel me_fast_search4 (its definition is not part of this
// listing).
//   row0, row1, row2 : istream<ubyte4> - three input streams of ubyte4 words.
//   mblocks          : istream<ubyte4> - input stream of ubyte4 words.
//   motions_out      : costream<half2> - half2 output stream (costream variant).
//   uc_margin        : uc<int>&        - int parameter passed by reference.
//   uc_mblks         : uc<int>&        - int parameter passed by reference.
// NOTE(review): the "search4" suffix is not explained by anything visible
// here (possibly a 4-pixel search step) - confirm against the kernel body.
#decl me_fast_search4(istream<ubyte4> row0,
                       istream<ubyte4> row1,
                       istream<ubyte4> row2,
                       istream<ubyte4> mblocks,
                       costream<half2> motions_out,
                       uc<int>& uc_margin,
                       uc<int>& uc_mblks);
;


// Declaration of kernel me_fast_jitter2 (its definition is not part of this
// listing).
//   row0, row1, row2 : istream<ubyte4> - three input streams of ubyte4 words.
//   mblocks          : istream<ubyte4> - input stream of ubyte4 words.
//   motions_in       : cistream<half2> - half2 input stream (cistream variant).
//   motions_out      : costream<half2> - half2 output stream (costream variant).
//   uc_margin        : uc<int>&        - int parameter passed by reference.
//   uc_mblks         : uc<int>&        - int parameter passed by reference.
// NOTE(review): same parameters as me_fast_search4 plus a motions_in stream;
// presumably a refinement pass over earlier motion vectors - confirm against
// the kernel body.
#decl me_fast_jitter2(istream<ubyte4> row0,
                       istream<ubyte4> row1,
                       istream<ubyte4> row2,
                       istream<ubyte4> mblocks,
                       cistream<half2> motions_in,
                       costream<half2> motions_out,
                       uc<int>& uc_margin,
                       uc<int>& uc_mblks);
;


// Declaration of kernel me_fast_jitter1 (its definition is not part of this
// listing).
//   row0, row1, row2 : istream<ubyte4> - three input streams of ubyte4 words.
//   mblocks          : istream<ubyte4> - input stream of ubyte4 words.
//   motions_in       : cistream<half2> - half2 input stream (cistream variant).
//   motions_out      : costream<half2> - half2 output stream (costream variant).
//   refyblks         : ostream<ubyte4> - output stream of ubyte4 words.
//   crcbindices      : ostream<uint>   - output stream of unsigned ints.
//   uc_margin, uc_offsets, uc_mblks, uc_mb_width
//                    : uc<int>&        - int parameters passed by reference.
// NOTE(review): combines me_fast_jitter2's motion in/out streams with
// me_fast's refyblks/crcbindices outputs; presumably the final refinement
// pass - confirm against the kernel body.
#decl me_fast_jitter1(istream<ubyte4> row0,
                       istream<ubyte4> row1,
                       istream<ubyte4> row2,
                       istream<ubyte4> mblocks,
                       cistream<half2> motions_in,
                       costream<half2> motions_out,
                       ostream<ubyte4> refyblks,
                       ostream<uint>   crcbindices,
                       uc<int>& uc_margin,
                       uc<int>& uc_offsets,
                       uc<int>& uc_mblks,
                       uc<int>& uc_mb_width);
;






#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_undeftypes.hpp"




#line 155 "D:\\working\\im_apps\\h264\\mpeg.hpp"

#line 157 "D:\\working\\im_apps\\h264\\mpeg.hpp"
#line 3 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_kernelc2.hpp"






#line 4 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"

;































































kernel mb_encode(istream<byte4> datain,
                 istream<half2> consts,
                 ostream<half2> color_out,
                 ostream<half2> dct_out,
                 costream<int> out)
{

  

  
  
  
  
  array<half2> CrCbY4Y3Y2Y1_a(24);
  array<half2> CrCbY4Y3Y2Y1_b(24);
  array<int> dc_pred(3);                

  int minus_one = 0 - 1;
  int two = 1 + 1;
  int four = two + two;
  int sixteen = lo(four * four);
  int minus_sixteen = 0 - sixteen;


  
  
  
  uc<int> perm_A = 0x76546420;
  uc<int> perm_B = 0x64203210;

  cc low = itocc(cid() < four);
  cc Y_combine = itocc(shift(minus_one, sixteen));
  


  
  byte4 shuf_func1, shuf_func2, shuf_func3;
  
  
  
  half2 temp;
  consts >> temp;
  shuf_func1 = byte4(temp);
  consts >> temp;
  shuf_func2 = byte4(temp);
  consts >> temp;
  shuf_func3 = byte4(temp);

  
  half2 RB_SCALE, G_SCALE, C_SCALE;
  
  
  
  
  consts >> RB_SCALE >> G_SCALE >> C_SCALE;

  half2 one_two_eight;     
  
  consts >> one_two_eight;



  

  int three = two + 1;
  int seven = four + three;

  half2 COS_2, COS_3, COS_1_plus_COS_3, COS_1_minus_COS_3;
  
  
  
  
  
  consts >> COS_2 >> COS_3 >> COS_1_plus_COS_3 >> COS_1_minus_COS_3;

  array<half2> K(8);
  
  
  
  
  
  
  
  
  
  consts >> K[0] >> K[1] >> K[2] >> K[3] >> K[4] >> K[5] >> K[6] >> K[7];

  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  uhalf2 quant_scale;
  array<half2> quant(24);

  consts >> temp;
  quant_scale = uhalf2(temp);

  int quant_idx = 0;
  int k_idx = 0;
  uhalf2 utmp;

  
  
  uc<int> i = 8;
  loop_count(i) unroll(1) {
    consts >> temp;
    utmp = uhalf2(temp);    

    
    utmp = shift(hi(quant_scale * utmp), 1);    

    
    quant[quant_idx] = half2(hi(utmp * uhalf2(shift(K[quant_idx], 1))));

    quant_idx = quant_idx + 1;
  }

  
  
  i = 8;
  int quant_idx2 = 0;
  loop_count(i) unroll(1) {
    quant[quant_idx] = quant[quant_idx2];
    quant_idx = quant_idx + 1;
    quant_idx2 = quant_idx2 + 1;
  }

  
  
  i = 8;
  k_idx = quant_idx & seven;  
  loop_count(i) unroll(1) {
    consts >> temp;
    utmp = uhalf2(temp);
    utmp = shift(hi(quant_scale * utmp), 1);
    quant[quant_idx] = half2(hi(utmp * uhalf2(shift(K[k_idx], 1))));

    quant_idx = quant_idx + 1;
    k_idx = quant_idx & seven;  
  }

  
  
  
  
  

  

  uc<int> perm_a = 0x07654321;
  uc<int> perm_b = 0x10765432;
  uc<int> perm_c = 0x21076543;
  uc<int> perm_d = 0x32107654;
  uc<int> perm_e = 0x43210765;
  uc<int> perm_f = 0x54321076;
  uc<int> perm_g = 0x65432107;


  

  
  int dp;
  consts >> temp;
  dp = int(temp);
  dc_pred[0] = dp;    
  dc_pred[1] = dp;    
  dc_pred[2] = dp;    

  uc<int> perm_1 = 0x75643120;
  uc<int> perm_2 = 0x67451320;
  uc<int> perm_3 = 0x75261430;
  uc<int> perm_4 = 0x57362401;
  uc<int> perm_5 = 0x74352601;
  uc<int> perm_6 = 0x64270531;
  uc<int> perm_7 = 0x63170542;

  array<int> send_idx_arr(8), store_idx_arr(8);
  consts >> temp; send_idx_arr[0] = int(temp);
  consts >> temp; store_idx_arr[0] = int(temp);
  consts >> temp; send_idx_arr[1] = int(temp);
  consts >> temp; store_idx_arr[1] = int(temp);
  consts >> temp; send_idx_arr[2] = int(temp);
  consts >> temp; store_idx_arr[2] = int(temp);
  consts >> temp; send_idx_arr[3] = int(temp);
  consts >> temp; store_idx_arr[3] = int(temp);
  consts >> temp; send_idx_arr[4] = int(temp);
  consts >> temp; store_idx_arr[4] = int(temp);
  consts >> temp; send_idx_arr[5] = int(temp);
  consts >> temp; store_idx_arr[5] = int(temp);
  consts >> temp; send_idx_arr[6] = int(temp);
  consts >> temp; store_idx_arr[6] = int(temp);
  consts >> temp; send_idx_arr[7] = int(temp);
  consts >> temp; store_idx_arr[7] = int(temp);

  

  
  int y_idx = 0;
  int c_idx;
  i = 8;

  byte4 color1, color2, color3, color4;
  half2 a0, a1, a2, a3, a4, a5, a6, a7, a1a3, a2a4, b1, b2, b3, b4;
  half2 c1, c2, c3, c4, d0, d1, d2, d3, d4, d5, d6, d7, e1, e2, e3, e4;
  half2  y1, y2, y3, y4, z1, z2, z3, z4, first, second;

  
  loop_count(i) pipeline(1) {

  stage(1);



    
    datain >> color1 >> color2 >> color3 >> color4;

    
    
    a1 = half2(shuffle(color1, shuf_func1));
    b1 = half2(shuffle(color1, shuf_func2));
    a2 = half2(shuffle(color2, shuf_func1));
    b2 = half2(shuffle(color2, shuf_func2));
    a3 = half2(shuffle(color3, shuf_func1));
    b3 = half2(shuffle(color3, shuf_func2));
    a4 = half2(shuffle(color4, shuf_func1));
    b4 = half2(shuffle(color4, shuf_func2));

    
    
    
    c1 = hi(RB_SCALE * shift(a1, 1));
    c2 = hi(RB_SCALE * shift(a2, 1));
    c3 = hi(RB_SCALE * shift(a3, 1));
    c4 = hi(RB_SCALE * shift(a4, 1));

    
    d1 = c1 + hi(G_SCALE * shift(b1, 1));
    d2 = c2 + hi(G_SCALE * shift(b2, 1));
    d3 = c3 + hi(G_SCALE * shift(b3, 1));
    d4 = c4 + hi(G_SCALE * shift(b4, 1));

    
    e1 = half2(shift(int(c1), minus_sixteen));
    e2 = half2(shift(int(c2), minus_sixteen));
    e3 = half2(shift(int(c3), minus_sixteen));
    e4 = half2(shift(int(c4), minus_sixteen));

    a1a3 = a1 + a3;
    a2a4 = a2 + a4;

    
    y1 = d1 + e1;
    y2 = d2 + e2;
    y3 = d3 + e3;
    y4 = d4 + e4;

  stage(2);

    
    z1 = half2(shuffle(y1, shuf_func3));
    z2 = half2(shuffle(y2, shuf_func3));
    z3 = half2(shuffle(y3, shuf_func3));
    z4 = half2(shuffle(y4, shuf_func3));

    CrCbY4Y3Y2Y1_a[0+y_idx] = select(Y_combine, z2, z1);
    CrCbY4Y3Y2Y1_a[1+y_idx] = select(Y_combine, z4, z3);

    
    
    
    
    
    
    
    
    
    first = hi((a1a3 - (z1 + z3)) * C_SCALE) + one_two_eight;
    second = hi((a2a4 - (z2 + z4)) * C_SCALE) + one_two_eight;

    
    first = commucperm(perm_A, first);
    second = commucperm(perm_B, second);

    c_idx = shift(y_idx, minus_one);
    y_idx = y_idx + two;
    CrCbY4Y3Y2Y1_a[16+c_idx] = select(low, first, second);
  }

  color_out << CrCbY4Y3Y2Y1_a[0];
  color_out << CrCbY4Y3Y2Y1_a[1];
  color_out << CrCbY4Y3Y2Y1_a[2];
  color_out << CrCbY4Y3Y2Y1_a[3];
  color_out << CrCbY4Y3Y2Y1_a[4];
  color_out << CrCbY4Y3Y2Y1_a[5];
  color_out << CrCbY4Y3Y2Y1_a[6];
  color_out << CrCbY4Y3Y2Y1_a[7];

  color_out << CrCbY4Y3Y2Y1_a[8];
  color_out << CrCbY4Y3Y2Y1_a[9];
  color_out << CrCbY4Y3Y2Y1_a[10];
  color_out << CrCbY4Y3Y2Y1_a[11];
  color_out << CrCbY4Y3Y2Y1_a[12];
  color_out << CrCbY4Y3Y2Y1_a[13];
  color_out << CrCbY4Y3Y2Y1_a[14];
  color_out << CrCbY4Y3Y2Y1_a[15];

  color_out << CrCbY4Y3Y2Y1_a[16];
  color_out << CrCbY4Y3Y2Y1_a[17];
  color_out << CrCbY4Y3Y2Y1_a[18];
  color_out << CrCbY4Y3Y2Y1_a[19];
  color_out << CrCbY4Y3Y2Y1_a[20];
  color_out << CrCbY4Y3Y2Y1_a[21];
  color_out << CrCbY4Y3Y2Y1_a[22];
  color_out << CrCbY4Y3Y2Y1_a[23];


  
  
  int eight = four + four;
  sixteen = eight + eight;
  half2 h2_one = 1 | half2(shift(1, sixteen));
  uhalf2 uh2_half = shift(uhalf2(h2_one), sixteen - 1);
  uhalf2 uh2_almost_half = uh2_half - uhalf2(h2_one);

  

  int dest_idx = 0;
  int src_idx = 0;
  int idx0 = cid();
  int idx1 = (idx0 - 1) & seven;
  int idx2 = (idx1 - 1) & seven;
  int idx3 = (idx2 - 1) & seven;
  int idx4 = (idx3 - 1) & seven;
  int idx5 = (idx4 - 1) & seven;
  int idx6 = (idx5 - 1) & seven;
  int idx7 = (idx6 - 1) & seven;

  i = 3;
  loop_count(i) pipeline(67) {

  stage(1);

    
    a0 = CrCbY4Y3Y2Y1_a[0+src_idx];
    a1 = CrCbY4Y3Y2Y1_a[1+src_idx];
    a2 = CrCbY4Y3Y2Y1_a[2+src_idx];
    a3 = CrCbY4Y3Y2Y1_a[3+src_idx];
    a4 = CrCbY4Y3Y2Y1_a[4+src_idx];
    a5 = CrCbY4Y3Y2Y1_a[5+src_idx];
    a6 = CrCbY4Y3Y2Y1_a[6+src_idx];
    a7 = CrCbY4Y3Y2Y1_a[7+src_idx];
    src_idx = src_idx + eight;
  
    
    half2 s16, s07, s25, s34, s1625, s0734;
  
    s07 = a0 + a7;
    s16 = a1 + a6;
    s25 = a2 + a5;
    s34 = a3 + a4;
    s1625 = s16 + s25;
    s0734 = s07 + s34;
  

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -