⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 me_fast_jitter2_kc.i

📁 H.264完整的C语言代码和DCT的代码
💻 I
📖 第 1 页 / 共 2 页
字号:
  outside = test_y > bottom_range;
  in_range = in_range & ~outside;
  
  outside = test_x < left_range;
  in_range = in_range & ~outside;
  
  outside = test_x > right_range;
  in_range = in_range & ~outside;

  return(in_range);
}



// Stores the eight macroblock words mb0..mb7 into save_arr[0..7], in order.
//
//   save_arr  destination array; elements 0 through 7 are overwritten
//   mb0..mb7  the eight words to save (passed by value)
//
// load_MB performs the inverse copy.
inline void save_MB(array<ubyte4>& save_arr,
                    ubyte4 mb0, ubyte4 mb1, ubyte4 mb2, ubyte4 mb3, ubyte4 mb4, ubyte4 mb5, ubyte4 mb6, ubyte4 mb7)
{
  save_arr[0] = mb0;
  save_arr[1] = mb1;
  save_arr[2] = mb2;
  save_arr[3] = mb3;
  save_arr[4] = mb4;
  save_arr[5] = mb5;
  save_arr[6] = mb6;
  save_arr[7] = mb7;
}



// Reads save_arr[0..7] back into the eight output words mb0..mb7, in order.
//
//   save_arr  source array; elements 0 through 7 are read
//   mb0..mb7  outputs (by reference), each overwritten with the matching element
//
// save_MB performs the inverse copy.
inline void load_MB(array<ubyte4>& save_arr,
                    ubyte4& mb0, ubyte4& mb1, ubyte4& mb2, ubyte4& mb3, ubyte4& mb4, ubyte4& mb5, ubyte4& mb6, ubyte4& mb7)
{
  mb0 = save_arr[0];
  mb1 = save_arr[1];
  mb2 = save_arr[2];
  mb3 = save_arr[3];
  mb4 = save_arr[4];
  mb5 = save_arr[5];
  mb6 = save_arr[6];
  mb7 = save_arr[7];
}



// Fetches a reference block from search_region at a word-aligned column:
// four consecutive words from one search-region row (ref_mb0..ref_mb3) and the
// four words directly below them, one row (num_cols entries) further on
// (ref_mb4..ref_mb7). No byte realignment is done here; compare with
// extract_ref_MB, which also shuffles bytes for the low two bits of x.
//
//   search_region     array holding the search window, indexed row * num_cols + col
//   start_idx         column offset added to the computed column before wrapping
//   x, y              candidate position; range_x / range_y are added to each
//   range_x, range_y  offsets added to x / y; range_x also sets the row width
//                     (num_cols = shift(range_x, -1) + 4)
//   sr_row            out: index of the first word of the selected row
//   sr_col            out: wrapped column of the first fetched word
//   rot_perm          out: (new_y + cid()) & 0x7, the value passed to commclperm
//   ref_mb0..ref_mb7  out: the fetched words after the commclperm step
//
// sr_row, sr_col and rot_perm match the inputs that shift_ref_MB4 takes.
// NOTE(review): shift() with a negative count is assumed to be a right shift
// (so -1 is /2, -2 is /4, -3 is /8) - confirm against the KernelC reference.
inline void extract_ref_MB4(array<ubyte4>& search_region,
                            int start_idx,
                            int x, int y,
                            int range_x, int range_y,
                            int& sr_row, int& sr_col, int& rot_perm,
                            ubyte4& ref_mb0, ubyte4& ref_mb1, ubyte4& ref_mb2, ubyte4& ref_mb3, ubyte4& ref_mb4, ubyte4& ref_mb5, ubyte4& ref_mb6, ubyte4& ref_mb7)
{
  // Bias the candidate position by the range offsets.
  int num_cols, sr_row_num;
  int new_x = x + range_x;
  int new_y = y + range_y;
  num_cols = shift(range_x, -1) + 4;
  // Four consecutive columns starting at shift(new_x, -2) + start_idx.
  int sr_col0 = shift(new_x, -2) + start_idx;
  int sr_col1 = sr_col0 + 1;
  int sr_col2 = sr_col1 + 1;
  int sr_col3 = sr_col2 + 1;
  // Wrap each column back into [0, num_cols) with a single subtraction, so a
  // column is expected to overshoot by less than num_cols.
  sr_col0 = select(itocc(sr_col0 >= num_cols), sr_col0 - num_cols, sr_col0);
  sr_col1 = select(itocc(sr_col1 >= num_cols), sr_col1 - num_cols, sr_col1);
  sr_col2 = select(itocc(sr_col2 >= num_cols), sr_col2 - num_cols, sr_col2);
  sr_col3 = select(itocc(sr_col3 >= num_cols), sr_col3 - num_cols, sr_col3);
  sr_col = sr_col0;
  // Clusters whose id is below (new_y & 7) read from the following row.
  cc row_inc = itocc(cid() < (new_y & 0x7));
  sr_row_num = shift(new_y, -3);
  sr_row_num = select(row_inc, sr_row_num + 1, sr_row_num);
  sr_row = lo(sr_row_num * num_cols);
  // Indices 0-3 address the selected row, 4-7 the row num_cols entries later.
  int sr_idx0 = sr_row + sr_col0;
  int sr_idx1 = sr_row + sr_col1;
  int sr_idx2 = sr_row + sr_col2;
  int sr_idx3 = sr_row + sr_col3;
  int sr_idx4 = sr_idx0 + num_cols;
  int sr_idx5 = sr_idx1 + num_cols;
  int sr_idx6 = sr_idx2 + num_cols;
  int sr_idx7 = sr_idx3 + num_cols;

  // Upper row of the block.
  ref_mb0 = search_region[sr_idx0];
  ref_mb1 = search_region[sr_idx1];
  ref_mb2 = search_region[sr_idx2];
  ref_mb3 = search_region[sr_idx3];
  // Lower row of the block.
  ref_mb4 = search_region[sr_idx4];
  ref_mb5 = search_region[sr_idx5];
  ref_mb6 = search_region[sr_idx6];
  ref_mb7 = search_region[sr_idx7];

  // Pass every word through commclperm with a per-cluster selector derived
  // from new_y. NOTE(review): presumably this rotates rows between clusters so
  // each cluster ends up with the row it must compare - confirm.
  rot_perm = (new_y + cid()) & 0x7;
  ref_mb0 = commclperm(rot_perm, ref_mb0);
  ref_mb1 = commclperm(rot_perm, ref_mb1);
  ref_mb2 = commclperm(rot_perm, ref_mb2);
  ref_mb3 = commclperm(rot_perm, ref_mb3);
  ref_mb4 = commclperm(rot_perm, ref_mb4);
  ref_mb5 = commclperm(rot_perm, ref_mb5);
  ref_mb6 = commclperm(rot_perm, ref_mb6);
  ref_mb7 = commclperm(rot_perm, ref_mb7);
}



// Fetches a reference block from search_region at an arbitrary x position.
// Five consecutive words are read from each of two rows; neighbouring words
// are shuffled with a control word chosen from the low two bits of new_x and
// OR-ed together to produce four output words per row (ref_mb0..ref_mb3 for
// the first row, ref_mb4..ref_mb7 for the row num_cols entries later).
//
//   search_region     array holding the search window, indexed row * num_cols + col
//   start_idx         column offset added to the computed column before wrapping
//   x, y              candidate position; range_x / range_y are added to each
//   range_x, range_y  offsets added to x / y; range_x also sets the row width
//                     (num_cols = shift(range_x, -1) + 4)
//   ref_mb0..ref_mb7  out: the assembled words after the commclperm step
//
// NOTE(review): shift() with a negative count is assumed to be a right shift
// - confirm against the KernelC reference.
inline void extract_ref_MB(array<ubyte4>& search_region,
                           int start_idx,
                           int x, int y,
                           int range_x, int range_y,
                           ubyte4& ref_mb0, ubyte4& ref_mb1, ubyte4& ref_mb2, ubyte4& ref_mb3, ubyte4& ref_mb4, ubyte4& ref_mb5, ubyte4& ref_mb6, ubyte4& ref_mb7)
{
  // Bias the candidate position by the range offsets.
  int num_cols, sr_row, sr_row_idx;
  int new_x = x + range_x;
  int new_y = y + range_y;
  num_cols = shift(range_x, -1) + 4;
  // Five consecutive columns: one more than the four output words, because
  // each output word is built from two adjacent input words.
  int sr_col0 = shift(new_x, -2) + start_idx;
  int sr_col1 = sr_col0 + 1;
  int sr_col2 = sr_col1 + 1;
  int sr_col3 = sr_col2 + 1;
  int sr_col4 = sr_col3 + 1;
  // Wrap each column back into [0, num_cols) with a single subtraction.
  sr_col0 = select(itocc(sr_col0 >= num_cols), sr_col0 - num_cols, sr_col0);
  sr_col1 = select(itocc(sr_col1 >= num_cols), sr_col1 - num_cols, sr_col1);
  sr_col2 = select(itocc(sr_col2 >= num_cols), sr_col2 - num_cols, sr_col2);
  sr_col3 = select(itocc(sr_col3 >= num_cols), sr_col3 - num_cols, sr_col3);
  sr_col4 = select(itocc(sr_col4 >= num_cols), sr_col4 - num_cols, sr_col4);
  // Clusters whose id is below (new_y & 7) read from the following row.
  cc row_inc = itocc(cid() < (new_y & 0x7));
  sr_row = shift(new_y, -3);
  sr_row = select(row_inc, sr_row + 1, sr_row);
  sr_row_idx = lo(sr_row * num_cols);
  // Indices 0-4 address the selected row, 5-9 the row num_cols entries later.
  int sr_idx0 = sr_row_idx + sr_col0;
  int sr_idx1 = sr_row_idx + sr_col1;
  int sr_idx2 = sr_row_idx + sr_col2;
  int sr_idx3 = sr_row_idx + sr_col3;
  int sr_idx4 = sr_row_idx + sr_col4;
  int sr_idx5 = sr_idx0 + num_cols;
  int sr_idx6 = sr_idx1 + num_cols;
  int sr_idx7 = sr_idx2 + num_cols;
  int sr_idx8 = sr_idx3 + num_cols;
  int sr_idx9 = sr_idx4 + num_cols;

  // Pick one of four shuffle control words from bits 1 and 0 of new_x.
  // NOTE(review): the control constants presumably encode the byte
  // realignment for x mod 4 = 3, 2, 1, 0 respectively - confirm against the
  // shuffled() encoding.
  cc x_nlsb = itocc((new_x & 2) != 0);
  cc x_lsb = itocc((new_x & 1) != 0);
  byte4 ctrl = select(x_nlsb,
                      select(x_lsb, 0x08838281, 0x18088382),
                      select(x_lsb, 0x28180883, 0x38281808));

  ubyte4 word0_lo, word0_hi, word1_lo, word1_hi;
  ubyte4 word2_lo, word2_hi, word3_lo, word3_hi;
  ubyte4 dummy;

  // First row: output word k is the "hi" part of input word k OR-ed with the
  // "lo" part of input word k+1. The unused halves at both ends go to dummy.
  hi_lo(word0_hi,    dummy) = shuffled(search_region[sr_idx0], ctrl);
  hi_lo(word1_hi, word0_lo) = shuffled(search_region[sr_idx1], ctrl);
  hi_lo(word2_hi, word1_lo) = shuffled(search_region[sr_idx2], ctrl);
  hi_lo(word3_hi, word2_lo) = shuffled(search_region[sr_idx3], ctrl);
  hi_lo(dummy,    word3_lo) = shuffled(search_region[sr_idx4], ctrl);
  ref_mb0 = word0_hi | word0_lo;
  ref_mb1 = word1_hi | word1_lo;
  ref_mb2 = word2_hi | word2_lo;
  ref_mb3 = word3_hi | word3_lo;
  // Second row: same assembly, reusing the temporaries.
  hi_lo(word0_hi,    dummy) = shuffled(search_region[sr_idx5], ctrl);
  hi_lo(word1_hi, word0_lo) = shuffled(search_region[sr_idx6], ctrl);
  hi_lo(word2_hi, word1_lo) = shuffled(search_region[sr_idx7], ctrl);
  hi_lo(word3_hi, word2_lo) = shuffled(search_region[sr_idx8], ctrl);
  hi_lo(dummy,    word3_lo) = shuffled(search_region[sr_idx9], ctrl);
  ref_mb4 = word0_hi | word0_lo;
  ref_mb5 = word1_hi | word1_lo;
  ref_mb6 = word2_hi | word2_lo;
  ref_mb7 = word3_hi | word3_lo;

  // Pass every word through commclperm with a per-cluster selector derived
  // from new_y. NOTE(review): presumably an inter-cluster row rotation -
  // confirm.
  int rot_perm = (new_y + cid()) & 0x7;
  ref_mb0 = commclperm(rot_perm, ref_mb0);
  ref_mb1 = commclperm(rot_perm, ref_mb1);
  ref_mb2 = commclperm(rot_perm, ref_mb2);
  ref_mb3 = commclperm(rot_perm, ref_mb3);
  ref_mb4 = commclperm(rot_perm, ref_mb4);
  ref_mb5 = commclperm(rot_perm, ref_mb5);
  ref_mb6 = commclperm(rot_perm, ref_mb6);
  ref_mb7 = commclperm(rot_perm, ref_mb7);
}





// Slides an already-fetched reference block one word column to the right.
// The three rightmost words of each row move left by one slot, and the newly
// exposed rightmost word of each row is loaded from search_region at column
// sr_col + 4 (wrapped). sr_col is then advanced by one, wrapping to 0.
//
//   search_region     array holding the search window
//   sr_row            index of the first word of the current row
//   sr_col            in/out: column of the current leftmost word; incremented
//   range_x           sets the row width (num_cols = shift(range_x, -1) + 4)
//   rot_perm          selector passed to commclperm for the new words
//   ref_mb0..ref_mb7  in/out: block words; 0-3 are one row, 4-7 the other
//
// sr_row, sr_col and rot_perm match the outputs of extract_ref_MB4.
inline void shift_ref_MB4(array<ubyte4>& search_region,
                          int sr_row, int& sr_col,
                          int range_x,
                          int rot_perm,
                          ubyte4& ref_mb0, ubyte4& ref_mb1, ubyte4& ref_mb2, ubyte4& ref_mb3, ubyte4& ref_mb4, ubyte4& ref_mb5, ubyte4& ref_mb6, ubyte4& ref_mb7)
{
  // First row: drop word 0 and move words 1-3 down one slot.
  ref_mb0 = ref_mb1;
  ref_mb1 = ref_mb2;
  ref_mb2 = ref_mb3;

  // Second row: same move.
  ref_mb4 = ref_mb5;
  ref_mb5 = ref_mb6;
  ref_mb6 = ref_mb7;

  // Column of the incoming word is four past the old leftmost column,
  // wrapped with a single subtraction.
  int num_cols = shift(range_x, -1) + 4;
  int sr_col_idx = sr_col + 4;
  sr_col_idx = select(itocc(sr_col_idx >= num_cols),
                      sr_col_idx - num_cols,
                      sr_col_idx);
  int sr_idx = sr_row + sr_col_idx;
  // Load the new rightmost word of each row and apply the same commclperm
  // selector as the words already held.
  ref_mb3 = commclperm(rot_perm, search_region[sr_idx]);
  ref_mb7 = commclperm(rot_perm, search_region[sr_idx + num_cols]);

  // Advance the leftmost column, wrapping to 0 on reaching num_cols.
  sr_col = sr_col + 1;
  sr_col = select(itocc(sr_col == num_cols), 0, sr_col);
}





// Scores one candidate position and keeps it if it beats the current best.
// The eight reference words are differenced against the eight current words
// with abd(), the differences are reduced to one sum ("sad"), that sum is
// combined across clusters, and mv_x / mv_y / mv_sad are replaced by the
// candidate when sad < mv_sad and in_range is set.
//
//   ref_mb0..ref_mb7  reference block words
//   mb0..mb7          current block words
//   test_x, test_y    candidate position being scored
//   in_range          mask AND-ed with the comparison result; a zero value
//                     blocks the update
//   mv_x, mv_y        in/out: best position so far
//   mv_sad            in/out: score of the best position so far
inline void compare_MB(ubyte4 ref_mb0, ubyte4 ref_mb1, ubyte4 ref_mb2, ubyte4 ref_mb3, ubyte4 ref_mb4, ubyte4 ref_mb5, ubyte4 ref_mb6, ubyte4 ref_mb7,
                       ubyte4 mb0, ubyte4 mb1, ubyte4 mb2, ubyte4 mb3, ubyte4 mb4, ubyte4 mb5, ubyte4 mb6, ubyte4 mb7,
                       int test_x, int test_y, int in_range,
                       int& mv_x, int& mv_y, uint& mv_sad)
{
  // Word-wise abd() of reference against current.
  // NOTE(review): abd is presumably a per-byte absolute difference - confirm.
  ubyte4 diff0 = abd(ref_mb0, mb0);
  ubyte4 diff1 = abd(ref_mb1, mb1);
  ubyte4 diff2 = abd(ref_mb2, mb2);
  ubyte4 diff3 = abd(ref_mb3, mb3);
  ubyte4 diff4 = abd(ref_mb4, mb4);
  ubyte4 diff5 = abd(ref_mb5, mb5);
  ubyte4 diff6 = abd(ref_mb6, mb6);
  ubyte4 diff7 = abd(ref_mb7, mb7);

  // Reduce the eight difference words to one scalar. Each word is shuffled
  // into a double<uhalf2> whose hi and lo parts are added; pairs of words are
  // summed into sadB*, then into sadC0 (words 0-3) and sadC1 (words 4-7).
  // NOTE(review): the shuffle presumably widens bytes to halfwords so the
  // additions cannot overflow a byte - confirm against shuffled() encoding.
  double<uhalf2> sadA0, sadA1;
  uhalf2 sadB0, sadB1, sadC0, sadC1;
  uint sad;
  sadA0 = shuffled(uhalf2(diff0), 0x88318820);
  sadA1 = shuffled(uhalf2(diff1), 0x88318820);
  sadB0 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadA0 = shuffled(uhalf2(diff2), 0x88318820);
  sadA1 = shuffled(uhalf2(diff3), 0x88318820);
  sadB1 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadC0 = sadB0 + sadB1;
  sadA0 = shuffled(uhalf2(diff4), 0x88318820);
  sadA1 = shuffled(uhalf2(diff5), 0x88318820);
  sadB0 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadA0 = shuffled(uhalf2(diff6), 0x88318820);
  sadA1 = shuffled(uhalf2(diff7), 0x88318820);
  sadB1 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadC1 = sadB0 + sadB1;
  // Final shuffle splits the combined value once more; hi + lo gives the
  // scalar for this cluster.
  double<uhalf2> final_sad = shuffled(sadC0 + sadC1, 0x88883120);
  sad = uint(hi(final_sad) + lo(final_sad));

  // Three add-and-permute steps combine sad across clusters.
  // NOTE(review): if each hex digit names the source cluster for one
  // destination cluster, the patterns exchange at distance 1, 2 and 4, which
  // would leave the full total in every cluster - confirm.
  uc<int> tree_sum1 = 0x67452301;
  uc<int> tree_sum2 = 0x44660022;
  uc<int> tree_sum3 = 0x00004444;
  sad = sad + commucperm(tree_sum1, sad);
  sad = sad + commucperm(tree_sum2, sad);
  sad = sad + commucperm(tree_sum3, sad);

  // Strictly-better test: ties keep the existing best. in_range gates it.
  cc better = itocc((sad < mv_sad) & in_range);
  mv_x = select(better, test_x, mv_x);
  mv_y = select(better, test_y, mv_y);
  mv_sad = select(better, sad, mv_sad);
}
#line 87 "D:\\working\\im_apps\\h264\\me_kc.hpp"
#line 88 "D:\\working\\im_apps\\h264\\me_kc.hpp"

#line 90 "D:\\working\\im_apps\\h264\\me_kc.hpp"
#line 4 "D:\\working\\im_apps\\h264\\me_fast_jitter2_kc.cpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_kernelc2.hpp"






#line 5 "D:\\working\\im_apps\\h264\\me_fast_jitter2_kc.cpp"
























;
// Kernel: refines one motion vector per macroblock by testing nine candidate
// positions around the incoming vector and writing back the best one.
//
//   row0, row1, row2  input streams feeding the search region; each stream
//                     fills two of the six 12-entry rows of search_region
//   mblocks           input stream of current-block words, eight per iteration
//   motions_in        conditional input stream: per iteration one half2 vector
//                     followed by one score, read on cluster 0 only
//   motions_out       conditional output stream: refined vector and score,
//                     written on cluster 0 only
//   uc_margin         four packed 8-bit margins: top (bits 0-7), bottom (8-15),
//                     left (16-23), right (24-31)
//   uc_mblks          block count; multiplied by 16 via shift(mblks, 4)
//
// Per iteration the candidates are the 3x3 grid with spacing 2 centred on the
// halved incoming vector; the result is doubled again before being written.
// NOTE(review): the halving/doubling suggests the streams carry vectors at
// twice the resolution used for the search - confirm with the caller.
kernel me_fast_jitter2(istream<ubyte4> row0,
		       istream<ubyte4> row1,
		       istream<ubyte4> row2,
		       istream<ubyte4> mblocks,
		       cistream<half2> motions_in,
		       costream<half2> motions_out,
		       uc<int>& uc_margin,
		       uc<int>& uc_mblks)    
{
  // Bring the two microcontroller parameters into cluster variables.
  synch();
  int margin = commclperm(8, 0, uc_margin);
  int mblks = commclperm(8, 0, uc_mblks);

  // Unpack the four byte-wide margins.
  int top_margin = margin & 0xFF;
  int bottom_margin = shift(margin, -8) & 0xFF;
  int left_margin = shift(margin, -16) & 0xFF;
  int right_margin = shift(margin, -24) & 0xFF;

  // clzero is true only on cluster 0; cc_true is true everywhere.
  cc clzero = itocc(cid() == 0);
  cc cc_true = itocc(0 == 0);
  cc dummy_cc;

  // Current block words.
  ubyte4 mb0, mb1, mb2, mb3, mb4, mb5, mb6, mb7;

  // Reference block words for the candidate under test.
  ubyte4 ref_mb0, ref_mb1, ref_mb2, ref_mb3, ref_mb4, ref_mb5, ref_mb6, ref_mb7;

  // Motion vector state and candidate iteration variables.
  half2 mv_xy;
  int mv_x, mv_y, x, y, test_x, test_y;
  cc wrap;
  uint mv_sad;
  int in_range;

  // Allowed candidate window relative to the current block:
  //   top_range    = -min(top_margin, 16)
  //   bottom_range =  min(bottom_margin, 15)
  //   left_range   = -left_margin
  //   right_range  =  min(num_cols_to_right, 15)
  // left_range and right_range are updated at the end of every iteration.
  int top_range = 0 - select(itocc(top_margin < 16), top_margin, 16);
  int bottom_range = select(itocc(bottom_margin < 15), bottom_margin, 15);
  int left_range = 0 - left_margin;
  // Columns remaining to the right of the first block.
  int num_cols_to_right = shift(mblks, 4) + right_margin - 16;
  int right_range = select(itocc(num_cols_to_right < 15), num_cols_to_right, 15);

  // Search region: 6 rows of 12 entries (row bases 0, 12, 24, 36, 48, 60).
  array<ubyte4> search_region(72);
  // Pre-load the columns needed before the first block is processed. The
  // number of columns is (left_margin + right_range + 1) / 4 and loading
  // starts at column (16 - left_margin) / 4.
  // NOTE(review): the four-argument commclperm appears to be how the count
  // in i is transferred into the uc variable loopcnt - confirm.
  int i = shift(left_margin + right_range + 1, -2);
  uc<int> loopcnt;
  i = commclperm(0, i, 0, loopcnt);
  int idx = shift(16 - left_margin, -2);
  loop_count(loopcnt) pipeline(1) {
    row0(ALL, cc_true) >> search_region[0+idx]; row0(ALL, cc_true) >> search_region[12+idx]; row1(ALL, cc_true) >> search_region[24+idx]; row1(ALL, cc_true) >> search_region[36+idx]; row2(ALL, cc_true) >> search_region[48+idx]; row2(ALL, cc_true) >> search_region[60+idx]; idx = idx + 1;
  }
  int num_cols_left_to_load = shift(mblks, 4);

  loop_stream(mblocks) {
    // Load four more search-region columns while input columns remain; the
    // reads are conditional on do_read but idx advances either way.
    cc do_read = itocc(num_cols_left_to_load > 0);
    loopcnt = 4;
    loop_count(loopcnt) pipeline(1) {
      row0(ALL, do_read) >> search_region[0+idx]; row0(ALL, do_read) >> search_region[12+idx]; row1(ALL, do_read) >> search_region[24+idx]; row1(ALL, do_read) >> search_region[36+idx]; row2(ALL, do_read) >> search_region[48+idx]; row2(ALL, do_read) >> search_region[60+idx]; idx = idx + 1;;
    }
    // The 12 columns are used as a ring: wrap the write column at 12.
    cc wrap_idx = itocc(idx == 12);
    idx = select(wrap_idx, 0, idx);

    // Current block words arrive interleaved: 0, 4, 1, 5, then 2, 6, 3, 7.
    mblocks >> mb0 >> mb4 >> mb1 >> mb5;
    mblocks >> mb2 >> mb6 >> mb3 >> mb7;

    // Read the incoming vector and score on cluster 0, then pass both
    // through commclperm(0, ...) so every cluster uses cluster 0's values.
    motions_in(clzero, dummy_cc) >> mv_xy;
    motions_in(clzero, dummy_cc) >> mv_sad;
    mv_xy = commclperm(0, mv_xy);
    mv_sad = commclperm(0, mv_sad);
    // Halve both components (arithmetic shift) and unpack them to ints.
    mv_xy = shifta(mv_xy, -1);
    hi_lo(mv_y, mv_x) = shuffled(int(mv_xy), 0x75643120);

    // Walk a 3x3 grid with spacing 2 starting at (mv_x - 2, mv_y - 2):
    // x steps +2 three times, then resets (-4) while y steps +2.
    x = mv_x - 2;
    y = mv_y - 2;
    i = 0;
    loopcnt = 9;
    loop_count(loopcnt) pipeline(1) {
      // NOTE(review): `%=` is reproduced exactly as found in the preprocessed
      // source; it presumably acts as an assignment form specific to this
      // toolchain rather than a modulo - confirm before touching.
      test_x %= x;
      test_y %= y;
      in_range = check_xy(top_range, bottom_range, left_range, right_range, test_x, test_y);
      // idx (the next write column) is the ring origin; 16, 16 gives
      // num_cols = 12, matching the search_region row width.
      extract_ref_MB(search_region, idx, test_x, test_y, 16, 16, ref_mb0, ref_mb1, ref_mb2, ref_mb3, ref_mb4, ref_mb5, ref_mb6, ref_mb7);
      compare_MB(ref_mb0, ref_mb1, ref_mb2, ref_mb3, ref_mb4, ref_mb5, ref_mb6, ref_mb7, mb0, mb1, mb2, mb3, mb4, mb5, mb6, mb7, test_x, test_y, in_range, mv_x, mv_y, mv_sad);
      // i counts positions within a grid row; wrap fires after the third.
      i = i + 1;
      wrap = itocc(i == 3);
      i = select(wrap, 0, i);
      x = select(wrap, x - 4, x + 2);
      y = select(wrap, y + 2, y);
    }

    // Repack (y in the high half, x in the low half), double, and write the
    // vector and its score from cluster 0.
    mv_xy = shift(half2(shift(mv_y, 16) | (mv_x & 0xffff)), 1);
    motions_out(clzero) << mv_xy;
    motions_out(clzero) << mv_sad;

    // Advance the horizontal window by 16 for the next block: left_range
    // grows more negative but is clamped at -16; right_range is recomputed
    // from the remaining columns.
    left_range = select(itocc(left_range > -16), left_range - 16, left_range);
    left_range = select(itocc(left_range < -16), -16, left_range);
    num_cols_to_right = num_cols_to_right - 16;
    right_range = select(itocc(num_cols_to_right < 15), num_cols_to_right, 15);
    num_cols_left_to_load = num_cols_left_to_load - 16;
  }

  flush(motions_out, 0);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -