⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 me_fns_kc.cpp

📁 H.264完整的C语言代码和DCT的代码
💻 CPP
字号:
#include "idb_kernelc.hpp"
#include "me_kc.hpp"
#include "idb_kernelc2.hpp"

// Shuffle control words used by extract_ref_MB to realign words when
// the horizontal position is not a multiple of 4.  extract_ref_MB picks
// one of them from the two low bits of its biased x coordinate
// (new_x & 3).  Assuming select(c, a, b) yields a when c is set
// (TODO confirm), the mapping is 0 -> SHUF0, 1 -> SHUF1, 2 -> SHUF2,
// 3 -> SHUF3.  The encoding of the control word itself is defined by
// the shuffled() intrinsic, which is not visible in this file.
#define SHUF0 0x38281808
#define SHUF1 0x28180883
#define SHUF2 0x18088382
#define SHUF3 0x08838281

// Tests whether the candidate offset (test_x, test_y) lies inside the
// search window, bounds inclusive:
//   top_range  <= test_y <= bottom_range
//   left_range <= test_x <= right_range
//
// The bounds are offsets relative to the macroblock currently being
// processed, not absolute positions in the reference frame, so the top
// and left bounds may be negative.  For example, with a 16x16 search
// range and a macroblock somewhere in the middle of the top row, the
// bounds are (0, 16, -16, 16).
//
// The result is the bitwise complement of the OR of the four
// "out of bounds" comparison results.  When a true comparison
// evaluates to all ones this is -1 for an in-range point and 0
// otherwise.  NOTE(review): if a true comparison evaluates to 1
// instead, an out-of-range point yields -2 rather than 0 (bit 0 is
// still clear) -- confirm which convention the target compiler uses.
inline int check_xy(int top_range, int bottom_range,
                    int left_range, int right_range,
                    int test_x, int test_y)
{
  // One flag per edge of the window; set when the point is past it.
  int above    = test_y < top_range;
  int below    = test_y > bottom_range;
  int left_of  = test_x < left_range;
  int right_of = test_x > right_range;

  // In range only if no edge was crossed.
  int any_outside = above | below | left_of | right_of;
  return(~any_outside);
}


// Save a macroblock to an array in the scratchpad.
//
//   save_arr - destination array; entries 0 through 7 are overwritten.
//   mb       - macroblock to store, declared through the MB_PARAM_VAL
//              macro (defined outside this file).  The body relies on
//              that macro introducing the eight names mb0..mb7.
//
// Word mbN is written to save_arr[N]; load_MB reads them back in the
// same layout.
inline void save_MB(array<ubyte4>& save_arr,
                    MB_PARAM_VAL(ubyte4, mb))
{
  save_arr[0] = mb0;
  save_arr[1] = mb1;
  save_arr[2] = mb2;
  save_arr[3] = mb3;
  save_arr[4] = mb4;
  save_arr[5] = mb5;
  save_arr[6] = mb6;
  save_arr[7] = mb7;
}


// Retrieve a saved macroblock from the scratchpad.
//
//   save_arr - source array; entries 0 through 7 are read.
//   mb       - receives the macroblock, declared through the
//              MB_PARAM_REF macro (defined outside this file).  The
//              body relies on that macro introducing the eight
//              assignable names mb0..mb7.
//
// save_arr[N] is copied into mbN, i.e. the layout written by save_MB.
inline void load_MB(array<ubyte4>& save_arr,
                    MB_PARAM_REF(ubyte4, mb))
{
  mb0 = save_arr[0];
  mb1 = save_arr[1];
  mb2 = save_arr[2];
  mb3 = save_arr[3];
  mb4 = save_arr[4];
  mb5 = save_arr[5];
  mb6 = save_arr[6];
  mb7 = save_arr[7];
}


// Get the MB located at (x,y) in the search region.  Unlike
// extract_ref_MB, no byte shuffle is applied: each output word is one
// whole word read from search_region, so the two low bits of the
// horizontal position do not affect which bytes are returned.
//
//   search_region    - array of ubyte4 words holding the search region.
//   start_idx        - added to the column index before wrap-around.
//   x, y             - location of the MB; range_x / range_y are added
//                      to them before indexing.
//   range_x, range_y - search range; range_x also determines num_cols.
//   sr_row   (out)   - row number times num_cols, i.e. the index of the
//                      first column of the row this cluster reads.
//   sr_col   (out)   - wrapped column index of the first word read.
//   rot_perm (out)   - the permutation value passed to commclperm.
//   ref_mb   (out)   - ref_mb0..ref_mb7, declared through MB_PARAM_REF
//                      (macro defined outside this file).
//
// sr_row, sr_col and rot_perm match the parameters of the same names
// taken by shift_ref_MB4.
//
// NOTE(review): shift(), select(), itocc(), lo(), cid() and
// commclperm() are defined elsewhere.  The comments below assume
// shift(v, -n) is a right shift by n and select(c, a, b) yields a when
// c is set, else b -- confirm.
inline void extract_ref_MB4(array<ubyte4>& search_region,
                            int start_idx,
                            int x, int y,
                            int range_x, int range_y,
                            int& sr_row, int& sr_col, int& rot_perm,
                            MB_PARAM_REF(ubyte4, ref_mb))
{
  // Calculate starting index into search_region
  int num_cols, sr_row_num;
  // Bias the coordinates by the search range.
  int new_x = x + range_x;
  int new_y = y + range_y;
  // num_cols is used below both as the wrap-around limit for column
  // indices and as the index stride from one row to the next.
  num_cols = shift(range_x, -1) + 4;
  // Four consecutive word columns, starting at new_x / 4 + start_idx.
  int sr_col0 = shift(new_x, -2) + start_idx;
  int sr_col1 = sr_col0 + 1;
  int sr_col2 = sr_col1 + 1;
  int sr_col3 = sr_col2 + 1;
  // Column indices wrap: any index >= num_cols has num_cols subtracted.
  sr_col0 = select(itocc(sr_col0 >= num_cols), sr_col0 - num_cols, sr_col0);
  sr_col1 = select(itocc(sr_col1 >= num_cols), sr_col1 - num_cols, sr_col1);
  sr_col2 = select(itocc(sr_col2 >= num_cols), sr_col2 - num_cols, sr_col2);
  sr_col3 = select(itocc(sr_col3 >= num_cols), sr_col3 - num_cols, sr_col3);
  sr_col = sr_col0;
  // Clusters whose id is below (new_y & 7) read from the next row
  // (new_y / 8 + 1); the others read from row new_y / 8.
  cc row_inc = itocc(cid() < (new_y & 0x7));
  sr_row_num = shift(new_y, -3);
  sr_row_num = select(row_inc, sr_row_num + 1, sr_row_num);
  sr_row = lo(sr_row_num * num_cols);
  int sr_idx0 = sr_row + sr_col0;
  int sr_idx1 = sr_row + sr_col1;
  int sr_idx2 = sr_row + sr_col2;
  int sr_idx3 = sr_row + sr_col3;
  // Same four columns, one row (num_cols words) further on.
  int sr_idx4 = sr_idx0 + num_cols;
  int sr_idx5 = sr_idx1 + num_cols;
  int sr_idx6 = sr_idx2 + num_cols;
  int sr_idx7 = sr_idx3 + num_cols;

  // Get rows 0-7 in the clusters.
  ref_mb0 = search_region[sr_idx0];
  ref_mb1 = search_region[sr_idx1];
  ref_mb2 = search_region[sr_idx2];
  ref_mb3 = search_region[sr_idx3];
  // Get rows 8-15 in the clusters
  ref_mb4 = search_region[sr_idx4];
  ref_mb5 = search_region[sr_idx5];
  ref_mb6 = search_region[sr_idx6];
  ref_mb7 = search_region[sr_idx7];

  // rotate values so that the first row of search region is in
  // cluster 0
  rot_perm = (new_y + cid()) & 0x7;
  ref_mb0 = commclperm(rot_perm, ref_mb0);
  ref_mb1 = commclperm(rot_perm, ref_mb1);
  ref_mb2 = commclperm(rot_perm, ref_mb2);
  ref_mb3 = commclperm(rot_perm, ref_mb3);
  ref_mb4 = commclperm(rot_perm, ref_mb4);
  ref_mb5 = commclperm(rot_perm, ref_mb5);
  ref_mb6 = commclperm(rot_perm, ref_mb6);
  ref_mb7 = commclperm(rot_perm, ref_mb7);
}


// Get the MB located at (x,y) in the search region, for any horizontal
// position.  Five consecutive words are read per row group and
// shuffled, and each output word is assembled from two neighbouring
// shuffled words, so positions that are not a multiple of 4 are
// handled.
//
//   search_region    - array of ubyte4 words holding the search region.
//   start_idx        - added to the column index before wrap-around.
//   x, y             - location of the MB; range_x / range_y are added
//                      to them before indexing.
//   range_x, range_y - search range; range_x also determines num_cols.
//   ref_mb   (out)   - ref_mb0..ref_mb7, declared through MB_PARAM_REF
//                      (macro defined outside this file).
//
// NOTE(review): shift(), select(), itocc(), lo(), cid(), shuffled(),
// hi_lo() and commclperm() are defined elsewhere.  The comments below
// assume shift(v, -n) is a right shift by n and select(c, a, b) yields
// a when c is set, else b -- confirm.
inline void extract_ref_MB(array<ubyte4>& search_region,
                           int start_idx,
                           int x, int y,
                           int range_x, int range_y,
                           MB_PARAM_REF(ubyte4, ref_mb))
{
  // Calculate starting index into search_region
  int num_cols, sr_row, sr_row_idx;
  // Bias the coordinates by the search range.
  int new_x = x + range_x;
  int new_y = y + range_y;
  // num_cols is used below both as the wrap-around limit for column
  // indices and as the index stride from one row to the next.
  num_cols = shift(range_x, -1) + 4;
  // Five consecutive word columns, starting at new_x / 4 + start_idx.
  int sr_col0 = shift(new_x, -2) + start_idx;
  int sr_col1 = sr_col0 + 1;
  int sr_col2 = sr_col1 + 1;
  int sr_col3 = sr_col2 + 1;
  int sr_col4 = sr_col3 + 1;
  // Column indices wrap: any index >= num_cols has num_cols subtracted.
  sr_col0 = select(itocc(sr_col0 >= num_cols), sr_col0 - num_cols, sr_col0);
  sr_col1 = select(itocc(sr_col1 >= num_cols), sr_col1 - num_cols, sr_col1);
  sr_col2 = select(itocc(sr_col2 >= num_cols), sr_col2 - num_cols, sr_col2);
  sr_col3 = select(itocc(sr_col3 >= num_cols), sr_col3 - num_cols, sr_col3);
  sr_col4 = select(itocc(sr_col4 >= num_cols), sr_col4 - num_cols, sr_col4);
  // Clusters whose id is below (new_y & 7) read from the next row
  // (new_y / 8 + 1); the others read from row new_y / 8.
  cc row_inc = itocc(cid() < (new_y & 0x7));
  sr_row = shift(new_y, -3);
  sr_row = select(row_inc, sr_row + 1, sr_row);
  sr_row_idx = lo(sr_row * num_cols);
  int sr_idx0 = sr_row_idx + sr_col0;
  int sr_idx1 = sr_row_idx + sr_col1;
  int sr_idx2 = sr_row_idx + sr_col2;
  int sr_idx3 = sr_row_idx + sr_col3;
  int sr_idx4 = sr_row_idx + sr_col4;
  // Same five columns, one row (num_cols words) further on.
  int sr_idx5 = sr_idx0 + num_cols;
  int sr_idx6 = sr_idx1 + num_cols;
  int sr_idx7 = sr_idx2 + num_cols;
  int sr_idx8 = sr_idx3 + num_cols;
  int sr_idx9 = sr_idx4 + num_cols;

  // Calculate shuffle permutation necessary to extract the correct 16
  // bytes out of a total of 5 32-bit words (to account for horizontal
  // indices that are not a multiple of 4).
  // The control word is chosen by the two low bits of new_x:
  // bit 1 picks between {SHUF2, SHUF3} and {SHUF0, SHUF1}, bit 0 picks
  // within the pair.
  cc x_nlsb = itocc((new_x & 2) != 0);
  cc x_lsb = itocc((new_x & 1) != 0);
  byte4 ctrl = select(x_nlsb,
                      select(x_lsb, SHUF3, SHUF2),
                      select(x_lsb, SHUF1, SHUF0));

  ubyte4 word0_lo, word0_hi, word1_lo, word1_hi;
  ubyte4 word2_lo, word2_hi, word3_lo, word3_hi;
  // Receives the halves of the first and last shuffle that are unused.
  ubyte4 dummy;

  // Get rows 0-7 in the clusters.
  // Each shuffled word k supplies the "hi" part of output word k and
  // the "lo" part of output word k-1; the two parts are OR-ed together.
  hi_lo(word0_hi,    dummy) = shuffled(search_region[sr_idx0], ctrl);
  hi_lo(word1_hi, word0_lo) = shuffled(search_region[sr_idx1], ctrl);
  hi_lo(word2_hi, word1_lo) = shuffled(search_region[sr_idx2], ctrl);
  hi_lo(word3_hi, word2_lo) = shuffled(search_region[sr_idx3], ctrl);
  hi_lo(dummy,    word3_lo) = shuffled(search_region[sr_idx4], ctrl);
  ref_mb0 = word0_hi | word0_lo;
  ref_mb1 = word1_hi | word1_lo;
  ref_mb2 = word2_hi | word2_lo;
  ref_mb3 = word3_hi | word3_lo;
  // Get rows 8-15 in the clusters
  // (same assembly as above, reusing the temporaries).
  hi_lo(word0_hi,    dummy) = shuffled(search_region[sr_idx5], ctrl);
  hi_lo(word1_hi, word0_lo) = shuffled(search_region[sr_idx6], ctrl);
  hi_lo(word2_hi, word1_lo) = shuffled(search_region[sr_idx7], ctrl);
  hi_lo(word3_hi, word2_lo) = shuffled(search_region[sr_idx8], ctrl);
  hi_lo(dummy,    word3_lo) = shuffled(search_region[sr_idx9], ctrl);
  ref_mb4 = word0_hi | word0_lo;
  ref_mb5 = word1_hi | word1_lo;
  ref_mb6 = word2_hi | word2_lo;
  ref_mb7 = word3_hi | word3_lo;

  // rotate values so that the first row of search region is in
  // cluster 0
  int rot_perm = (new_y + cid()) & 0x7;
  ref_mb0 = commclperm(rot_perm, ref_mb0);
  ref_mb1 = commclperm(rot_perm, ref_mb1);
  ref_mb2 = commclperm(rot_perm, ref_mb2);
  ref_mb3 = commclperm(rot_perm, ref_mb3);
  ref_mb4 = commclperm(rot_perm, ref_mb4);
  ref_mb5 = commclperm(rot_perm, ref_mb5);
  ref_mb6 = commclperm(rot_perm, ref_mb6);
  ref_mb7 = commclperm(rot_perm, ref_mb7);
}


// Shifts the reference macroblock 4 pixels to the right in the search
// region.  It assumes that the macroblock is originally aligned to a
// 4 pixel horizontal boundary.
//
//   search_region   - array of ubyte4 words holding the search region.
//   sr_row          - index of the first column of the row being read
//                     (added to the column index to form the array
//                     index).
//   sr_col (in/out) - column index of the macroblock's first word; on
//                     return it has advanced by one, wrapping to 0 when
//                     it reaches num_cols.
//   range_x         - search range; determines num_cols.
//   rot_perm        - permutation value passed to commclperm for the
//                     two newly loaded words.
//   ref_mb (in/out) - ref_mb0..ref_mb7, declared through MB_PARAM_REF
//                     (macro defined outside this file).
//
// sr_row, sr_col and rot_perm have the same names as the outputs of
// extract_ref_MB4.
//
// NOTE(review): the comments below assume shift(v, -n) is a right shift
// by n and select(c, a, b) yields a when c is set, else b -- confirm.
inline void shift_ref_MB4(array<ubyte4>& search_region,
                          int sr_row, int& sr_col,
                          int range_x,
                          int rot_perm,
                          MB_PARAM_REF(ubyte4, ref_mb))
{
  // Slide the three rightmost words of the first row group one slot
  // left; ref_mb3 is refilled below.
  ref_mb0 = ref_mb1;
  ref_mb1 = ref_mb2;
  ref_mb2 = ref_mb3;

  // Same for the second row group; ref_mb7 is refilled below.
  ref_mb4 = ref_mb5;
  ref_mb5 = ref_mb6;
  ref_mb6 = ref_mb7;

  // Same num_cols formula as in extract_ref_MB4.
  int num_cols = shift(range_x, -1) + 4;
  // Column of the new rightmost word: four past the old first column,
  // wrapped back into [0, num_cols).
  int sr_col_idx = sr_col + 4;
  sr_col_idx = select(itocc(sr_col_idx >= num_cols),
                      sr_col_idx - num_cols,
                      sr_col_idx);
  int sr_idx = sr_row + sr_col_idx;
  // Load the new word for each row group (the second one is num_cols
  // words further on) and apply the same cluster permutation.
  ref_mb3 = commclperm(rot_perm, search_region[sr_idx]);
  ref_mb7 = commclperm(rot_perm, search_region[sr_idx + num_cols]);

  // Advance the caller's column by one word, wrapping at num_cols.
  sr_col = sr_col + 1;
  sr_col = select(itocc(sr_col == num_cols), 0, sr_col);
}


// Compare the SAD of the MB at the location (test_x, test_y) in the
// search region with the previous best, and save (test_x, test_y) if
// its SAD is better.
//
//   ref_mb           - reference macroblock words ref_mb0..ref_mb7
//                      (declared through MB_PARAM_VAL).
//   mb               - current macroblock words mb0..mb7 (declared
//                      through MB_PARAM_VAL).
//   test_x, test_y   - location being evaluated; copied into
//                      mv_x / mv_y when this candidate wins.
//   in_range         - AND-ed bitwise with the result of the SAD
//                      comparison, so a candidate whose in_range is 0
//                      can never win (see check_xy).
//   mv_x, mv_y (i/o) - best location so far.
//   mv_sad     (i/o) - SAD of the best location so far.
//
// The comparison is strict (sad < mv_sad), so on equal SAD the earlier
// best is kept.
//
// NOTE(review): abd(), shuffled(), hi(), lo(), commucperm(), select()
// and itocc() are defined elsewhere; the comments assume select(c, a, b)
// yields a when c is set, else b -- confirm.
inline void compare_MB(MB_PARAM_VAL(ubyte4, ref_mb),
                       MB_PARAM_VAL(ubyte4, mb),
                       int test_x, int test_y, int in_range,
                       int& mv_x, int& mv_y, uint& mv_sad)
{
  // Take absolute differences of 8-bit values
  ubyte4 diff0 = abd(ref_mb0, mb0);
  ubyte4 diff1 = abd(ref_mb1, mb1);
  ubyte4 diff2 = abd(ref_mb2, mb2);
  ubyte4 diff3 = abd(ref_mb3, mb3);
  ubyte4 diff4 = abd(ref_mb4, mb4);
  ubyte4 diff5 = abd(ref_mb5, mb5);
  ubyte4 diff6 = abd(ref_mb6, mb6);
  ubyte4 diff7 = abd(ref_mb7, mb7);

  // Convert to 16-bit values and start summing in each cluster, and
  // convert to 32-bit value at the end
  // sadA0/sadA1 are reused as temporaries for each pair of diff words;
  // sadB0/sadB1 each sum two diff words, sadC0/sadC1 each sum four.
  double<uhalf2> sadA0, sadA1;
  uhalf2 sadB0, sadB1, sadC0, sadC1;
  uint sad;
  // diff0..diff3
  sadA0 = shuffled(uhalf2(diff0), 0x88318820);
  sadA1 = shuffled(uhalf2(diff1), 0x88318820);
  sadB0 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadA0 = shuffled(uhalf2(diff2), 0x88318820);
  sadA1 = shuffled(uhalf2(diff3), 0x88318820);
  sadB1 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadC0 = sadB0 + sadB1;
  // diff4..diff7
  sadA0 = shuffled(uhalf2(diff4), 0x88318820);
  sadA1 = shuffled(uhalf2(diff5), 0x88318820);
  sadB0 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadA0 = shuffled(uhalf2(diff6), 0x88318820);
  sadA1 = shuffled(uhalf2(diff7), 0x88318820);
  sadB1 = (hi(sadA0) + lo(sadA0)) + (hi(sadA1) + lo(sadA1));
  sadC1 = sadB0 + sadB1;
  // Combine both halves and collapse to a single 32-bit per-cluster sum.
  double<uhalf2> final_sad = shuffled(sadC0 + sadC1, 0x88883120);
  sad = uint(hi(final_sad) + lo(final_sad));

  // accumulate across clusters
  // Three steps: each adds to sad the value of sad obtained through
  // commucperm with the given permutation constant.
  uc<int> tree_sum1 = 0x67452301;
  uc<int> tree_sum2 = 0x44660022;
  uc<int> tree_sum3 = 0x00004444;
  sad = sad + commucperm(tree_sum1, sad);
  sad = sad + commucperm(tree_sum2, sad);
  sad = sad + commucperm(tree_sum3, sad);

  // Save current loc as best match if SAD is less than previous best
  // (and only if the location was flagged as in range).
  cc better = itocc((sad < mv_sad) & in_range);
  mv_x = select(better, test_x, mv_x);
  mv_y = select(better, test_y, mv_y);
  mv_sad = select(better, sad, mv_sad);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -