me_fast_jitter1_kc.cpp

来自「H.264完整的C语言代码和DCT的代码」· C++ 代码 · 共 183 行
CPP
183 行
#include "idb_kernelc.hpp"   
#include "mpeg.hpp"
#include "me_kc.hpp"
#include "idb_kernelc2.hpp"   

// Horizontal search limits used by me_fast_jitter1: left_range is clamped
// to -RANGE_X and right_range to RANGE_X_MINUS_1.  RANGE_X is also passed
// to extract_ref_MB and used to compute the initial search_region index.
// NOTE(review): units appear to be pixel columns (one macroblock
// contributes 16, cf. shift(mblks, 4) in the kernel) -- confirm.
#define RANGE_X 16
#define RANGE_X_MINUS_1 15
// NOTE: If you change RANGE_Y, then you also have to change
// LOAD_4_SEARCH_COLS in me_kc.hpp
// Vertical search limits: top_range is clamped to -RANGE_Y and
// bottom_range to RANGE_Y_MINUS_1.  RANGE_Y is also passed to
// extract_ref_MB.
#define RANGE_Y 16
#define RANGE_Y_MINUS_1 15

// S.R. == search region

// Input parameters:
//
//   uc_margin:
//
//     each byte is an unsigned count of how many blocks of "margin"
//     are available on the outskirts of the S.R.  In other words, if
//     mblocks is an entire row of macroblocks, the left and right
//     margins would be '0' since we cannot search any further in
//     those directions.  Note, that macroblocks not on the edges will
//     not necessarily care about the margin values.  The top and
//     bottom margins are the same for each macroblock to be matched.
//     The order of the bytes from LSByte to MSByte is top, bottom,
//     left, right.

// Kernel: me_fast_jitter1
//
// Final refinement pass of a motion search at a granularity of 1: for every
// macroblock read from `mblocks`, the previously found motion vector is
// read from `motions_in`, the nine candidates in the 3x3 neighbourhood
// starting at (mv_x - 1, mv_y - 1) are tested against the search region,
// and the resulting vector / SAD pair is written to `motions_out`.  The
// best-matching reference block and the load indices of the corresponding
// chrominance block are emitted as well.
//
// Streams:
//   row0, row1, row2 : inputs handed to LOAD_4_SEARCH_COLS to fill
//                      search_region (presumably the three macroblock rows
//                      of the reference frame that form the S.R. --
//                      confirm against me_kc.hpp).
//   mblocks          : macroblocks to match; 8 ubyte4 words are read per
//                      loop iteration.
//   motions_in       : conditional input; previous motion vector followed
//                      by its SAD, read under the cid() == 0 condition.
//   motions_out      : conditional output; refined motion vector followed
//                      by its SAD, written under the cid() == 0 condition.
//   refyblks         : 8 ubyte4 words per macroblock holding the reference
//                      block extracted at the final motion vector.
//   crcbindices      : 8 uint load indices per macroblock for the matching
//                      chrominance block.
//
// Microcontroller parameters:
//   uc_margin   : packed margins; bytes from LSByte to MSByte are top,
//                 bottom, left, right.
//   uc_offsets  : low 16 bits = x offset, upper bits = y offset of the
//                 current block.
//   uc_mblks    : number of macroblocks in `mblocks`.
//   uc_mb_width : number of macroblocks in a row.
//
// The KERNELDEF line below associates the kernel with its .uc file.
KERNELDEF(me_fast_jitter1, KERNELS_DIR "me_fast_jitter1_kc.uc");
kernel me_fast_jitter1(istream<ubyte4> row0,
		       istream<ubyte4> row1,
		       istream<ubyte4> row2,
		       istream<ubyte4> mblocks,
		       cistream<half2> motions_in,
		       costream<half2> motions_out,
		       ostream<ubyte4> refyblks,
		       ostream<uint>   crcbindices,
		       uc<int>& uc_margin,
		       uc<int>& uc_offsets,  // x, y offset of current block
		       uc<int>& uc_mblks,    // num MBs in mblocks
		       uc<int>& uc_mb_width) // number of macroblocks in row
{
  // read uc parameter
  // NOTE(review): commclperm(8, 0, uc_x) presumably distributes the
  // microcontroller value to the clusters -- confirm against the KernelC
  // reference.
  synch();
  int margin = commclperm(8, 0, uc_margin);
  int mblks = commclperm(8, 0, uc_mblks);
  uint offsets = uint(commclperm(8, 0, uc_offsets));
  uint mb_width  = uint(commclperm(8, 0, uc_mb_width));

  // init constants
  // Unpack the four margin bytes (top, bottom, left, right from LSByte to
  // MSByte).  shift() with a negative count is used as a right shift.
  int top_margin = margin & 0xFF;
  int bottom_margin = shift(margin, -8) & 0xFF;
  int left_margin = shift(margin, -16) & 0xFF;
  int right_margin = shift(margin, -24) & 0xFF;
  // Low 16 bits of offsets hold x, the remaining upper bits hold y.
  uint xoffset = offsets & 0xffff;
  uint yoffset = shift(offsets, -16);
  // Chrominance reference origin: half of the luminance offset, with the
  // cluster id added to the x coordinate.
  // NOTE(review): the halving presumably reflects 2:1 chroma subsampling
  // -- confirm.
  uint crefx = shift(xoffset, -1) + uint(cid());
  uint crefy = shift(yoffset, -1);
  // Row stride used for the chroma indices: mb_width * 64.
  uint crowlen = shift(mb_width, 6);

  // clzero is true only where cid() == 0; it gates the motion-vector
  // stream accesses.  cc_true is an always-true condition code.
  cc clzero = itocc(cid() == 0);
  cc cc_true = itocc(0 == 0);
  // Receives the second condition-code argument of the conditional reads;
  // it is not used afterwards.
  cc dummy_cc;

  // Current macroblock
  ubyte4 MB_EXPAND(mb);

  // Macroblock from S.R. to compare to.
  ubyte4 MB_EXPAND(ref_mb);

  // best motion vector and block sum
  half2 mv_xy;
  int mv_x, mv_y, x, y, test_x, test_y;
  cc wrap;
  uint mv_sad;
  int in_range;

  // The *_range variables contain the size of the S.R. for the
  // current macroblock that is being matched.  At most, it can be
  // RANGE_X or RANGE_Y.  However, the value of the *_range variables
  // can be smaller depending on the values of the *_margin variables
  // and whether the current macroblock is on an edge or not.
  // top_range and left_range are stored as non-positive values, i.e. they
  // are the most negative displacement allowed.
  int top_range = 0 - select(itocc(top_margin < RANGE_Y), top_margin, RANGE_Y);
  int bottom_range = select(itocc(bottom_margin < RANGE_Y_MINUS_1), bottom_margin, RANGE_Y_MINUS_1);
  int left_range = 0 - left_margin;
  // num_cols_to_right is the maximum size of the S.R. to the right
  int num_cols_to_right = shift(mblks, 4) + right_margin - 16;
  int right_range = select(itocc(num_cols_to_right < RANGE_X_MINUS_1), num_cols_to_right, RANGE_X_MINUS_1);

  // This array holds a 3x3 MB S.R.
  array<ubyte4> search_region(72);
  // All valid MB columns, except the right-most column, are read in
  // before the main loop to prime the array.  The final right-most
  // column for each MB is read in during the main loop.
  // Number of priming iterations: (left_margin + right_range + 1) / 4,
  // since each LOAD_4_SEARCH_COLS invocation handles four columns.
  int i = shift(left_margin + right_range + 1, -2);
  uc<int> loopcnt;
  // NOTE(review): this commclperm form presumably hands the per-cluster
  // count to the microcontroller via loopcnt -- confirm.
  i = commclperm(0, i, 0, loopcnt);
  // Starting position inside search_region, in units of four columns.
  int idx = shift(RANGE_X - left_margin, -2);
  loop_count(loopcnt) pipeline(1) {
    LOAD_4_SEARCH_COLS(row0, row1, row2, cc_true, search_region, idx)
  }
  // Columns still to be loaded inside the main loop: 16 per macroblock.
  int num_cols_left_to_load = shift(mblks, 4);

  loop_stream(mblocks) {
    // read next MB column in S.R.
    // Loading is suppressed (do_read false) once all columns have been
    // consumed.  Four invocations of LOAD_4_SEARCH_COLS cover one 16-column
    // macroblock.
    cc do_read = itocc(num_cols_left_to_load > 0);
    loopcnt = 4;
    loop_count(loopcnt) pipeline(1) {
      LOAD_4_SEARCH_COLS(row0, row1, row2, do_read, search_region, idx);
    }
    // Wrap idx back to 0 when it reaches 12.
    // NOTE(review): idx is presumably advanced inside LOAD_4_SEARCH_COLS
    // -- confirm in me_kc.hpp.
    cc wrap_idx = itocc(idx == 12);
    idx = select(wrap_idx, 0, idx);

    // read next MB to process
    mblocks >> mb0 >> mb4 >> mb1 >> mb5;
    mblocks >> mb2 >> mb6 >> mb3 >> mb7;

    // Read previous motion vectors.
    // Only the clzero cluster performs the conditional reads; the values
    // are then passed through commclperm(0, ...), presumably to broadcast
    // cluster 0's copy to every cluster (confirm).
    motions_in(clzero, dummy_cc) >> mv_xy;
    motions_in(clzero, dummy_cc) >> mv_sad;
    mv_xy = commclperm(0, mv_xy);
    mv_sad = commclperm(0, mv_sad);
    // Shift to make MV's full pixel values
    mv_xy = shifta(mv_xy, -1);
    // Split the packed pair into separate integers: mv_y from the upper
    // half, mv_x from the lower half (mirrors the re-packing done before
    // the vector is written out).
    hi_lo(mv_y, mv_x) = shuffled(int(mv_xy), 0x75643120);

    // Loop 3: granularity = 1
    // Visits the 3x3 neighbourhood in raster order: x runs over
    // mv_x-1 .. mv_x+1 and, every third iteration, is reset while y
    // advances by one.  i counts the position inside the current row.
    x = mv_x - 1;
    y = mv_y - 1;
    i = 0;
    loopcnt = 9;
    loop_count(loopcnt) pipeline(1) {
      test_x %= x;
      test_y %= y;
      // in_range flags whether the candidate lies inside the allowed
      // search window; compare_MB receives it along with the running best
      // vector and SAD.
      // NOTE(review): compare_MB presumably updates mv_x, mv_y and mv_sad
      // when the candidate is better -- confirm in me_kc.hpp.
      in_range = check_xy(top_range, bottom_range, left_range, right_range, test_x, test_y);
      extract_ref_MB(search_region, idx, test_x, test_y, RANGE_X, RANGE_Y, MB_EXPAND(ref_mb));
      compare_MB(MB_EXPAND(ref_mb), MB_EXPAND(mb), test_x, test_y, in_range, mv_x, mv_y, mv_sad);
      i = i + 1;
      wrap = itocc(i == 3);
      i = select(wrap, 0, i);
      x = select(wrap, x - 2, x + 1);
      y = select(wrap, y + 1, y);
    }

    // Output motion vectors.  Shift to make MV's 1/2 pixel values
    // Re-pack as mv_y in the upper 16 bits and mv_x in the lower 16 bits.
    mv_xy = shift(half2(shift(mv_y, 16) | (mv_x & 0xffff)), 1);
    motions_out(clzero) << mv_xy;
    motions_out(clzero) << mv_sad;

    // Output Y vals for best match
    extract_ref_MB(search_region, idx, mv_x, mv_y, RANGE_X, RANGE_Y, MB_EXPAND(ref_mb));
    refyblks << ref_mb0 << ref_mb4 << ref_mb1 << ref_mb5;
    refyblks << ref_mb2 << ref_mb6 << ref_mb3 << ref_mb7;

    // Output load indices for chrominance block that corresponds to
    // the best match
    // add one if deltas are negative to get correct biasing when
    // using shift by 1 in place of divide by 2
    int dx = mv_x + select(itocc(mv_x < 0), 1, 0);
    int dy = mv_y + select(itocc(mv_y < 0), 1, 0);
    uint fetchx = crefx + uint(shifta(dx, -1));
    uint fetchy = crefy + uint(shifta(dy, -1));
    // x part of the index: (fetchx / 8) * 64 + (fetchx % 8).
    // NOTE(review): this looks like addressing into 8x8 blocks of 64
    // elements each -- confirm the chroma memory layout.
    uint fetchxoffset = shift(shift(fetchx, -3), 6) + (fetchx & 0x7);
    // Advance the chroma x origin by 8 for the next macroblock.
    crefx = crefx + 8;
    
    // Emit one index per row for 8 consecutive rows starting at fetchy.
    loopcnt = 8;
    loop_count(loopcnt) pipeline(1) {
      // y part of the index: (fetchy / 8) * crowlen + (fetchy % 8) * 8.
      uint fetchyoffset = lo(shift(fetchy, -3) * crowlen) + shift(fetchy & 0x7, 3);
      fetchy = fetchy + 1;
      uint c_out = fetchxoffset + fetchyoffset;
      crcbindices << c_out;
    }

    // Moving one macroblock to the right: the left range grows by 16
    // (clamped at -RANGE_X) while the room on the right shrinks by 16 and
    // right_range is re-clamped to at most RANGE_X_MINUS_1.
    left_range = select(itocc(left_range > -RANGE_X), left_range - 16, left_range);
    left_range = select(itocc(left_range < -RANGE_X), -RANGE_X, left_range);
    num_cols_to_right = num_cols_to_right - 16;
    right_range = select(itocc(num_cols_to_right < RANGE_X_MINUS_1), num_cols_to_right, RANGE_X_MINUS_1);
    num_cols_left_to_load = num_cols_left_to_load - 16;
  }

  // NOTE(review): presumably completes the conditional output stream,
  // using 0 as the fill value -- confirm against the KernelC reference.
  flush(motions_out, 0);
}
me_fast_jitter1_kc.cpp - 源码说明

本页面展示了「H.264完整的C语言代码和DCT的代码」中的 me_fast_jitter1_kc.cpp 源码文件，采用 C++ 编程语言编写，共 183 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与264相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?