// File: me_fast_search4_kc.cpp
// (The original second line was a "font size:" label left over from the web code viewer.)
#include "idb_kernelc.hpp"
#include "mpeg.hpp"
#include "me_kc.hpp"
#include "idb_kernelc2.hpp"
#define RANGE_X 16
#define RANGE_X_MINUS_1 15
// NOTE: If you change RANGE_Y, then you also have to change
// LOAD_4_SEARCH_COLS in me_kc.hpp
#define RANGE_Y 16
#define RANGE_Y_MINUS_1 15
// S.R. == search region
// Input parameters:
//
// uc_margin:
//
// Each byte is an unsigned value that indicates how many blocks of
// "margin" are available on the outskirts of the S.R. In other words,
// if mblocks is an entire row of macroblocks, the left and right
// margins would be '0' since we cannot search any further in
// those directions. Note that macroblocks not on the edges will
// not necessarily care about the margin values. The top and
// bottom margins are the same for each macroblock to be matched.
// The order of the bytes from LSByte to MSByte is top, bottom,
// left, right.
// me_fast_search4: coarse (4-pixel granularity) motion search kernel.
//
// For every macroblock read from `mblocks`, the kernel evaluates a 7x7 grid
// of candidate displacements (x and y each stepping from -12 in steps of 4),
// keeps the candidate with the best SAD among those that fall inside the
// currently valid search range, and emits the resulting motion vector
// followed by its SAD on `motions_out`.
//
// Parameters:
//   row0, row1, row2 - input streams that feed the search-region array via
//                      LOAD_4_SEARCH_COLS (presumably the three macroblock
//                      rows of reference pixels -- TODO confirm in me_kc.hpp).
//   mblocks          - stream of macroblocks to be matched; eight ubyte4
//                      words are read per loop iteration.
//   motions_out      - conditional output stream; written only where
//                      `clzero` is true (cid() == 0).
//   uc_margin        - packed margins, one byte each, LSByte to MSByte:
//                      top, bottom, left, right.
//   uc_mblks         - number of macroblocks in `mblocks`.
//
// NOTE(review): MB_EXPAND, LOAD_4_SEARCH_COLS, extract_ref_MB4, check_xy,
// compare_MB and shift_ref_MB4 are defined outside this file (me_kc.hpp is
// the likely home); comments about them below are limited to what the call
// sites and the original comments state.
KERNELDEF(me_fast_search4, KERNELS_DIR "me_fast_search4_kc.uc");
kernel me_fast_search4(istream<ubyte4> row0,
istream<ubyte4> row1,
istream<ubyte4> row2,
istream<ubyte4> mblocks,
costream<half2> motions_out,
uc<int>& uc_margin,
uc<int>& uc_mblks) // num MBs in mblocks
{
// read uc parameter
synch();
int margin = commclperm(8, 0, uc_margin);
int mblks = commclperm(8, 0, uc_mblks);
// init constants
// Unpack the four margin bytes. shift() with a negative count is used as a
// right shift here (see the byte order documented for uc_margin).
int top_margin = margin & 0xFF;
int bottom_margin = shift(margin, -8) & 0xFF;
int left_margin = shift(margin, -16) & 0xFF;
int right_margin = shift(margin, -24) & 0xFF;
// clzero is true only where cid() == 0; it gates the writes to motions_out.
cc clzero = itocc(cid() == 0);
// Always-true condition code, used for the unconditional priming loads.
cc cc_true = itocc(0 == 0);
// NOTE(review): dummy_cc is not referenced directly in this kernel body; it
// may be used by the macros from me_kc.hpp -- confirm before removing.
cc dummy_cc;
// Current macroblock
ubyte4 MB_EXPAND(mb);
// Macroblock from S.R. to compare to.
ubyte4 MB_EXPAND(ref_mb);
// best motion vector and block sum
half2 mv_xy;
// NOTE(review): test_x, test_y and wrap are not referenced directly below;
// they may be consumed by macros -- confirm before removing.
int mv_x, mv_y, x, y, test_x, test_y;
cc wrap;
uint mv_sad;
int in_range;
// The *_range variables contain the size of the S.R. for the
// current macroblock that is being matched. At most, it can be
// RANGE_X or RANGE_Y. However, the value of the *_range variables
// can be smaller depending on the values of the *_margin variables
// and whether the current macroblock is on an edge or not.
// top_range = -min(top_margin, RANGE_Y); bottom_range = min(bottom_margin, RANGE_Y - 1).
int top_range = 0 - select(itocc(top_margin < RANGE_Y), top_margin, RANGE_Y);
int bottom_range = select(itocc(bottom_margin < RANGE_Y_MINUS_1), bottom_margin, RANGE_Y_MINUS_1);
// The first macroblock can only look left as far as the left margin allows;
// left_range is widened after each macroblock at the bottom of the main loop.
int left_range = 0 - left_margin;
// num_cols_to_right is the maximum size of the S.R. to the right
// (mblks * 16 + right_margin, minus 16 for the current macroblock itself).
int num_cols_to_right = shift(mblks, 4) + right_margin - 16;
int right_range = select(itocc(num_cols_to_right < RANGE_X_MINUS_1), num_cols_to_right, RANGE_X_MINUS_1);
// This array holds a 3x3 MB S.R.
array<ubyte4> search_region(72);
// All valid MB columns, except the right-most column, are read in
// before the main loop to prime the array. The final right-most
// column for each MB is read in during the main loop.
// i = (left_margin + right_range + 1) / 4: the priming loop count, where each
// iteration invokes LOAD_4_SEARCH_COLS once.
int i = shift(left_margin + right_range + 1, -2);
uc<int> loopcnt;
// NOTE(review): this commclperm form appears to transfer i into loopcnt;
// the returned value stored back in i is not used afterwards.
i = commclperm(0, i, 0, loopcnt);
// Starting index into search_region, in units of 4 columns:
// (RANGE_X - left_margin) / 4.
int idx = shift(RANGE_X - left_margin, -2);
loop_count(loopcnt) pipeline(1) {
LOAD_4_SEARCH_COLS(row0, row1, row2, cc_true, search_region, idx);
}
// Remaining columns to load in the main loop (mblks * 16, reduced by 16 per macroblock).
int num_cols_left_to_load = shift(mblks, 4);
// Main loop: one iteration per macroblock in mblocks.
loop_stream(mblocks) {
// read next MB column in S.R.
// do_read goes false once num_cols_left_to_load has dropped to zero or below.
cc do_read = itocc(num_cols_left_to_load > 0);
loopcnt = 4;
loop_count(loopcnt) pipeline(1) {
LOAD_4_SEARCH_COLS(row0, row1, row2, do_read, search_region, idx);
}// Four iterations read in 16 pixel columns, i.e. one 16x16 macroblock width
// Wrap the column index back to 0 when it reaches 12.
// NOTE(review): idx is presumably advanced inside LOAD_4_SEARCH_COLS -- confirm.
cc wrap_idx = itocc(idx == 12);
idx = select(wrap_idx, 0, idx);
// read next MB to process
// Words arrive in the order mb0, mb4, mb1, mb5, mb2, mb6, mb3, mb7.
mblocks >> mb0 >> mb4 >> mb1 >> mb5;
mblocks >> mb2 >> mb6 >> mb3 >> mb7;
uc<int> vcnt, hcnt;
int SR_row, SR_col, rot_perm;
// Loop 1: granularity = 4
// y and x each take the 7 values -12, -8, ..., 12 (49 candidates in total).
y = -12;
// Start with the largest possible SAD so the first in-range candidate can replace it.
mv_sad = 0xffffffff;
vcnt = 7;
loop_count(vcnt) {
x = -12;
extract_ref_MB4(search_region, idx, x, y, RANGE_X, RANGE_Y, SR_row, SR_col, rot_perm, MB_EXPAND(ref_mb));
// Locate the macroblock at (x, y) within the search region
hcnt = 7;
loop_count(hcnt) pipeline(1) {
in_range = check_xy(top_range, bottom_range, left_range, right_range, x, y);
// Check whether (x, y) lies inside the search region: returns -1 if x and y are within the given ranges, otherwise 0
compare_MB(MB_EXPAND(ref_mb), MB_EXPAND(mb), x, y, in_range, mv_x, mv_y, mv_sad);
// Within the search region, compute the SAD of the MB at (x, y) and compare it with the previous best; if the SAD is better, save (x, y)
shift_ref_MB4(search_region, SR_row, SR_col, RANGE_X, rot_perm, MB_EXPAND(ref_mb));
// Shift the reference macroblock 4 pixels to the right within the search region
x = x + 4;
}
y = y + 4;// granularity is 4
}
// Output motion vectors. Shift to make MV's 1/2 pixel values
// mv_y is packed into the upper 16 bits and mv_x into the lower 16 bits.
mv_xy = shift(half2(shift(mv_y, 16) | (mv_x & 0xffff)), 1);
// The motion vector is written first, then the SAD, both gated by clzero.
motions_out(clzero) << mv_xy;
motions_out(clzero) << mv_sad;
// Advance to the next macroblock: extend left_range by 16 while it has not
// yet reached -RANGE_X, then clamp it so it never goes below -RANGE_X.
left_range = select(itocc(left_range > -RANGE_X), left_range - 16, left_range);
left_range = select(itocc(left_range < -RANGE_X), -RANGE_X, left_range);
// 16 fewer columns remain to the right; recompute right_range as
// min(num_cols_to_right, RANGE_X - 1).
num_cols_to_right = num_cols_to_right - 16;
right_range = select(itocc(num_cols_to_right < RANGE_X_MINUS_1), num_cols_to_right, RANGE_X_MINUS_1);
num_cols_left_to_load = num_cols_left_to_load - 16;
}
// Flush the conditional output stream.
// NOTE(review): the second argument (0) is presumably a fill value -- confirm.
flush(motions_out, 0);
}
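// ---------------------------------------------------------------------
// The lines below are not part of the kernel; they are keyboard-shortcut
// help text left over from the web code viewer this file was copied from.
//   Copy code:        Ctrl + C
//   Search code:      Ctrl + F
//   Full-screen mode: F11
//   Toggle theme:     Ctrl + Shift + D
//   Show shortcuts:   ?
//   Increase font:    Ctrl + =
//   Decrease font:    Ctrl + -
// ---------------------------------------------------------------------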