📄 mpeg_kc.i
字号:
#line 1 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_kernelc.hpp"
#line 2 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"
#line 1 "D:\\working\\im_apps\\h264\\mpeg.hpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_types.hpp"
#line 6 "D:\\working\\im_apps\\h264\\mpeg.hpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_deftypes.hpp"
#line 7 "D:\\working\\im_apps\\h264\\mpeg.hpp"
// Prototype for kernel `idxGen`.
//   indices   : ostream<uint>  (the only stream parameter; no input streams)
//   uc_size   : uc<int>&
//   uc_params : uc<int>&
// NOTE(review): the name suggests it generates an index stream — confirm
// against the kernel body.
#decl idxGen(ostream<uint> indices,
uc<int>& uc_size,
uc<int>& uc_params);
;
// Prototype for kernel `icolor`.
//   datain  : istream<ubyte4>
//   Yout    : ostream<half2>
//   CrCbout : ostream<half2>
// NOTE(review): parameter names suggest a split into luma (Y) and chroma
// (Cr/Cb) outputs — confirm against the kernel body.
#decl icolor(istream<ubyte4> datain,
ostream<half2> Yout,
ostream<half2> CrCbout);
;
// Prototype for kernel `pcolor`.
//   datain  : istream<ubyte4>
//   Yout    : ostream<ubyte4>
//   CrCbout : ostream<half2>
// Same parameter list as `icolor` except that `Yout` carries ubyte4 elements
// here rather than half2.
#decl pcolor(istream<ubyte4> datain,
ostream<ubyte4> Yout,
ostream<half2> CrCbout);
;
// Prototype for kernel `dct`.
//   datain             : istream<half2>
//   consts             : istream<uhalf2>
//   out                : ostream<half2>
//   uc_quantizer_scale : uc<uhalf2>&
// NOTE(review): `idct` declares `uc_quantizer_scale` as uc<half2>& (signed)
// while this declaration uses uc<uhalf2>& — confirm the difference is intended.
#decl dct(istream<half2> datain,
istream<uhalf2> consts,
ostream<half2> out,
uc<uhalf2>& uc_quantizer_scale);
;
// Prototype for kernel `idct`.
//   datain             : istream<half2>
//   consts             : istream<uhalf2>
//   out                : ostream<half2>
//   uc_quantizer_scale : uc<half2>&
// NOTE(review): `dct` declares `uc_quantizer_scale` as uc<uhalf2>& (unsigned)
// while this declaration uses uc<half2>& — confirm the difference is intended.
#decl idct(istream<half2> datain,
istream<uhalf2> consts,
ostream<half2> out,
uc<half2>& uc_quantizer_scale);
;
// Prototype for kernel `rle`.
//   Yin         : istream<half2>
//   CrCbin      : istream<half2>
//   indices     : istream<int>
//   motion      : cistream<half2>
//   out         : costream<half2>
//   pframe      : uc<uint>&
//   quant_scale : uc<uint>&
// NOTE(review): the name suggests run-length encoding of the Y / CrCb inputs —
// confirm against the kernel body.
#decl rle(istream<half2> Yin,
istream<half2> CrCbin,
istream<int> indices,
cistream<half2> motion,
costream<half2> out,
uc<uint>& pframe,
uc<uint>& quant_scale);
;
// Prototype for kernel `difference`.
//   curryblks : istream<ubyte4>
//   currcblks : istream<half2>
//   refyblks  : istream<ubyte4>
//   refcblks  : istream<half2>
//   diffyblks : ostream<half2>
//   diffcblks : ostream<half2>
// Four input streams (a "curr" and a "ref" pair, each with a y and a c stream)
// and two half2 output streams.
// NOTE(review): presumably outputs current-minus-reference blocks — confirm
// against the kernel body.
#decl difference(istream<ubyte4> curryblks,
istream<half2> currcblks,
istream<ubyte4> refyblks,
istream<half2> refcblks,
ostream<half2> diffyblks,
ostream<half2> diffcblks);
;
// Prototype for kernel `correlate`.
//   diffyblks   : istream<half2>
//   diffcblks   : istream<half2>
//   refyblks    : istream<ubyte4>
//   refcblks    : istream<half2>
//   newrefyblks : ostream<ubyte4>
//   newrefcblks : ostream<half2>
//   uc_pframe   : uc<uint>&
// The two `diff*` inputs have the same element types as the outputs declared
// by `difference`; the `newref*` outputs have the same element types as the
// `ref*` inputs.
// NOTE(review): presumably rebuilds reference blocks from diff + ref — confirm
// against the kernel body.
#decl correlate(istream<half2> diffyblks,
istream<half2> diffcblks,
istream<ubyte4> refyblks,
istream<half2> refcblks,
ostream<ubyte4> newrefyblks,
ostream<half2> newrefcblks,
uc<uint>& uc_pframe);
;
// Prototype for kernel `MV2idx`.
//   motion      : cistream<half2>
//   yindices    : ostream<uint>
//   crcbindices : ostream<uint>
//   uc_offsets  : uc<int>&
//   uc_mblks    : uc<int>&
//   uc_mb_width : uc<int>&
// NOTE(review): the name suggests converting motion vectors into Y and CrCb
// index streams — confirm against the kernel body.
#decl MV2idx(cistream<half2> motion,
ostream<uint> yindices,
ostream<uint> crcbindices,
uc<int>& uc_offsets,
uc<int>& uc_mblks,
uc<int>& uc_mb_width);
;
// Prototype for kernel `blocksearch`.
//   row0, row1, row2 : istream<ubyte4>
//   mblocks          : istream<ubyte4>
//   motions          : costream<half2>
//   location         : uc<int>&
// Shares its first four ubyte4 input streams (row0..row2, mblocks) with the
// `me_fast*` declarations.
#decl blocksearch(istream<ubyte4> row0,
istream<ubyte4> row1,
istream<ubyte4> row2,
istream<ubyte4> mblocks,
costream<half2> motions,
uc<int>& location);
;
// Prototype for kernel `mb_encode`; a `kernel mb_encode(...)` definition with
// this same parameter list appears later in this file.
//   datain    : istream<byte4>
//   consts    : istream<half2>
//   color_out : ostream<half2>
//   dct_out   : ostream<half2>
//   out       : costream<int>
// NOTE(review): `datain` is byte4 here, whereas every other byte-vector stream
// in this declaration list is ubyte4 — confirm the signed type is intended.
#decl mb_encode(istream<byte4> datain,
istream<half2> consts,
ostream<half2> color_out,
ostream<half2> dct_out,
costream<int> out);
;
// Prototype for kernel `me_fast`.
//   row0, row1, row2 : istream<ubyte4>
//   mblocks          : istream<ubyte4>
//   motions_out      : costream<half2>
//   refyblks         : ostream<ubyte4>
//   crcbindices      : ostream<uint>
//   uc_margin        : uc<int>&
//   uc_offsets       : uc<int>&
//   uc_mblks         : uc<int>&
//   uc_mb_width      : uc<int>&
// `me_fast_jitter1` declares the same parameters plus a `motions_in` input.
#decl me_fast(istream<ubyte4> row0,
istream<ubyte4> row1,
istream<ubyte4> row2,
istream<ubyte4> mblocks,
costream<half2> motions_out,
ostream<ubyte4> refyblks,
ostream<uint> crcbindices,
uc<int>& uc_margin,
uc<int>& uc_offsets,
uc<int>& uc_mblks,
uc<int>& uc_mb_width);
;
// Prototype for kernel `me_fast_search4`.
//   row0, row1, row2 : istream<ubyte4>
//   mblocks          : istream<ubyte4>
//   motions_out      : costream<half2>
//   uc_margin        : uc<int>&
//   uc_mblks         : uc<int>&
// `me_fast_jitter2` declares the same parameters plus a `motions_in` input.
#decl me_fast_search4(istream<ubyte4> row0,
istream<ubyte4> row1,
istream<ubyte4> row2,
istream<ubyte4> mblocks,
costream<half2> motions_out,
uc<int>& uc_margin,
uc<int>& uc_mblks);
;
// Prototype for kernel `me_fast_jitter2`.
//   row0, row1, row2 : istream<ubyte4>
//   mblocks          : istream<ubyte4>
//   motions_in       : cistream<half2>
//   motions_out      : costream<half2>
//   uc_margin        : uc<int>&
//   uc_mblks         : uc<int>&
// Same parameter list as `me_fast_search4` with `motions_in` added.
// NOTE(review): presumably refines the motions read from `motions_in` —
// confirm against the kernel body.
#decl me_fast_jitter2(istream<ubyte4> row0,
istream<ubyte4> row1,
istream<ubyte4> row2,
istream<ubyte4> mblocks,
cistream<half2> motions_in,
costream<half2> motions_out,
uc<int>& uc_margin,
uc<int>& uc_mblks);
;
// Prototype for kernel `me_fast_jitter1`.
//   row0, row1, row2 : istream<ubyte4>
//   mblocks          : istream<ubyte4>
//   motions_in       : cistream<half2>
//   motions_out      : costream<half2>
//   refyblks         : ostream<ubyte4>
//   crcbindices      : ostream<uint>
//   uc_margin        : uc<int>&
//   uc_offsets       : uc<int>&
//   uc_mblks         : uc<int>&
//   uc_mb_width      : uc<int>&
// Same parameter list as `me_fast` with `motions_in` added.
#decl me_fast_jitter1(istream<ubyte4> row0,
istream<ubyte4> row1,
istream<ubyte4> row2,
istream<ubyte4> mblocks,
cistream<half2> motions_in,
costream<half2> motions_out,
ostream<ubyte4> refyblks,
ostream<uint> crcbindices,
uc<int>& uc_margin,
uc<int>& uc_offsets,
uc<int>& uc_mblks,
uc<int>& uc_mb_width);
;
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_undeftypes.hpp"
#line 155 "D:\\working\\im_apps\\h264\\mpeg.hpp"
#line 157 "D:\\working\\im_apps\\h264\\mpeg.hpp"
#line 3 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"
#line 1 "D:/working/tools/isim/isimexe/blank_headers\\idb_kernelc2.hpp"
#line 4 "D:\\working\\im_apps\\h264\\mpeg_kc.cpp"
;
kernel mb_encode(istream<byte4> datain,
istream<half2> consts,
ostream<half2> color_out,
ostream<half2> dct_out,
costream<int> out)
{
array<half2> CrCbY4Y3Y2Y1_a(24);
array<half2> CrCbY4Y3Y2Y1_b(24);
array<int> dc_pred(3);
int minus_one = 0 - 1;
int two = 1 + 1;
int four = two + two;
int sixteen = lo(four * four);
int minus_sixteen = 0 - sixteen;
uc<int> perm_A = 0x76546420;
uc<int> perm_B = 0x64203210;
cc low = itocc(cid() < four);
cc Y_combine = itocc(shift(minus_one, sixteen));
byte4 shuf_func1, shuf_func2, shuf_func3;
half2 temp;
consts >> temp;
shuf_func1 = byte4(temp);
consts >> temp;
shuf_func2 = byte4(temp);
consts >> temp;
shuf_func3 = byte4(temp);
half2 RB_SCALE, G_SCALE, C_SCALE;
consts >> RB_SCALE >> G_SCALE >> C_SCALE;
half2 one_two_eight;
consts >> one_two_eight;
int three = two + 1;
int seven = four + three;
half2 COS_2, COS_3, COS_1_plus_COS_3, COS_1_minus_COS_3;
consts >> COS_2 >> COS_3 >> COS_1_plus_COS_3 >> COS_1_minus_COS_3;
array<half2> K(8);
consts >> K[0] >> K[1] >> K[2] >> K[3] >> K[4] >> K[5] >> K[6] >> K[7];
uhalf2 quant_scale;
array<half2> quant(24);
consts >> temp;
quant_scale = uhalf2(temp);
int quant_idx = 0;
int k_idx = 0;
uhalf2 utmp;
uc<int> i = 8;
loop_count(i) unroll(1) {
consts >> temp;
utmp = uhalf2(temp);
utmp = shift(hi(quant_scale * utmp), 1);
quant[quant_idx] = half2(hi(utmp * uhalf2(shift(K[quant_idx], 1))));
quant_idx = quant_idx + 1;
}
i = 8;
int quant_idx2 = 0;
loop_count(i) unroll(1) {
quant[quant_idx] = quant[quant_idx2];
quant_idx = quant_idx + 1;
quant_idx2 = quant_idx2 + 1;
}
i = 8;
k_idx = quant_idx & seven;
loop_count(i) unroll(1) {
consts >> temp;
utmp = uhalf2(temp);
utmp = shift(hi(quant_scale * utmp), 1);
quant[quant_idx] = half2(hi(utmp * uhalf2(shift(K[k_idx], 1))));
quant_idx = quant_idx + 1;
k_idx = quant_idx & seven;
}
uc<int> perm_a = 0x07654321;
uc<int> perm_b = 0x10765432;
uc<int> perm_c = 0x21076543;
uc<int> perm_d = 0x32107654;
uc<int> perm_e = 0x43210765;
uc<int> perm_f = 0x54321076;
uc<int> perm_g = 0x65432107;
int dp;
consts >> temp;
dp = int(temp);
dc_pred[0] = dp;
dc_pred[1] = dp;
dc_pred[2] = dp;
uc<int> perm_1 = 0x75643120;
uc<int> perm_2 = 0x67451320;
uc<int> perm_3 = 0x75261430;
uc<int> perm_4 = 0x57362401;
uc<int> perm_5 = 0x74352601;
uc<int> perm_6 = 0x64270531;
uc<int> perm_7 = 0x63170542;
array<int> send_idx_arr(8), store_idx_arr(8);
consts >> temp; send_idx_arr[0] = int(temp);
consts >> temp; store_idx_arr[0] = int(temp);
consts >> temp; send_idx_arr[1] = int(temp);
consts >> temp; store_idx_arr[1] = int(temp);
consts >> temp; send_idx_arr[2] = int(temp);
consts >> temp; store_idx_arr[2] = int(temp);
consts >> temp; send_idx_arr[3] = int(temp);
consts >> temp; store_idx_arr[3] = int(temp);
consts >> temp; send_idx_arr[4] = int(temp);
consts >> temp; store_idx_arr[4] = int(temp);
consts >> temp; send_idx_arr[5] = int(temp);
consts >> temp; store_idx_arr[5] = int(temp);
consts >> temp; send_idx_arr[6] = int(temp);
consts >> temp; store_idx_arr[6] = int(temp);
consts >> temp; send_idx_arr[7] = int(temp);
consts >> temp; store_idx_arr[7] = int(temp);
int y_idx = 0;
int c_idx;
i = 8;
byte4 color1, color2, color3, color4;
half2 a0, a1, a2, a3, a4, a5, a6, a7, a1a3, a2a4, b1, b2, b3, b4;
half2 c1, c2, c3, c4, d0, d1, d2, d3, d4, d5, d6, d7, e1, e2, e3, e4;
half2 y1, y2, y3, y4, z1, z2, z3, z4, first, second;
loop_count(i) pipeline(1) {
stage(1);
datain >> color1 >> color2 >> color3 >> color4;
a1 = half2(shuffle(color1, shuf_func1));
b1 = half2(shuffle(color1, shuf_func2));
a2 = half2(shuffle(color2, shuf_func1));
b2 = half2(shuffle(color2, shuf_func2));
a3 = half2(shuffle(color3, shuf_func1));
b3 = half2(shuffle(color3, shuf_func2));
a4 = half2(shuffle(color4, shuf_func1));
b4 = half2(shuffle(color4, shuf_func2));
c1 = hi(RB_SCALE * shift(a1, 1));
c2 = hi(RB_SCALE * shift(a2, 1));
c3 = hi(RB_SCALE * shift(a3, 1));
c4 = hi(RB_SCALE * shift(a4, 1));
d1 = c1 + hi(G_SCALE * shift(b1, 1));
d2 = c2 + hi(G_SCALE * shift(b2, 1));
d3 = c3 + hi(G_SCALE * shift(b3, 1));
d4 = c4 + hi(G_SCALE * shift(b4, 1));
e1 = half2(shift(int(c1), minus_sixteen));
e2 = half2(shift(int(c2), minus_sixteen));
e3 = half2(shift(int(c3), minus_sixteen));
e4 = half2(shift(int(c4), minus_sixteen));
a1a3 = a1 + a3;
a2a4 = a2 + a4;
y1 = d1 + e1;
y2 = d2 + e2;
y3 = d3 + e3;
y4 = d4 + e4;
stage(2);
z1 = half2(shuffle(y1, shuf_func3));
z2 = half2(shuffle(y2, shuf_func3));
z3 = half2(shuffle(y3, shuf_func3));
z4 = half2(shuffle(y4, shuf_func3));
CrCbY4Y3Y2Y1_a[0+y_idx] = select(Y_combine, z2, z1);
CrCbY4Y3Y2Y1_a[1+y_idx] = select(Y_combine, z4, z3);
first = hi((a1a3 - (z1 + z3)) * C_SCALE) + one_two_eight;
second = hi((a2a4 - (z2 + z4)) * C_SCALE) + one_two_eight;
first = commucperm(perm_A, first);
second = commucperm(perm_B, second);
c_idx = shift(y_idx, minus_one);
y_idx = y_idx + two;
CrCbY4Y3Y2Y1_a[16+c_idx] = select(low, first, second);
}
color_out << CrCbY4Y3Y2Y1_a[0];
color_out << CrCbY4Y3Y2Y1_a[1];
color_out << CrCbY4Y3Y2Y1_a[2];
color_out << CrCbY4Y3Y2Y1_a[3];
color_out << CrCbY4Y3Y2Y1_a[4];
color_out << CrCbY4Y3Y2Y1_a[5];
color_out << CrCbY4Y3Y2Y1_a[6];
color_out << CrCbY4Y3Y2Y1_a[7];
color_out << CrCbY4Y3Y2Y1_a[8];
color_out << CrCbY4Y3Y2Y1_a[9];
color_out << CrCbY4Y3Y2Y1_a[10];
color_out << CrCbY4Y3Y2Y1_a[11];
color_out << CrCbY4Y3Y2Y1_a[12];
color_out << CrCbY4Y3Y2Y1_a[13];
color_out << CrCbY4Y3Y2Y1_a[14];
color_out << CrCbY4Y3Y2Y1_a[15];
color_out << CrCbY4Y3Y2Y1_a[16];
color_out << CrCbY4Y3Y2Y1_a[17];
color_out << CrCbY4Y3Y2Y1_a[18];
color_out << CrCbY4Y3Y2Y1_a[19];
color_out << CrCbY4Y3Y2Y1_a[20];
color_out << CrCbY4Y3Y2Y1_a[21];
color_out << CrCbY4Y3Y2Y1_a[22];
color_out << CrCbY4Y3Y2Y1_a[23];
int eight = four + four;
sixteen = eight + eight;
half2 h2_one = 1 | half2(shift(1, sixteen));
uhalf2 uh2_half = shift(uhalf2(h2_one), sixteen - 1);
uhalf2 uh2_almost_half = uh2_half - uhalf2(h2_one);
int dest_idx = 0;
int src_idx = 0;
int idx0 = cid();
int idx1 = (idx0 - 1) & seven;
int idx2 = (idx1 - 1) & seven;
int idx3 = (idx2 - 1) & seven;
int idx4 = (idx3 - 1) & seven;
int idx5 = (idx4 - 1) & seven;
int idx6 = (idx5 - 1) & seven;
int idx7 = (idx6 - 1) & seven;
i = 3;
loop_count(i) pipeline(67) {
stage(1);
a0 = CrCbY4Y3Y2Y1_a[0+src_idx];
a1 = CrCbY4Y3Y2Y1_a[1+src_idx];
a2 = CrCbY4Y3Y2Y1_a[2+src_idx];
a3 = CrCbY4Y3Y2Y1_a[3+src_idx];
a4 = CrCbY4Y3Y2Y1_a[4+src_idx];
a5 = CrCbY4Y3Y2Y1_a[5+src_idx];
a6 = CrCbY4Y3Y2Y1_a[6+src_idx];
a7 = CrCbY4Y3Y2Y1_a[7+src_idx];
src_idx = src_idx + eight;
half2 s16, s07, s25, s34, s1625, s0734;
s07 = a0 + a7;
s16 = a1 + a6;
s25 = a2 + a5;
s34 = a3 + a4;
s1625 = s16 + s25;
s0734 = s07 + s34;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -