// mv2idx_kc.cpp
#include "idb_kernelc.hpp"
#include "mpeg.hpp"
#include "idb_kernelc2.hpp"
KERNELDEF(MV2idx, KERNELS_DIR "mv2idx_kc.uc");
// MV2idx.i -- generate load indices from a row of motion vectors
//
// For every motion vector read from `motion`, the kernel writes eight
// luminance indices to `yindices` and eight chrominance indices to
// `crcbindices` on each cluster.  The indices are computed from the current
// block position (uc_offsets), advanced by 16 luma / 8 chroma pixels in x per
// loop iteration, plus the motion vector converted from half-pixel to
// full-pixel units.
//
// Parameters:
//   motion       conditional input stream; each record is read as two half2
//                values (the motion vector and a second value, `bestval`,
//                that this kernel reads but does not use).
//   yindices     output stream of luminance fetch indices (8 per vector).
//   crcbindices  output stream of chrominance fetch indices (8 per vector).
//   uc_offsets   packed block position: x in the low 16 bits, y in the
//                upper 16 bits.
//   uc_mblks     loop trip count (number of motion vectors to process).
//   uc_mb_width  number of macroblocks in a row; scales the per-row stride
//                of the generated indices.
//
// NOTE(review): the index arithmetic looks like it addresses a
// macroblock-tiled frame (64 luma words per 16x16 macroblock, 64 chroma
// entries per 8x8 block) -- confirm against the code that builds the
// reference frame streams.
kernel MV2idx(cistream<half2> motion, // Motion Vectors
              ostream<uint> yindices, // indices for 4 Y blocks
              ostream<uint> crcbindices, // indices for 2 C blocks
              uc<int>& uc_offsets, // x, y offset of current block
              uc<int>& uc_mblks, // number of motion vectors
              uc<int>& uc_mb_width) // number of macroblocks in row
{
    uint fetchx, fetchy;
    int yrefx, yrefy;
    int crefx, crefy;
    uint yout0, yout1, yout2, yout3, yout4, yout5, yout6, yout7;
    uint cout0, cout1, cout2, cout3, cout4, cout5, cout6, cout7;

    // Fetch the microcontroller parameters (presumably commclperm with these
    // arguments distributes the uc value to every cluster -- TODO confirm).
    // Note on shift(): a positive shift value means left shift, a negative
    // one means right shift.
    int offsets = commclperm(0x8, 0, uc_offsets);
    int xoffset = offsets & 0xffff;      // x lives in the low 16 bits
    int yoffset = shift(offsets, -16);   // y lives in the upper 16 bits
    uint mb_width = uint(commclperm(0x8, 0, uc_mb_width));
    uint rowlen = shift(mb_width, 2);    // mb_width * 4  (luma row stride factor)
    uint crowlen = shift(mb_width, 6);   // mb_width * 64 (chroma row stride)

    // Per-cluster reference position: for luminance the cluster id selects
    // the row, for chrominance it selects the column.
    yrefx = xoffset;
    yrefy = yoffset + cid();
    crefx = shift(xoffset, -1) + cid();
    crefy = shift(yoffset, -1);

    // Shuffle control words
    byte4 unpackmv = 0x57571302; // lo: sign | sign | 3rd | 2nd
                                 // hi: sign | sign | 1st | 0th

    // Only cluster 0 pulls motion vectors from the conditional stream inside
    // the main loop; ccend receives the stream's end condition.
    cc ccin = itocc (cid() == 0);
    cc ccend = itocc(0);

    half2 dxdy, bestval;

    loop_count(uc_mblks) pipeline(1) {
        // get motion vectors
        double<half2> dxdy2;
        int dx, dy;
        motion(ccin, ccend) >> dxdy >> bestval;

        // Change 1/2 pixel MV's to full pixel MV's
        dxdy = shifta(dxdy, -1);

        // Broadcast and unpack MV's
        dxdy = commclperm(0, dxdy);
        dxdy2 = shuffled(dxdy, unpackmv);
        dx = int(hi(dxdy2));
        dy = int(lo(dxdy2));

        // luminance
        // Two rows (fetchy and fetchy + 8) times four x positions spaced
        // 4 pixels apart give the eight luma indices of this cluster.
        fetchx = uint(yrefx + dx);
        fetchy = uint(yrefy + dy);
        expand<uint> fetchxoffsets(4), fetchyoffsets(8);

        // y contribution: ((fetchy / 16) * rowlen) * 16 + (fetchy % 16)
        fetchyoffsets[0] = shift(lo(shift(fetchy, -4) * rowlen), 4)
                         + (fetchy & 0xf);
        fetchy = fetchy + 8;
        fetchyoffsets[1] = shift(lo(shift(fetchy, -4) * rowlen), 4)
                         + (fetchy & 0xf);

        // x contribution: (fetchx / 16) * 64 + ((fetchx % 16) / 4) * 16
        fetchxoffsets[0] = shift(shift(fetchx, -4), 6)
                         + shift(shift(fetchx & 0xf, -2), 4);
        fetchx = fetchx + 4;
        fetchxoffsets[1] = shift(shift(fetchx, -4), 6)
                         + shift(shift(fetchx & 0xf, -2), 4);
        fetchx = fetchx + 4;
        fetchxoffsets[2] = shift(shift(fetchx, -4), 6)
                         + shift(shift(fetchx & 0xf, -2), 4);
        fetchx = fetchx + 4;
        fetchxoffsets[3] = shift(shift(fetchx, -4), 6)
                         + shift(shift(fetchx & 0xf, -2), 4);

        // Emit in x-major order: both rows for x offset 0, then 1, 2, 3.
        yout0 = fetchyoffsets[0] + fetchxoffsets[0];
        yout1 = fetchyoffsets[1] + fetchxoffsets[0];
        yout2 = fetchyoffsets[0] + fetchxoffsets[1];
        yout3 = fetchyoffsets[1] + fetchxoffsets[1];
        yout4 = fetchyoffsets[0] + fetchxoffsets[2];
        yout5 = fetchyoffsets[1] + fetchxoffsets[2];
        yout6 = fetchyoffsets[0] + fetchxoffsets[3];
        yout7 = fetchyoffsets[1] + fetchxoffsets[3];
        yindices << yout0 << yout1 << yout2 << yout3;
        yindices << yout4 << yout5 << yout6 << yout7;

        // chrominance
        // add one if deltas are negative to get correct biasing
        // when using shift by 1 in place of divide by 2
        dx = dx + select(itocc(dx < 0), 1, 0);
        dy = dy + select(itocc(dy < 0), 1, 0);
        fetchx = uint(crefx + shifta(dx, -1));
        fetchy = uint(crefy + shifta(dy, -1));

        // x contribution: (fetchx / 8) * 64 + (fetchx % 8)
        uint fetchxoffset = shift(shift(fetchx, -3), 6) + (fetchx & 0x7);

        // y contribution for eight consecutive rows:
        // (fetchy / 8) * crowlen + (fetchy % 8) * 8
        fetchyoffsets[0] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);
        fetchy = fetchy + 1;
        fetchyoffsets[1] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);
        fetchy = fetchy + 1;
        fetchyoffsets[2] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);
        fetchy = fetchy + 1;
        fetchyoffsets[3] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);
        fetchy = fetchy + 1;
        fetchyoffsets[4] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);
        fetchy = fetchy + 1;
        fetchyoffsets[5] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);
        fetchy = fetchy + 1;
        fetchyoffsets[6] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);
        fetchy = fetchy + 1;
        fetchyoffsets[7] = lo(shift(fetchy, -3) * crowlen)
                         + shift(fetchy & 0x7, 3);

        cout0 = fetchxoffset + fetchyoffsets[0];
        cout1 = fetchxoffset + fetchyoffsets[1];
        cout2 = fetchxoffset + fetchyoffsets[2];
        cout3 = fetchxoffset + fetchyoffsets[3];
        cout4 = fetchxoffset + fetchyoffsets[4];
        cout5 = fetchxoffset + fetchyoffsets[5];
        cout6 = fetchxoffset + fetchyoffsets[6];
        cout7 = fetchxoffset + fetchyoffsets[7];
        crcbindices << cout0 << cout1 << cout2 << cout3;
        crcbindices << cout4 << cout5 << cout6 << cout7;

        // Advance to the next macroblock in the row: 16 luma pixels,
        // 8 chroma pixels.
        yrefx = yrefx + 16;
        crefx = crefx + 8;
    }

    // Keep reading the motion stream on all clusters until its end condition
    // is raised; the values read here are discarded.
    // NOTE(review): presumably this drains records left over after uc_mblks
    // vectors have been processed -- confirm.
    cc always = itocc( 0xffffffff );
    loop_until_any(ccend) {
        motion( always, ccend) >> dxdy >> bestval;
    }
}
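// (Code-viewer page residue, not part of the kernel source.)
// Keyboard shortcuts: copy code: Ctrl + C; search code: Ctrl + F;
// full-screen mode: F11; toggle theme: Ctrl + Shift + D; show shortcuts: ?;
// increase font size: Ctrl + =; decrease font size: Ctrl + -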