📄 pcolor_kc.cpp
字号:
#include "idb_kernelc.hpp"
#include "mpeg.hpp"
#include "idb_kernelc2.hpp"
// Passes the kernel name `pcolor` and the path KERNELS_DIR "pcolor_kc.uc"
// (adjacent string literals, concatenated by the compiler) to KERNELDEF.
// NOTE(review): presumably this binds the kernel to its compiled microcode
// file; KERNELDEF is defined outside this file -- confirm in the headers.
KERNELDEF(pcolor, KERNELS_DIR "pcolor_kc.uc");
// color.i (originally rgc_yc.i)
// Ujval Kapasi
// 3/28/97
// 7/22/97
// 6/31/98, revised by Manman Ren
// 12/10/99, revised by ujk
//
// Color space warp (RGB space ---> YCrCb space) for JPEG
// Includes a 2:1 subsampling in the horizontal and vertical direction
// for Cr and Cb. These are obtained by interpolating between two pixels.
// The sampling is done as follows (4:2:0 MPEG-2 format) :
//     o   o   o   o   o     (the "o" represents a luminance sample,
//     .       .       .      the "." represents a chrominance sample)
//     o   o   o   o   o
//
//     o   o   o   o   o
//     .       .       .
//     o   o   o   o   o
//
//
// Input : color = 0 | R || G | B // in 8.0 format
//
// Output :
kernel pcolor(istream<ubyte4> datain,
ostream<ubyte4> Yout,
ostream<half2> CrCbout)
{
// constants
cc low = itocc(cid() < 4);
cc Y_combine = itocc(half2(0) == half2(1)); // TRUE || FALSE
// Shuffle control words
byte4 shuf_func1 = 0x08020800; // zero | 3rd || zero | 1st
byte4 shuf_func2 = 0x08080801; // zero | zero || zero | 2nd
byte4 shuf_func3 = 0x01000100; // 1st || 1st
// Luminance transform constants (in 1.15 format)
half2 RB_SCALE = 0x26460e98; // 0.299 || 0.114
half2 G_SCALE = 0x00004b23; // 0 || 0.587
half2 C_SCALE = 0x4fe33f35; // 0.62411 || 0.4938
// For adding by 128
half2 one_two_eight = 0x00800080;
half2 Ymadj = 0x6d806d80; // 219/256
half2 Yaadj = 0x00100010; // 16
// communication permutations
// cluster : 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
// perm_a : C7 | C6 | C5 | C4 | C6 | C4 | C2 | C0 |
// perm_b : C6 | C4 | C2 | C0 | C3 | C2 | C1 | C0 |
// The first four clusters get first, and the second half get second.
uc<int> perm_a = 0x76546420;
uc<int> perm_b = 0x64203210;
// a = R || B
// b = 0 || G
// c = 0.299R || 0.114B
// d = 0 || 0.114B + 0.587G
// e = 0 || 0.299R
// y = - || Y (0.114B + 0.587G + 0.299R)
// z = Y || Y
// a and z are in 16.0. first and second are averages of two pixels,
// where each pixel adds the following weight to the average :
// (a-z)/1.6 + 128 || (a-z)/2 + 128. A little math is done to factor
// the multiplication (division) out of the average to reduce the number
// of necessary multiplications and to obtain the maximum precision
// without shifts. C_SCALE is in 1.15, so the division by two is implicit,
// and no shift is necessary. The addition by 128 is also factored out.
// Each pixel has an additive factor of 128, for a total of 256, divided
// by two is 128.
byte4 color1, color2, color3, color4;
half2 first, second;
half2 a1, a2, a3, a4, b1, b2, b3, b4, c1, c2, c3, c4, d1, d2, d3, d4;
half2 z1, z2, z3, z4, temp0, temp1;
array<half2> Ya(8), Yb(8), Yc(8), Yd(8);
expand<ubyte4> Ye(8), Yf(8), out(8);
double<ubyte4> outa, outb, outc, outd;
int dest_idx, send_idx;
loop_stream(datain) { // pipeline(145) {
// Row 0,1
datain >> color1 >> color2 >> color3 >> color4;
a1 = half2(shuffle(color1, shuf_func1));
b1 = half2(shuffle(color1, shuf_func2));
a2 = half2(shuffle(color2, shuf_func1));
b2 = half2(shuffle(color2, shuf_func2));
a3 = half2(shuffle(color3, shuf_func1));
b3 = half2(shuffle(color3, shuf_func2));
a4 = half2(shuffle(color4, shuf_func1));
b4 = half2(shuffle(color4, shuf_func2));
c1 = hi(mulrnd(RB_SCALE, shift(a1, 1)));
c2 = hi(mulrnd(RB_SCALE, shift(a2, 1)));
c3 = hi(mulrnd(RB_SCALE, shift(a3, 1)));
c4 = hi(mulrnd(RB_SCALE, shift(a4, 1)));
d1 = c1 + hi(mulrnd(G_SCALE, shift(b1, 1)));
d2 = c2 + hi(mulrnd(G_SCALE, shift(b2, 1)));
d3 = c3 + hi(mulrnd(G_SCALE, shift(b3, 1)));
d4 = c4 + hi(mulrnd(G_SCALE, shift(b4, 1)));
z1 = half2(shuffle(d1 + half2(shift(int(c1), -16)), shuf_func3));
z2 = half2(shuffle(d2 + half2(shift(int(c2), -16)), shuf_func3));
z3 = half2(shuffle(d3 + half2(shift(int(c3), -16)), shuf_func3));
z4 = half2(shuffle(d4 + half2(shift(int(c4), -16)), shuf_func3));
temp0 = select(Y_combine, z2, z1);
temp1 = select(Y_combine, z4, z3);
Ya[0] = hi(mulrnd(Ymadj, shift(temp0, 1)))+Yaadj;
Ya[1] = hi(mulrnd(Ymadj, shift(temp1, 1)))+Yaadj;
first = hi(((a1 + a3) - (z1 + z3)) * C_SCALE) + one_two_eight;
second = hi(((a2 + a4) - (z2 + z4)) * C_SCALE) + one_two_eight;
first = commucperm(perm_a, first);
second = commucperm(perm_b, second);
CrCbout << select(low, first, second);
// Row 2,3
datain >> color1 >> color2 >> color3 >> color4;
a1 = half2(shuffle(color1, shuf_func1));
b1 = half2(shuffle(color1, shuf_func2));
a2 = half2(shuffle(color2, shuf_func1));
b2 = half2(shuffle(color2, shuf_func2));
a3 = half2(shuffle(color3, shuf_func1));
b3 = half2(shuffle(color3, shuf_func2));
a4 = half2(shuffle(color4, shuf_func1));
b4 = half2(shuffle(color4, shuf_func2));
c1 = hi(mulrnd(RB_SCALE, shift(a1, 1)));
c2 = hi(mulrnd(RB_SCALE, shift(a2, 1)));
c3 = hi(mulrnd(RB_SCALE, shift(a3, 1)));
c4 = hi(mulrnd(RB_SCALE, shift(a4, 1)));
d1 = c1 + hi(mulrnd(G_SCALE, shift(b1, 1)));
d2 = c2 + hi(mulrnd(G_SCALE, shift(b2, 1)));
d3 = c3 + hi(mulrnd(G_SCALE, shift(b3, 1)));
d4 = c4 + hi(mulrnd(G_SCALE, shift(b4, 1)));
z1 = half2(shuffle(d1 + half2(shift(int(c1), -16)), shuf_func3));
z2 = half2(shuffle(d2 + half2(shift(int(c2), -16)), shuf_func3));
z3 = half2(shuffle(d3 + half2(shift(int(c3), -16)), shuf_func3));
z4 = half2(shuffle(d4 + half2(shift(int(c4), -16)), shuf_func3));
temp0 = select(Y_combine, z2, z1);
temp1 = select(Y_combine, z4, z3);
Ya[2] = hi(mulrnd(Ymadj, shift(temp0, 1)))+Yaadj;
Ya[3] = hi(mulrnd(Ymadj, shift(temp1, 1)))+Yaadj;
first = hi(((a1 + a3) - (z1 + z3)) * C_SCALE) + one_two_eight;
second = hi(((a2 + a4) - (z2 + z4)) * C_SCALE) + one_two_eight;
first = commucperm(perm_a, first);
second = commucperm(perm_b, second);
CrCbout << select(low, first, second);
// Row 4,5
datain >> color1 >> color2 >> color3 >> color4;
a1 = half2(shuffle(color1, shuf_func1));
b1 = half2(shuffle(color1, shuf_func2));
a2 = half2(shuffle(color2, shuf_func1));
b2 = half2(shuffle(color2, shuf_func2));
a3 = half2(shuffle(color3, shuf_func1));
b3 = half2(shuffle(color3, shuf_func2));
a4 = half2(shuffle(color4, shuf_func1));
b4 = half2(shuffle(color4, shuf_func2));
c1 = hi(mulrnd(RB_SCALE, shift(a1, 1)));
c2 = hi(mulrnd(RB_SCALE, shift(a2, 1)));
c3 = hi(mulrnd(RB_SCALE, shift(a3, 1)));
c4 = hi(mulrnd(RB_SCALE, shift(a4, 1)));
d1 = c1 + hi(mulrnd(G_SCALE, shift(b1, 1)));
d2 = c2 + hi(mulrnd(G_SCALE, shift(b2, 1)));
d3 = c3 + hi(mulrnd(G_SCALE, shift(b3, 1)));
d4 = c4 + hi(mulrnd(G_SCALE, shift(b4, 1)));
z1 = half2(shuffle(d1 + half2(shift(int(c1), -16)), shuf_func3));
z2 = half2(shuffle(d2 + half2(shift(int(c2), -16)), shuf_func3));
z3 = half2(shuffle(d3 + half2(shift(int(c3), -16)), shuf_func3));
z4 = half2(shuffle(d4 + half2(shift(int(c4), -16)), shuf_func3));
temp0 = select(Y_combine, z2, z1);
temp1 = select(Y_combine, z4, z3);
Ya[4] = hi(mulrnd(Ymadj, shift(temp0, 1)))+Yaadj;
Ya[5] = hi(mulrnd(Ymadj, shift(temp1, 1)))+Yaadj;
first = hi(((a1 + a3) - (z1 + z3)) * C_SCALE) + one_two_eight;
second = hi(((a2 + a4) - (z2 + z4)) * C_SCALE) + one_two_eight;
first = commucperm(perm_a, first);
second = commucperm(perm_b, second);
CrCbout << select(low, first, second);
// Row 6,7
datain >> color1 >> color2 >> color3 >> color4;
a1 = half2(shuffle(color1, shuf_func1));
b1 = half2(shuffle(color1, shuf_func2));
a2 = half2(shuffle(color2, shuf_func1));
b2 = half2(shuffle(color2, shuf_func2));
a3 = half2(shuffle(color3, shuf_func1));
b3 = half2(shuffle(color3, shuf_func2));
a4 = half2(shuffle(color4, shuf_func1));
b4 = half2(shuffle(color4, shuf_func2));
c1 = hi(mulrnd(RB_SCALE, shift(a1, 1)));
c2 = hi(mulrnd(RB_SCALE, shift(a2, 1)));
c3 = hi(mulrnd(RB_SCALE, shift(a3, 1)));
c4 = hi(mulrnd(RB_SCALE, shift(a4, 1)));
d1 = c1 + hi(mulrnd(G_SCALE, shift(b1, 1)));
d2 = c2 + hi(mulrnd(G_SCALE, shift(b2, 1)));
d3 = c3 + hi(mulrnd(G_SCALE, shift(b3, 1)));
d4 = c4 + hi(mulrnd(G_SCALE, shift(b4, 1)));
z1 = half2(shuffle(d1 + half2(shift(int(c1), -16)), shuf_func3));
z2 = half2(shuffle(d2 + half2(shift(int(c2), -16)), shuf_func3));
z3 = half2(shuffle(d3 + half2(shift(int(c3), -16)), shuf_func3));
z4 = half2(shuffle(d4 + half2(shift(int(c4), -16)), shuf_func3));
temp0 = select(Y_combine, z2, z1);
temp1 = select(Y_combine, z4, z3);
Ya[6] = hi(mulrnd(Ymadj, shift(temp0, 1)))+Yaadj;
Ya[7] = hi(mulrnd(Ymadj, shift(temp1, 1)))+Yaadj;
first = hi(((a1 + a3) - (z1 + z3)) * C_SCALE) + one_two_eight;
second = hi(((a2 + a4) - (z2 + z4)) * C_SCALE) + one_two_eight;
first = commucperm(perm_a, first);
second = commucperm(perm_b, second);
CrCbout << select(low, first, second);
// Row 8,9
datain >> color1 >> color2 >> color3 >> color4;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -