⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 icolor_kc.cpp

📁 H.264完整的C语言代码和DCT的代码
💻 CPP
字号:
#include "idb_kernelc.hpp"   
#include "mpeg.hpp"
#include "idb_kernelc2.hpp"   

// Ties the kernel name `icolor` to the file KERNELS_DIR "icolor_kc.uc".
// NOTE(review): KERNELDEF and KERNELS_DIR are defined outside this file
// (presumably by the idb_kernelc headers), and the .uc file is presumably the
// compiled form of the kernel below -- confirm against the build setup.
KERNELDEF(icolor, KERNELS_DIR "icolor_kc.uc");

// color.i    (originally rgc_yc.i)
// Ujval Kapasi
// 3/28/97
// 7/22/97
// 6/31/98, revised by Manman Ren
// 12/10/99, revised by ujk
//
// Color space warp (RGB space ---> YCrCb space) for JPEG
//   Includes a 2:1 subsampling in the horizontal and vertical direction
//   for Cr and Cb. These are obtained by interpolating between two pixels.
//   The sampling is done as follows (4:2:0 MPEG-2 format) :
//      o   o   o   o   o     (the "o" represents a luminance sample,
//      .       .       .      the "." represents a chrominance sample)
//      o   o   o   o   o
//
//      o   o   o   o   o
//      .       .       .
//      o   o   o   o   o
//
//
// Block layout :    1  2       -- each block is 8x8
//                   3  4
//
// The input comes in such that the first row of block 1 comes in, one
//   per cluster. Then the first row of block 2, then the second row of
//   block 1, second row of block 2, and so on.  Then after the eighth
//   row of 2, the same pattern repeats for the rows of blocks 3 and 4.
//
// Input  : color = 0 | R || G | B     // in 8.0 format
//
// Output :   Y         =   Y2  ||  Y1       // in 16.0 format
//            Y (contd) =   Y4  ||  Y3       // in 16.0 format
//            C         =   Cr  ||  Cb       // in 16.0 format
//
//
// NOTE : IGNORING GAMMA CORRECTION for now
//

// icolor : converts packed RGB pixels to luminance (Y) and 2:1 subsampled
//          chrominance (Cr, Cb).
//
// Parameters
//   datain  : input stream of packed pixels, one ubyte4 word per pixel,
//             laid out as 0 | R || G | B (see the file header).
//   Yout    : output stream of luminance; each word carries two 16-bit
//             samples (Y2 || Y1, then Y4 || Y3).
//   CrCbout : output stream of chrominance; each word carries Cr || Cb.
//
// Every loop iteration reads four words from datain and writes two words to
// Yout and one word to CrCbout.
kernel icolor(istream<ubyte4> datain,
              ostream<half2> Yout,
              ostream<half2> CrCbout)
{
  // constants
  // `low` is true on clusters whose id is below 4; it is only used by the
  // final select() that chooses between `first` and `second`.
  cc low = itocc(cid() < 4);
  // Per-half condition code: the two 16-bit halves compare differently
  // (0 == 0 in the upper half, 0 != 1 in the lower half), so select() can
  // take the upper half from one operand and the lower half from the other.
  cc Y_combine = itocc(half2(0) == half2(1));   //  TRUE  ||  FALSE

  // Shuffle control words
  // NOTE(review): going by the annotations below, each byte of a control word
  // names a source byte (0 = lowest) and the value 8 yields a zero byte --
  // confirm against the shuffle() documentation.
  byte4 shuf_func1  = 0x08020800;           // zero |  3rd || zero |  1st
  byte4 shuf_func2  = 0x08080801;           // zero | zero || zero |  2nd
  byte4 shuf_func3  = 0x01000100;           //         1st ||  1st

  // Luminance transform constants (in 1.15 format)
  half2 RB_SCALE = 0x26460e98;         //           0.299  ||   0.114
  half2 G_SCALE  = 0x00004b23;         //             0    ||   0.587
  // Chrominance scale factors (also 1.15): upper half scales R - Y (Cr),
  // lower half scales B - Y (Cb).
  half2 C_SCALE  = 0x4fe33f35;         //         0.62411  ||   0.4938

  // For adding by 128
  half2 one_two_eight = 0x00800080;


  // communication permutations
  // Each hex digit names the source cluster for the destination cluster in
  // that digit position (digit 0 = cluster 0), as tabulated here:
  // cluster :  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
  // perm_a  : C7  | C6  | C5  | C4  | C6  | C4  | C2  | C0  |
  // perm_b  : C6  | C4  | C2  | C0  | C3  | C2  | C1  | C0  |
  uc<int> perm_a = 0x76546420;
  uc<int> perm_b = 0x64203210;

  // each iter. does one pixel in each of two blocks for two rows (4 pixels)
  // NOTE(review): pipeline(1) is presumably a scheduling directive for the
  // kernel compiler; its exact meaning is not visible in this file.
  loop_stream(datain) pipeline(1) {

    ubyte4 color1, color2, color3, color4;
    half2 first, second;
    half2 a1, a2, a3, a4, b1, b2, b3, b4, c1, c2, c3, c4, d1, d2, d3, d4;
    half2 e1, e2, e3, e4, a1a3, a2a4, y1, y2, y3, y4, z1, z2, z3, z4;
    half2 temp0, temp1, another0, another1, out00, out01, out10, out11;

    // Read four packed pixels (8.0 components, per the file header). By the
    // input ordering described there, color1/color3 come from the left block
    // on two consecutive rows and color2/color4 from the right block on the
    // same two rows.
    datain >> color1 >> color2 >> color3 >> color4;

    // Unpack the bytes into 16-bit halves (16.0 format):
    // a =    R   ||   B
    // b =    0   ||   G
    a1 = half2(shuffle(color1, shuf_func1));
    b1 = half2(shuffle(color1, shuf_func2));
    a2 = half2(shuffle(color2, shuf_func1));
    b2 = half2(shuffle(color2, shuf_func2));
    a3 = half2(shuffle(color3, shuf_func1));
    b3 = half2(shuffle(color3, shuf_func2));
    a4 = half2(shuffle(color4, shuf_func1));
    b4 = half2(shuffle(color4, shuf_func2));

    // After shifting, the inputs are in 15.1 format. The constants are in
    // 1.15 format, so the result will be in 16.0 format. The result is
    // c =  0.299R || 0.114B
    c1 = hi(mulrnd(RB_SCALE, shift(a1, 1)));
    c2 = hi(mulrnd(RB_SCALE, shift(a2, 1)));
    c3 = hi(mulrnd(RB_SCALE, shift(a3, 1)));
    c4 = hi(mulrnd(RB_SCALE, shift(a4, 1)));

    // d = (0.299R) || 0.114B + 0.587G
    // The upper half is simply carried over from c (the upper halves of b and
    // G_SCALE are zero); it is not picked up by shuf_func3 further down.
    d1 = c1 + hi(mulrnd(G_SCALE, shift(b1, 1)));
    d2 = c2 + hi(mulrnd(G_SCALE, shift(b2, 1)));
    d3 = c3 + hi(mulrnd(G_SCALE, shift(b3, 1)));
    d4 = c4 + hi(mulrnd(G_SCALE, shift(b4, 1)));

    // Move the upper half of c down into the lower half (shift by -16 on the
    // whole word viewed as an int):
    // e =     0   || 0.299R
    e1 = half2(shift(int(c1), -16));
    e2 = half2(shift(int(c2), -16));
    e3 = half2(shift(int(c3), -16));
    e4 = half2(shift(int(c4), -16));

    // Sum R || B over the two rows of each block; this is the pixel part of
    // the two-pixel chrominance average computed below.
    a1a3 = a1 + a3;
    a2a4 = a2 + a4;

    // y =    -   ||   Y (0.114B + 0.587G + 0.299R)
    y1 = d1 + e1;
    y2 = d2 + e2;
    y3 = d3 + e3;
    y4 = d4 + e4;

    // Replicate the lower half into both halves:
    // z =    Y   ||   Y
    z1 = half2(shuffle(y1, shuf_func3));
    z2 = half2(shuffle(y2, shuf_func3));
    z3 = half2(shuffle(y3, shuf_func3));
    z4 = half2(shuffle(y4, shuf_func3));

    // Pack two luminance samples per word: Y2 || Y1 and Y4 || Y3.
    temp0 = select(Y_combine, z2, z1);
    temp1 = select(Y_combine, z4, z3);

    // Scale luminance by 219/256 (1.15 constant) and add an offset of 16,
    // using the same shift-then-multiply pattern as above.
    // NOTE(review): presumably this maps 0..255 onto the 16..235 nominal luma
    // range -- confirm against the consumer of Yout.
    half2 Ymadj = 0x6d806d80;   // 219/256
    half2 Yaadj = 0x00100010;   // 16
    Yout << hi(mulrnd(Ymadj, shift(temp0, 1)))+Yaadj;
    Yout << hi(mulrnd(Ymadj, shift(temp1, 1)))+Yaadj;

    // a and z are in 16.0. first and second are averages of two pixels,
    // where each pixel adds the following weight to the average :
    // (a-z)/1.6 + 128 || (a-z)/2 + 128.  A little math is done to factor
    // the multiplication (division) out of the average to reduce the number
    // of necessary multiplications and to obtain the maximum precision
    // without shifts. C_SCALE is in 1.15, so the division by two is implicit,
    // and no shift is necessary. The addition by 128 is also factored out.
    // Each pixel has an additive factor of 128, for a total of 256, divided
    // by two is 128.
    // Result layout: Cr || Cb, `first` for the left block, `second` for the
    // right block.
    first = hi(mulrnd((a1a3 - (z1 + z3)), C_SCALE)) + one_two_eight;
    second = hi(mulrnd((a2a4 - (z2 + z4)), C_SCALE)) + one_two_eight;

    // The first four clusters get first, and the second half get second.
    // Per the permutation tables above, clusters 0-3 receive `first` from
    // clusters 0, 2, 4, 6 and clusters 4-7 receive `second` from clusters
    // 0, 2, 4, 6, so only the even clusters' chrominance values are kept.
    first = commucperm(perm_a, first);
    second = commucperm(perm_b, second);

    CrCbout << select(low, first, second);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -