📄 ycbcr422pl16_to_rgb565_h.h

📁 基于DM642平台的视频缩小放大功能程序源代码
💻 H
字号:
/* ======================================================================== */
/*  TEXAS INSTRUMENTS, INC.                                                 */
/*                                                                          */
/*  NAME                                                                    */
/*      ycbcr422pl16_to_rgb565 -- Planarized YCbCr 4:2:2/4:2:0 to 16-bit    */
/*                                RGB 5:6:5 color space conversion.         */
/*                                                                          */
/*  USAGE                                                                   */
/*      This function is C callable, and is called according to this        */
/*      C prototype:                                                        */
/*                                                                          */
/*      void ycbcr422pl16_to_rgb565_asm                                     */
/*      (                                                                   */
/*          const short         coeff[5],  -- Matrix coefficients.          */
/*          const short         *y_data,   -- Luminance data  (Y')          */
/*          const short         *cb_data,  -- Blue color-diff (B'-Y')       */
/*          const short         *cr_data,  -- Red color-diff  (R'-Y')       */
/*          unsigned short      *rgb_data, -- RGB 5:6:5 packed pixel out.   */
/*          unsigned            num_pixels -- # of luma pixels to process.  */
/*      )                                                                   */
/*                                                                          */
/*      The 'coeff[]' array contains the color-space-conversion matrix      */
/*      coefficients.  The 'y_data', 'cb_data' and 'cr_data' pointers       */
/*      point to the separate input image planes.  The 'rgb_data' pointer   */
/*      points to the output image buffer, and must be word aligned.        */
/*                                                                          */
/*      The kernel is designed to process arbitrary amounts of 4:2:2        */
/*      image data, although 4:2:0 image data may be processed as well.     */
/*      For 4:2:2 input data, the 'y_data', 'cb_data' and 'cr_data'         */
/*      arrays may hold an arbitrary amount of image data.  For 4:2:0       */
/*      input data, only a single scan-line (or portion thereof) may be     */
/*      processed at a time.                                                */
/*                                                                          */
/*      The coefficients in the coeff array must be in signed Q13 form,     */
/*      and correspond to the following matrix equation:                    */
/*                                                                          */
/*          [ Y' -  16 ]   [ coeff[0] 0.0000   coeff[1] ]     [ R']         */
/*          [ Cb - 128 ] * [ coeff[0] coeff[2] coeff[3] ]  =  [ G']         */
/*          [ Cr - 128 ]   [ coeff[0] coeff[4] 0.0000   ]     [ B']         */
/*                                                                          */
/*  DESCRIPTION                                                             */
/*      This function runs for 46 + (num_pixels * 3) cycles, including      */
/*      6 cycles of function-call overhead.  Interrupts are masked for      */
/*      37 + (num_pixels * 3) cycles.  Code size is 512 bytes.              */
/*                                                                          */
/*      This kernel performs Y'CbCr to RGB conversion.  From the Color      */
/*      FAQ, http://home.inforamp.net/~poynton/ColorFAQ.html :              */
/*                                                                          */
/*          Various scale factors are applied to (B'-Y') and (R'-Y')        */
/*          for different applications.  The Y'PbPr scale factors are       */
/*          optimized for component analog video.  The Y'CbCr scaling       */
/*          is appropriate for component digital video, JPEG and MPEG.      */
/*          Kodak's PhotoYCC(tm) uses scale factors optimized for the       */
/*          gamut of film colors.  Y'UV scaling is appropriate as an        */
/*          intermediate step in the formation of composite NTSC or PAL     */
/*          video signals, but is not appropriate when the components       */
/*          are kept separate.  Y'UV nomenclature is now used rather        */
/*          loosely, and it sometimes denotes any scaling of (B'-Y')        */
/*          and (R'-Y').  Y'IQ coding is obsolete.                          */
/*                                                                          */
/*      This code can perform various flavors of Y'CbCr to RGB conversion   */
/*      as long as the offsets on Y, Cb, and Cr are -16, -128, and -128,    */
/*      respectively, and the coefficients match the pattern shown.         */
/*                                                                          */
/*      The kernel implements the following matrix form, which involves 5   */
/*      unique coefficients:                                                */
/*                                                                          */
/*          [ Y' -  16 ]   [ coeff[0] 0.0000   coeff[1] ]     [ R']         */
/*          [ Cb - 128 ] * [ coeff[0] coeff[2] coeff[3] ]  =  [ G']         */
/*          [ Cr - 128 ]   [ coeff[0] coeff[4] 0.0000   ]     [ B']         */
/*                                                                          */
/*                                                                          */
/*      Below are some common coefficient sets, along with the matrix       */
/*      equation that they correspond to.   Coefficients are in signed      */
/*      Q13 notation, which gives a suitable balance between precision      */
/*      and range.                                                          */
/*                                                                          */
/*      1.  Y'CbCr -> RGB conversion with RGB levels that correspond to     */
/*          the 219-level range of Y'.  Expected ranges are [16..235] for   */
/*          Y' and [16..240] for Cb and Cr.                                 */
/*                                                                          */
/*          coeff[] = { 0x2000, 0x2BDD, -0x0AC5, -0x1658, 0x3770 };         */
/*                                                                          */
/*          [ Y' -  16 ]   [ 1.0000    0.0000    1.3707 ]     [ R']         */
/*          [ Cb - 128 ] * [ 1.0000   -0.3365   -0.6982 ]  =  [ G']         */
/*          [ Cr - 128 ]   [ 1.0000    1.7324    0.0000 ]     [ B']         */
/*                                                                          */
/*      2.  Y'CbCr -> RGB conversion with the 219-level range of Y'         */
/*          expanded to fill the full RGB dynamic range.  (The matrix has   */
/*          been scaled by 255/219.)  Expected ranges are [16..235] for Y'  */
/*          and [16..240] for Cb and Cr.                                    */
/*                                                                          */
/*          coeff[] = { 0x2543, 0x3313, -0x0C8A, -0x1A04, 0x408D };         */
/*                                                                          */
/*          [ Y' -  16 ]   [ 1.1644    0.0000    1.5960 ]     [ R']         */
/*          [ Cb - 128 ] * [ 1.1644   -0.3918   -0.8130 ]  =  [ G']         */
/*          [ Cr - 128 ]   [ 1.1644    2.0172    0.0000 ]     [ B']         */
/*                                                                          */
/*      Other scalings of the color differences (B'-Y') and (R'-Y')         */
/*      (sometimes incorrectly referred to as U and V) are supported, as    */
/*      long as the color differences are unsigned values centered around   */
/*      128 rather than signed values centered around 0, as noted above.    */
/*                                                                          */
/*      In addition to performing plain color-space conversion, color       */
/*      saturation can be adjusted by scaling coeff[1] through coeff[4].    */
/*      Similarly, brightness can be adjusted by scaling coeff[0].          */
/*      General hue adjustment can not be performed, however, due to the    */
/*      two zeros hard-coded in the matrix.                                 */
/*                                                                          */
/*  TECHNIQUES                                                              */
/*      Pixel replication is performed implicitly on chroma data to         */
/*      reduce the total number of multiplies required.  The chroma         */
/*      portion of the matrix is calculated once for each Cb, Cr pair,      */
/*      and the result is added to both Y' samples.                         */
/*                                                                          */
/*      Luma is biased downwards to produce R, G, and B values that are     */
/*      signed quantities centered around zero, rather than unsigned qtys.  */
/*      This allows us to use SSHL to perform saturation, followed by a     */
/*      quick XOR to correct the sign bits in the final packed pixels.      */
/*      The required downward bias is 128 shifted left by the Q-point, 13.  */
/*                                                                          */
/*      To save two instructions, I transformed "(y0-16)*luma - (128<<13)"  */
/*      to the slightly more cryptic "y0*luma - (16*luma + (128<<13))".     */
/*      This gives me the non-obvious but effective y_bias value            */
/*      -((128 << 13) + 16*luma).  The transformation allows me to fit in   */
/*      a 6 cycle loop.                                                     */
/*                                                                          */
/*      Twin pointers are used for the stack and coeff[] arrays for speed.  */
/*                                                                          */
/*      Because the loop accesses four different arrays at three different  */
/*      strides, no memory accesses are allowed to parallelize in the       */
/*      loop.  No bank conflicts occur, as a result.                        */
/*                                                                          */
/*      Creatively constructed multiplies are used to avoid a bottleneck    */
/*      on shifts in the loop.  In particular, the 5-bit mask 0xF8000000    */
/*      doubles as a right-shift constant that happens to negate while      */
/*      shifting.  This negation is reversed by merging the bits with a     */
/*      SUB instead of an ADD or OR.                                        */
/*                                                                          */
/*      Prolog and epilog collapsing have been performed, with only a       */
/*      partial stage of prolog and epilog left uncollapsed.  The partial   */
/*      stages are interscheduled with the rest of the code for speed.      */
/*                                                                          */
/*      The stack pointer is saved in IRP to allow all 32 registers to      */
/*      be used in the loop.  This enabled prolog collapsing by freeing     */
/*      up a predicate register.  The prolog collapse counter is            */
/*      implemented as a MPY which shifts a constant left by 3 bits each    */
/*      iteration.  The counter is initialized from one of the other        */
/*      constant registers, thereby reducing the S-unit bottleneck in the   */
/*      setup code.                                                         */
/*                                                                          */
/*      Instructions have been scheduled to minimize fetch-packet padding   */
/*      NOPs.  Only 3 padding NOPs and 1 explicit NOP remain.               */
/*                                                                          */
/*  ASSUMPTIONS                                                             */
/*      An even number of luma samples needs to be processed.               */
/*      The output image must be word aligned.                              */
/*                                                                          */
/*  NOTES                                                                   */
/*      No bank conflicts occur.                                            */
/*                                                                          */
/*      Codesize is 512 bytes.                                              */
/*                                                                          */
/*      On average, one bank per cycle is accessed on a C6201 in the loop,  */
/*      with 1 cycle of 6 accessing no banks, and 1 cycle accessing two.    */
/*                                                                          */
/*      The kernel requires 14 words of stack space.                        */
/*                                                                          */
/*  SOURCE                                                                  */
/*      Poynton, Charles et al.  "The Color FAQ,"  1999.                    */
/*          http://home.inforamp.net/~poynton/ColorFAQ.html                 */
/* ======================================================================== */

#ifndef _YCBCR422PL_TO_RGB565_H_H
#define _YCBCR422PL_TO_RGB565_H_H

/* ------------------------------------------------------------------------ */
/*  The kernel is documented as C callable, so it must keep an unmangled    */
/*  C symbol name when this header is included from a C++ translation unit. */
/* ------------------------------------------------------------------------ */
#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */
/*  ycbcr422pl16_to_rgb565_asm                                              */
/*                                                                          */
/*  Converts planar Y'CbCr 4:2:2 image data (or a single scan line of       */
/*  4:2:0 data) to packed 16-bit RGB 5:6:5 pixels.                          */
/*                                                                          */
/*  coeff      Five color-space-conversion matrix coefficients, in signed   */
/*             Q13 form (see the matrix layout in the file header comment). */
/*  y_data     Luma (Y') input plane.                                       */
/*  cb_data    Blue color-difference (Cb) input plane.                      */
/*  cr_data    Red color-difference (Cr) input plane.                       */
/*  rgb_data   Output buffer of packed RGB 5:6:5 pixels; must be word       */
/*             aligned.                                                     */
/*  num_pixels Number of luma samples to process; must be even.             */
/* ------------------------------------------------------------------------ */
void ycbcr422pl16_to_rgb565_asm
(
    const short         coeff[5],  /* Matrix coefficients.          */
    const short         *y_data,   /* Luminance data  (Y')          */
    const short         *cb_data,  /* Blue color-diff (B'-Y')       */
    const short         *cr_data,  /* Red color-diff  (R'-Y')       */
    unsigned short      *rgb_data, /* RGB 5:6:5 packed pixel out.   */
    unsigned            num_pixels /* # of luma pixels to process.  */
);

#ifdef __cplusplus
}
#endif

#endif

/* ======================================================================== */
/*  End of file:  ycbcr422pl16_to_rgb565_h.h                                */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2000 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -