📄 v422to444_v2mult_24t.v

📁 YUV422转YUV444的FPGA插植算法
💻 V
字号:



//-----------------------------------------------------------------
// v422to444_v2mult_24t.v - 422 to 444 Format Converter
//               Virtex-II Video Demo Board
//
//
//
//
//                  Author: Gregg C. Hawkes
//                  Senior Staff Applications Engineer
//
//                  Video Applications
//                  Advanced Products Division
//                  Xilinx, Inc.
//
//                  Copyright (c) 1999 Xilinx, Inc.
//                  All rights reserved
//
//                  Date:   Aug. 6, 2001
//                  For:    Video Demo Board
//
//                  RESTRICTED RIGHTS LEGEND
//
//      This software has not been published by the author, and 
//      has been disclosed to others for the purpose of enhancing 
//      and promoting design productivity in Xilinx products.
//
//      Therefore use, duplication or disclosure, now and in the 
//      future should give consideration to the productivity 
//      enhancements afforded the user of this code by the author's 
//      efforts.  Thank you for using our products !
//
// Disclaimer:  THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY 
//              WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY 
//              IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
//              A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
//
//
// 
// Revision:
//          Aug. 6, 2001     Creation
//
//
// Other modules instanced in this design:
//
//          none
/*

BRIEF DESCRIPTION

The process of 4:2:2 to 4:4:4 is simply creating the missing Cr and Cb
components. This version accomplishes this task by interpolating the
missing Cr and Cb samples with the 24 tap FIR filter described below.

DETAILED DESCRIPTION

The video standard ITU-R BT.601 was introduced as the need for
transporting digital component video between countries and standards
increased. The analog component R'G'B' can be sampled in a very regular
way and converted from 4:4:4 to the digital 4:2:2 format, essentially
cutting in half the number of different components, Cr and Cb. 

The digital data is efficiently stored or transmitted to a destination
that reverses the process, i.e. converts back to 4:4:4 format, and
produces analog YUV or R'G'B' for display.


422 TO 444 CONVERSION
---------------------


Bob Turney, Xilinx Labs supplied me with this one to try.


  -   4  = 18'h3FFFC
      6  = 18'h00006
  -  12  = 18'h3FFF4
     20  = 18'h00014
  -  32  = 18'h3FFE0
     48  = 18'h00030
  -  70  = 18'h3FFBA
    104  = 18'h00068
  - 152  = 18'h3FF68
    236  = 18'h000EC
  - 420  = 18'h3FE5C
   1300  = 18'h00514
   1300  = 18'h00514
  - 420  = 18'h3FE5C
    236  = 18'h000EC
  - 152  = 18'h3FF68
    104  = 18'h00068
  -  70  = 18'h3FFBA
     48  = 18'h00030
  -  32  = 18'h3FFE0
     20  = 18'h00014
  -  12  = 18'h3FFF4
      6  = 18'h00006
  -   4  = 18'h3FFFC


       CrCb[i] = (
             -    4*(CrCb[1]+CrCb[24])
             +    6*(CrCb[2]+CrCb[23])
             -   12*(CrCb[3]+CrCb[22])
             +   20*(CrCb[4]+CrCb[21])
             -   32*(CrCb[5]+CrCb[20])
             +   48*(CrCb[6]+CrCb[19])
             -   70*(CrCb[7]+CrCb[18])
             +  104*(CrCb[8]+CrCb[17])
             -  152*(CrCb[9]+CrCb[16])
             +  236*(CrCb[10]+CrCb[15])
             -  420*(CrCb[11]+CrCb[14])
             + 1300*(CrCb[12]+CrCb[13]))/2048; 


Design Information
------------------
xc2v1000-ff896-6

MULT18X18s:  12   30%
Slices      864   16%
FFs       1,224   11%
LUTs        409    3%
IO           49   11%
gates    62,199

Minimum period:   12.042ns (Maximum frequency: 83 MHz)
Maximum net delay:   5.124ns

Note: Using coregen the V2 Mults can be replaced with parallel, 10 bit
signed integer X 12 bit coefficients for roughly 68 LUTs and 34
Registers each. This alternative would save the high performance V2
Multipliers for other uses or allow the design to directly map to
SPARTAN families.

*/


`timescale 1ns / 100ps



module v422to444_v2mult_24t (
rst,             // resets input data register and control
clk,             // video component rate clock, 27Mhz for SDTV

Fi,              // Low to High signals start of Field One
Vi,              // High signals Vertical Blanking
Hi,              // High signals Horizontal Blanking

Fo,              // Field signal delayed by pipe length
Vo,              // Vertical signal delayed by pipe length
Ho,              // Horizontal signal delayed by pipe length
ceo,             // output data rate is 1/2 the clock rate

YCrCb_in,        // multiplexed video component data, 10 bits

Y_out,           // Y out, 10 bits
Cr_out,          // Cr out, 10 bits, filtered samples clamped to 040..3AC hex
Cb_out           // Cb out, 10 bits, filtered samples clamped to 040..3AC hex
);

// NOTE(review): the original port comments described the data as I[8].F[2]
// twos complement, but the datapath below zero-extends the chroma samples
// and clamps the filter result to 10'h040..10'h3AC, i.e. it treats the
// samples as unsigned.  Confirm the intended number format with the source
// of YCrCb_in.


/*
TAPS must be an even number, thereby making the length of the pipe
an even number of FFs.

The twelve MULT18X18 instances and the twelve entry CrCb_mult array below
are hard wired for 24 taps (they read CrCb_pre_add[0] .. CrCb_pre_add[11]),
so changing TAPS also requires editing the multiplier section.
*/
parameter TAPS = 24;
parameter FILTER_PIPE_LENGTH = 4;

input rst, clk, Hi, Vi, Fi;
input [9:0] YCrCb_in;

output [9:0] Y_out, Cr_out, Cb_out;
output Ho, Vo, Fo, ceo;

// control signal delay lines, one shift register per SMPTE flag
reg [TAPS+FILTER_PIPE_LENGTH+7:0] H_rg, V_rg, F_rg;
wire Ho, Vo, Fo, ceo, H_rising;

// component phase counter and its decodes
reg [1:0] cnt;
wire Y_ld, select_real_CrCb, select_filt_CrCb;

// input capture registers
reg [9:0] Y_rg, CrCb_rg;

// pixel component pipelines
reg [9:0] Y_pipe  [(TAPS/2)+FILTER_PIPE_LENGTH-1:0];
reg [9:0] CrCb_pipe [(TAPS*2)-1:0];

// filter components
reg [10:0] CrCb_pre_add [(TAPS/2)-1:0];   // 10 bit + 10 bit = 11 bit sums

wire [35:0] P0, P1, P2, P3, P4, P5,
            P6, P7, P8, P9, P10, P11;

reg [23:0] CrCb_mult [11:0];     // low 24 bits of each product
reg [23:0] CrCb_post_add;        // 24 bit twos complement sum of products
reg [9:0] CrCb_corrected, Cb_filt, Cr_filt;

reg [9:0] Y_out, Cr_out, Cb_out;

integer i;



//-----------------------------------------------------------------------
//
/*
Delay SMPTE control signals F, V, H, by an amount equivalent to the
module's pipe length. This will allow different modules to be swapped out
without changing exterior control. This occurs in most of my modules.
*/


always @ (posedge clk) begin
  if (rst) begin
    F_rg <= 0;
    V_rg <= 0;
    H_rg <= 0;
  end
  else begin
    // shift left by one, new sample enters at bit 0
    F_rg <= {F_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Fi};
    V_rg <= {V_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Vi};
    H_rg <= {H_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Hi};
  end
end

// the delayed flags are taken from the last stage of each shift register
assign Ho = H_rg[TAPS+FILTER_PIPE_LENGTH+7];
assign Vo = V_rg[TAPS+FILTER_PIPE_LENGTH+7];
assign Fo = F_rg[TAPS+FILTER_PIPE_LENGTH+7];



//-----------------------------------------------------------------------
//
// Identify Y and CrCb valid
//

// H_rg[0] is Hi delayed by one clock, so H_rising is high for the one
// clock in which Hi has just gone from low to high.
assign H_rising = ~H_rg[0] & Hi;

// Free running 2 bit phase counter, forced to 1 by reset or by the rising
// edge of Hi so that the phase is re-aligned on every line.
always @ (posedge clk) begin
  if (rst | H_rising) cnt <= 2'b01;
  else cnt <= cnt + 2'b01;
end

assign Y_ld = (cnt == 2'b01) | (cnt == 2'b11);   // odd phases carry Y
assign ceo = Y_ld;
assign select_real_CrCb = (cnt == 2'b11);        // output the delayed input CrCb
assign select_filt_CrCb = (cnt == 2'b01);        // output the interpolated CrCb


//-----------------------------------------------------------------------
//
// Separate the Y and CrCb data streams
//
always @ (posedge clk) begin
  if (rst) Y_rg <= 10'h040;
  else if (Y_ld)  Y_rg <= YCrCb_in;
  else Y_rg <= Y_rg;
end

always @ (posedge clk) begin
  if (rst) CrCb_rg <= 0;
  else if (~Y_ld) CrCb_rg <= YCrCb_in;
  else CrCb_rg <= CrCb_rg;
end



//-----------------------------------------------------------------------
//
// 10 bit Y, Cr and Cb pipe registers, connected head to tail.  There are
// (TAPS/2)+FILTER_PIPE_LENGTH Y registers (16 with the default parameters)
// and 2*TAPS CrCb registers (48 with the default parameters).  The Y pipe
// advances when Y_ld is high, the CrCb pipe advances when Y_ld is low.
//
always @ (posedge clk) begin
  if (rst) Y_pipe[0] <= 10'h040;
  else if (Y_ld)  Y_pipe[0] <= Y_rg;
  else Y_pipe[0] <= Y_pipe[0];
end

always @ (posedge clk) begin
  for (i = 1; i <= (TAPS/2)+FILTER_PIPE_LENGTH-1; i = i+1) begin
    if (rst) Y_pipe[i] <= 10'h040;
    else if (Y_ld) Y_pipe[i] <= Y_pipe[i-1];
    else Y_pipe[i] <= Y_pipe[i];
  end
end

always @ (posedge clk) begin
  if (rst) CrCb_pipe[0] <= 0;
  else if (~Y_ld) CrCb_pipe[0] <= CrCb_rg;
  else CrCb_pipe[0] <= CrCb_pipe[0];
end

always @ (posedge clk) begin
  for (i = 1; i <= (2*TAPS)-1; i = i+1) begin
    if (rst) CrCb_pipe[i] <= 0;
    else if (~Y_ld) CrCb_pipe[i] <= CrCb_pipe[i-1];
    else CrCb_pipe[i] <= CrCb_pipe[i];
  end
end



//-----------------------------------------------------------------------
//
// Pre multiply adder
//
// The filter is symmetric, so the two samples that share a coefficient are
// added first and only TAPS/2 multiplies are needed.  Entry i pairs the
// odd pipe positions (2*i)+1 and (2*TAPS-1)-(2*i); with the default
// parameters entry 0 is the outermost pair (47, 1) and entry 11 the
// innermost pair (25, 23).
//
always @ (posedge clk) begin
  for (i = 0; i <= (TAPS/2)-1; i = i+1) begin
    if (rst) CrCb_pre_add[i] <= 0;
    else if (~Y_ld)
      CrCb_pre_add[i] <= CrCb_pipe[((2*TAPS)-1)-(2*i)] + CrCb_pipe[(2*i)+1];
    else CrCb_pre_add[i] <= CrCb_pre_add[i];
  end
end


//-----------------------------------------------------------------------
//
/*
Multipliers

A is the coefficient, sign extended to 18 bits.  B is the 11 bit pre-add
sum, zero extended to 18 bits (always non-negative).  Only the low 24 bits
of each 36 bit product are kept.

Range check, using the largest possible pre-add sum 1023 + 1023 = 2046:
  largest single product  = 2046 X 1300 =  2659800
  sum of all products, max = 2046 X  1714 =  3506844
  sum of all products, min = 2046 X -690  = -1411740
All of these fit in a 24 bit twos complement word, so truncating the
products and the sum to 24 bits loses nothing.
*/

MULT18X18 U1  (.P(P0),  .A(18'h3FFFC), .B({7'h00, CrCb_pre_add[0]}));   //   -4
MULT18X18 U2  (.P(P1),  .A(18'h00006), .B({7'h00, CrCb_pre_add[1]}));   //    6
MULT18X18 U3  (.P(P2),  .A(18'h3FFF4), .B({7'h00, CrCb_pre_add[2]}));   //  -12
MULT18X18 U4  (.P(P3),  .A(18'h00014), .B({7'h00, CrCb_pre_add[3]}));   //   20
MULT18X18 U5  (.P(P4),  .A(18'h3FFE0), .B({7'h00, CrCb_pre_add[4]}));   //  -32
MULT18X18 U6  (.P(P5),  .A(18'h00030), .B({7'h00, CrCb_pre_add[5]}));   //   48
MULT18X18 U7  (.P(P6),  .A(18'h3FFBA), .B({7'h00, CrCb_pre_add[6]}));   //  -70
MULT18X18 U8  (.P(P7),  .A(18'h00068), .B({7'h00, CrCb_pre_add[7]}));   //  104
MULT18X18 U9  (.P(P8),  .A(18'h3FF68), .B({7'h00, CrCb_pre_add[8]}));   // -152
MULT18X18 U10 (.P(P9),  .A(18'h000EC), .B({7'h00, CrCb_pre_add[9]}));   //  236
MULT18X18 U11 (.P(P10), .A(18'h3FE5C), .B({7'h00, CrCb_pre_add[10]}));  // -420
MULT18X18 U12 (.P(P11), .A(18'h00514), .B({7'h00, CrCb_pre_add[11]}));  // 1300



//-----------------------------------------------------------------------
//
// Register outputs of multiply
//
always @ (posedge clk) begin
  if (rst) begin
    for (i = 0; i <= 11; i = i+1) CrCb_mult[i] <= 0;
  end
  else if (~Y_ld) begin
    CrCb_mult[0]  <=  P0[23:0];
    CrCb_mult[1]  <=  P1[23:0];
    CrCb_mult[2]  <=  P2[23:0];
    CrCb_mult[3]  <=  P3[23:0];
    CrCb_mult[4]  <=  P4[23:0];
    CrCb_mult[5]  <=  P5[23:0];
    CrCb_mult[6]  <=  P6[23:0];
    CrCb_mult[7]  <=  P7[23:0];
    CrCb_mult[8]  <=  P8[23:0];
    CrCb_mult[9]  <=  P9[23:0];
    CrCb_mult[10] <= P10[23:0];
    CrCb_mult[11] <= P11[23:0];
  end
  else begin
    for (i = 0; i <= 11; i = i+1) CrCb_mult[i] <= CrCb_mult[i];
  end
end



//-----------------------------------------------------------------------
//
// Post multiply adder (this needs to run at 74.25 MHz for HDTV).  This
// is the performance bottle-neck.  It can be easily pipelined.
//
always @ (posedge clk) begin
  if (rst) CrCb_post_add <= 0;
  else if (~Y_ld) CrCb_post_add <=
       CrCb_mult[0] + CrCb_mult[1] + CrCb_mult[2]  + CrCb_mult[3]
    +  CrCb_mult[4] + CrCb_mult[5] + CrCb_mult[6]  + CrCb_mult[7]
    +  CrCb_mult[8] + CrCb_mult[9] + CrCb_mult[10] + CrCb_mult[11];
  else CrCb_post_add <=  CrCb_post_add;
end



//-----------------------------------------------------------------------
//
// Correct overflows and underflows
//
/*
Note 1: Wire shift by 11 bits is equivalent to dividing by 2048 on the
input. This is to account for the non fractional coefficients in the FIR
filter multiplies.

Note 2: A new Cr_filt and Cb_filt are available every four clock ticks.

Note 3: CrCb_post_add is a 24 bit twos complement value.  Because the
filter has negative coefficients the scaled sum can be negative or can
exceed 1023, so the clamp must look at the sign bit [23] and at the full
magnitude [22:11], not only at the ten result bits [20:11].  Looking at
[20:11] alone lets a negative sum through as a large positive number and
lets a sum of 1024 or more wrap around to a small one.
*/

always @ (posedge clk) begin
  if (rst) CrCb_corrected <= 10'h040;
  else if (~Y_ld) begin
    if (CrCb_post_add[23])                      // negative sum: underflow
      CrCb_corrected <= 10'h040;
    else if (CrCb_post_add[22:11] > 12'h3AC)    // above the upper limit
      CrCb_corrected <= 10'h3AC;
    else if (CrCb_post_add[22:11] < 12'h040)    // below the lower limit
      CrCb_corrected <= 10'h040;
    else                                        // in range, bits [22:21] are 0
      CrCb_corrected <= CrCb_post_add[20:11];
  end
  else CrCb_corrected <= CrCb_corrected;
end

// Two stage shift of the corrected stream: the newest corrected sample is
// held in Cr_filt and the one before it in Cb_filt.
always @ (posedge clk) begin
  if (rst) begin Cr_filt <= 0; Cb_filt <= 0; end
  else if (~Y_ld) begin Cr_filt <= CrCb_corrected; Cb_filt <= Cr_filt; end
end



//-----------------------------------------------------------------------
//
// Divide the CrCb stream into separate outgoing components
//
// On select_real_CrCb the chroma outputs come from the delayed input
// samples in CrCb_pipe; on select_filt_CrCb they come from the filter.
// Y is taken from the last stage of the Y pipe in both cases.  In the
// remaining phases the outputs hold their values.
//
always @ (posedge clk) begin
  if (rst) begin Y_out <= 0; Cr_out <= 0; Cb_out <= 0; end
  else if (select_real_CrCb) begin
    Y_out  <= Y_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
    Cb_out <= CrCb_pipe[(TAPS/2)+FILTER_PIPE_LENGTH];
    Cr_out <= CrCb_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
  end
  else if (select_filt_CrCb) begin
    Y_out  <= Y_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
    Cb_out <= Cb_filt;
    Cr_out <= Cr_filt;
  end
  else begin
    Y_out  <= Y_out;
    Cb_out <= Cb_out;
    Cr_out <= Cr_out;
  end
end



endmodule
💿 文件大小 70 K
👤 上传用户 mmmmmmmmmxxx
📂 所属分类 VHDL/FPGA/Verilog
📄 代码行数 444 行
💻 语言类型 Verilog
🏷️ 相关标签

#YUV #FPGA #422 #444
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -