📄 v422to444_v2mult_24t.v
字号:
//-----------------------------------------------------------------
// v422to444_v2mult_24t.v - 422 to 444 Format Converter
// Virtex-II Video Demo Board
//
//
//
//
// Author: Gregg C. Hawkes
// Senior Staff Applications Engineer
//
// Video Applications
// Advanced Products Division
// Xilinx, Inc.
//
// Copyright (c) 1999 Xilinx, Inc.
// All rights reserved
//
// Date: Aug. 6, 2001
// For: Video Demo Board
//
// RESTRICTED RIGHTS LEGEND
//
// This software has not been published by the author, and
// has been disclosed to others for the purpose of enhancing
// and promoting design productivity in Xilinx products.
//
// Therefore use, duplication or disclosure, now and in the
// future should give consideration to the productivity
// enhancements afforded the user of this code by the author's
// efforts. Thank you for using our products !
//
// Disclaimer: THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY
// WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY
// IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
// A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
//
//
//
// Revision:
// Aug. 6, 2001 Creation
//
//
// Other modules instanced in this design:
//
// none
/*
BRIEF DESCRIPTION
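The process of 4:2:2 to 4:4:4 is simply creating the missing Cr and Cb
components. This version accomplishes this task by interpolating the
missing Cr and Cb samples with the 24 tap symmetric FIR filter listed
below, rather than by merely duplicating the Cr and Cb information.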
DETAILED DESCRIPTION
The video standard ITU-R BT.601 was introduced as the need for
transporting digital component video between countries and standards
increased. The analog component R'G'B' can be sampled in a very regular
way and converted from 4:4:4 to the digital 4:2:2 format, essentially
halving the number of Cr and Cb samples that have to be carried.
The digital data is efficiently stored or transmitted to a destination
that reverses the process, i.e. converts back to 4:4:4 format, and
produces analog YUV or R'G'B' for display.
422 TO 444 CONVERSION
---------------------
Bob Turney, Xilinx Labs supplied me with this one to try.
- 4 = 18'h3FFFC
6 = 18'h00006
- 12 = 18'h3FFF4
20 = 18'h00014
- 32 = 18'h3FFE0
48 = 18'h00030
- 70 = 18'h3FFBA
104 = 18'h00068
- 152 = 18'h3FF68
236 = 18'h000EC
- 420 = 18'h3FE5C
1300 = 18'h00514
1300 = 18'h00514
- 420 = 18'h3FE5C
236 = 18'h000EC
- 152 = 18'h3FF68
104 = 18'h00068
- 70 = 18'h3FFBA
48 = 18'h00030
- 32 = 18'h3FFE0
20 = 18'h00014
- 12 = 18'h3FFF4
6 = 18'h00006
- 4 = 18'h3FFFC
CrCb[i] = (
- 4*(CrCb[1]+CrCb[24])
+ 6*(CrCb[2]+CrCb[23])
- 12*(CrCb[3]+CrCb[22])
+ 20*(CrCb[4]+CrCb[21])
- 32*(CrCb[5]+CrCb[20])
+ 48*(CrCb[6]+CrCb[19])
- 70*(CrCb[7]+CrCb[18])
+ 104*(CrCb[8]+CrCb[17])
- 152*(CrCb[9]+CrCb[16])
+ 236*(CrCb[10]+CrCb[15])
- 420*(CrCb[11]+CrCb[14])
+ 1300*(CrCb[12]+CrCb[13]))/2048;
Design Information
------------------
xc2v1000-ff896-6
MULT18X18s: 12 30%
Slices 864 16%
FFs 1,224 11%
LUTs 409 3%
IO 49 11%
gates 62,199
Minimum period: 12.042ns (Maximum frequency: 83 MHz)
Maximum net delay: 5.124ns
Note: Using coregen the V2 Mults can be replaced with parallel, 10 bit
signed integer X 12 bit coefficients for roughly 68 LUTs and 34
Registers each. This alternative would save the high performance V2
Multipliers for other uses or allow the design to directly map to
SPARTAN families.
*/
`timescale 1ns / 100ps
//-----------------------------------------------------------------------
// v422to444_v2mult_24t
//
// Converts a multiplexed 10 bit 4:2:2 component stream (YCrCb_in) into
// parallel 10 bit Y, Cr and Cb outputs. Every other chroma output is a
// delayed copy of a received chroma sample; the ones in between are
// produced by a symmetric FIR filter built from twelve MULT18X18
// primitives (one per coefficient pair).
//
// All registers are clocked on the rising edge of clk and reset
// synchronously by rst.
//
// NOTE(review): TAPS is a parameter, but the multiplier section below
// hard-codes twelve coefficients and indexes CrCb_pre_add[0..11], so only
// TAPS = 24 is actually supported by this file.
//-----------------------------------------------------------------------
module v422to444_v2mult_24t (
rst, // resets input data register and control
clk, // video component rate clock, 27Mhz for SDTV
Fi, // Low to High signals start of Field One
Vi, // High signals Vertical Blanking
Hi, // High signals Horizontal Blanking
Fo, // Field signal delayed by pipe length
Vo, // Vertical signal delayed by pipe length
Ho, // Horizontal signal delayed by pipe length
ceo, // output data rate is 1/2 the clock rate
YCrCb_in, // video component data, 10 bits, I[8].F[2]
Y_out, // Y out, 10 bits, delayed copy of the received Y samples
Cr_out, // Cr out, 10 bits, filtered samples are clamped
Cb_out // Cb out, 10 bits, filtered samples are clamped
);
/*
TAPS must be an even number thereby making the length of the pipe
an even number of FFs. I think 4 is the minimum.
*/
parameter TAPS = 24;
parameter FILTER_PIPE_LENGTH = 4;

input rst, clk, Hi, Vi, Fi;
input [9:0] YCrCb_in;
output [9:0] Y_out, Cr_out, Cb_out;
output Ho, Vo, Fo, ceo;

// control signal delay lines, TAPS+FILTER_PIPE_LENGTH+8 bits each
reg [TAPS+FILTER_PIPE_LENGTH+7:0] H_rg, V_rg, F_rg;
wire Ho, Vo, Fo, ceo, H_rising;

// component phase counter and the strobes decoded from it
reg [1:0] cnt;
wire Y_ld, select_real_CrCb, select_filt_CrCb;

// input capture registers
reg [9:0] Y_rg, CrCb_rg;

// pixel component pipelines
reg [9:0] Y_pipe [(TAPS/2)+FILTER_PIPE_LENGTH-1:0];
reg [9:0] CrCb_pipe [(TAPS*2)-1:0];

// filter components
reg [10:0] CrCb_pre_add [(TAPS/2)-1:0];
wire [35:0] P0, P1, P2, P3, P4, P5,
            P6, P7, P8, P9, P10, P11;
reg [23:0] CrCb_mult [11:0]; // low 24 bits of each 36 bit product
reg [23:0] CrCb_post_add;    // two's complement sum of all products
reg [9:0] CrCb_corrected, Cb_filt, Cr_filt;
reg [9:0] Y_out, Cr_out, Cb_out;

// One loop index per always block so that no two processes share a
// loop variable.
integer y_idx, c_idx, a_idx, m_idx;

//-----------------------------------------------------------------------
//
/*
Delay SMPTE control signals F, V, H, by an amount equivalent to the
modules pipe length. This will allow different modules to be swapped out
without changing exterior control. This occurs in most of my modules.
Each input is shifted in at bit 0 and leaves from the top bit, i.e.
TAPS+FILTER_PIPE_LENGTH+8 clocks later.
*/
always @ (posedge clk) begin
  if (rst) begin
    F_rg <= 0;
    V_rg <= 0;
    H_rg <= 0;
  end
  else begin
    F_rg <= {F_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Fi};
    V_rg <= {V_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Vi};
    H_rg <= {H_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Hi};
  end
end

assign Ho = H_rg[TAPS+FILTER_PIPE_LENGTH+7];
assign Vo = V_rg[TAPS+FILTER_PIPE_LENGTH+7];
assign Fo = F_rg[TAPS+FILTER_PIPE_LENGTH+7];

//-----------------------------------------------------------------------
//
// Identify Y and CrCb valid
//
// H_rg[0] holds Hi from the previous clock, so H_rising is high for the
// one clock in which Hi goes from low to high. That event (or rst)
// forces the phase counter to 1; otherwise it free-runs modulo 4.
//
assign H_rising = ~H_rg[0] & Hi;

always @ (posedge clk) begin
  if (rst | H_rising) cnt <= 1;
  else                cnt <= cnt+1;
end

// Odd counts are treated as Y sample times, even counts as chroma times.
assign Y_ld = (cnt == 2'b01) | (cnt == 2'b11);
assign ceo = Y_ld;
// Of the two Y sample times, one outputs received chroma and the other
// outputs filtered chroma.
assign select_real_CrCb = (cnt == 2'b11);
assign select_filt_CrCb = (cnt == 2'b01);

//-----------------------------------------------------------------------
//
// Separate the Y and CrCb data streams
//
always @ (posedge clk) begin
  if (rst)       Y_rg <= 10'h040;
  else if (Y_ld) Y_rg <= YCrCb_in;
end

always @ (posedge clk) begin
  if (rst)        CrCb_rg <= 0;
  else if (~Y_ld) CrCb_rg <= YCrCb_in;
end

//-----------------------------------------------------------------------
//
// 10 bit Y, Cr and Cb pipe registers, connected head to tail. There are
// (TAPS/2)+FILTER_PIPE_LENGTH Y registers (16 with the default
// parameters) and 2*TAPS CrCb registers (48 with the default parameters).
// The Y pipe advances when Y_ld is high, the CrCb pipe when it is low.
//
always @ (posedge clk) begin
  if (rst) begin
    for (y_idx = 0; y_idx < (TAPS/2)+FILTER_PIPE_LENGTH; y_idx = y_idx+1)
      Y_pipe[y_idx] <= 10'h040;
  end
  else if (Y_ld) begin
    Y_pipe[0] <= Y_rg;
    for (y_idx = 1; y_idx < (TAPS/2)+FILTER_PIPE_LENGTH; y_idx = y_idx+1)
      Y_pipe[y_idx] <= Y_pipe[y_idx-1];
  end
end

always @ (posedge clk) begin
  if (rst) begin
    for (c_idx = 0; c_idx < 2*TAPS; c_idx = c_idx+1)
      CrCb_pipe[c_idx] <= 0;
  end
  else if (~Y_ld) begin
    CrCb_pipe[0] <= CrCb_rg;
    for (c_idx = 1; c_idx < 2*TAPS; c_idx = c_idx+1)
      CrCb_pipe[c_idx] <= CrCb_pipe[c_idx-1];
  end
end

//-----------------------------------------------------------------------
//
// Pre multiply adder
//
// The filter is symmetric, so the two samples that share a coefficient
// are added first. Entry n adds CrCb_pipe[2*TAPS-1-2n] and
// CrCb_pipe[2n+1]; only odd pipe positions are used. The 11 bit result
// keeps the carry of the 10 bit + 10 bit addition.
// NOTE(review): using every other pipe entry presumably selects samples
// of a single chroma component from the interleaved stream - confirm
// against the component ordering of the source.
//
always @ (posedge clk) begin
  if (rst) begin
    for (a_idx = 0; a_idx < TAPS/2; a_idx = a_idx+1)
      CrCb_pre_add[a_idx] <= 0;
  end
  else if (~Y_ld) begin
    for (a_idx = 0; a_idx < TAPS/2; a_idx = a_idx+1)
      CrCb_pre_add[a_idx] <= CrCb_pipe[((2*TAPS)-1)-(2*a_idx)]
                           + CrCb_pipe[(2*a_idx)+1];
  end
end

//-----------------------------------------------------------------------
//
/*
Multipliers
A is the coefficient as an 18 bit two's complement constant. B is the
11 bit pre-add sum, zero extended to 18 bits so that it is always seen
as a non-negative value. Only the low 24 bits of each 36 bit product
are kept: the largest product magnitude is 2046 X 1300 = 2659800, which
is below 2**23 and therefore fits a 24 bit two's complement number.
*/
MULT18X18 U1  (.P(P0),  .A(18'h3FFFC), .B({7'h00, CrCb_pre_add[0]}));  //   -4
MULT18X18 U2  (.P(P1),  .A(18'h00006), .B({7'h00, CrCb_pre_add[1]}));  //    6
MULT18X18 U3  (.P(P2),  .A(18'h3FFF4), .B({7'h00, CrCb_pre_add[2]}));  //  -12
MULT18X18 U4  (.P(P3),  .A(18'h00014), .B({7'h00, CrCb_pre_add[3]}));  //   20
MULT18X18 U5  (.P(P4),  .A(18'h3FFE0), .B({7'h00, CrCb_pre_add[4]}));  //  -32
MULT18X18 U6  (.P(P5),  .A(18'h00030), .B({7'h00, CrCb_pre_add[5]}));  //   48
MULT18X18 U7  (.P(P6),  .A(18'h3FFBA), .B({7'h00, CrCb_pre_add[6]}));  //  -70
MULT18X18 U8  (.P(P7),  .A(18'h00068), .B({7'h00, CrCb_pre_add[7]}));  //  104
MULT18X18 U9  (.P(P8),  .A(18'h3FF68), .B({7'h00, CrCb_pre_add[8]}));  // -152
MULT18X18 U10 (.P(P9),  .A(18'h000EC), .B({7'h00, CrCb_pre_add[9]}));  //  236
MULT18X18 U11 (.P(P10), .A(18'h3FE5C), .B({7'h00, CrCb_pre_add[10]})); // -420
MULT18X18 U12 (.P(P11), .A(18'h00514), .B({7'h00, CrCb_pre_add[11]})); // 1300

//-----------------------------------------------------------------------
//
// Register outputs of multiply
//
always @ (posedge clk) begin
  if (rst) begin
    for (m_idx = 0; m_idx <= 11; m_idx = m_idx+1)
      CrCb_mult[m_idx] <= 0;
  end
  else if (~Y_ld) begin
    CrCb_mult[0]  <= P0[23:0];
    CrCb_mult[1]  <= P1[23:0];
    CrCb_mult[2]  <= P2[23:0];
    CrCb_mult[3]  <= P3[23:0];
    CrCb_mult[4]  <= P4[23:0];
    CrCb_mult[5]  <= P5[23:0];
    CrCb_mult[6]  <= P6[23:0];
    CrCb_mult[7]  <= P7[23:0];
    CrCb_mult[8]  <= P8[23:0];
    CrCb_mult[9]  <= P9[23:0];
    CrCb_mult[10] <= P10[23:0];
    CrCb_mult[11] <= P11[23:0];
  end
end

//-----------------------------------------------------------------------
//
// Post multiply adder (this needs to run at 74.25 MHz for HDTV). This
// is the performance bottle-neck. It can be easily pipelined.
//
// The twelve 24 bit two's complement products are summed modulo 2**24,
// so CrCb_post_add[23] is the sign of the filter result.
//
always @ (posedge clk) begin
  if (rst) CrCb_post_add <= 0;
  else if (~Y_ld) CrCb_post_add <=
      CrCb_mult[0] + CrCb_mult[1] + CrCb_mult[2]  + CrCb_mult[3]
    + CrCb_mult[4] + CrCb_mult[5] + CrCb_mult[6]  + CrCb_mult[7]
    + CrCb_mult[8] + CrCb_mult[9] + CrCb_mult[10] + CrCb_mult[11];
end

//-----------------------------------------------------------------------
//
// Correct overflows and underflows
//
/*
Note 1: Dropping the low 11 bits is equivalent to dividing by 2048. This
is to account for the non fractional coefficients in the FIR filter
multiplies.
Note 2: The output stage picks up Cr_filt and Cb_filt once every four
clock ticks (when cnt == 1).
Note 3: The sum is signed and its integer part can exceed 10 bits, so
the range check has to look at the sign bit [23] and at the full integer
part [22:11]. Testing only [20:11] would make a negative sum look like a
large positive value (clamped to the maximum instead of the minimum) and
would let sums above 1023 wrap around.
NOTE(review): the upper limit 0x3AC is kept from the original code; the
author's multiplier sizing note used 960 (0x3C0) as the largest sample
value - confirm which chroma limit is intended.
*/
always @ (posedge clk) begin
  if (rst) CrCb_corrected <= 10'h040;
  else if (~Y_ld) begin
    if (CrCb_post_add[23])                    // negative result
      CrCb_corrected <= 10'h040;
    else if (CrCb_post_add[22:11] > 12'h3AC)  // above upper limit
      CrCb_corrected <= 10'h3AC;
    else if (CrCb_post_add[22:11] < 12'h040)  // below lower limit
      CrCb_corrected <= 10'h040;
    else
      CrCb_corrected <= CrCb_post_add[20:11];
  end
end

// Two stage shift of the corrected filter output; the newer value sits
// in Cr_filt and the one before it in Cb_filt.
always @ (posedge clk) begin
  if (rst) begin
    Cr_filt <= 0;
    Cb_filt <= 0;
  end
  else if (~Y_ld) begin
    Cr_filt <= CrCb_corrected;
    Cb_filt <= Cr_filt;
  end
end

//-----------------------------------------------------------------------
//
// Divide the CrCb stream into separate outgoing components
//
// When cnt == 3 the chroma outputs are taken from two adjacent stages of
// the CrCb pipe; when cnt == 1 they are taken from the filter. Y always
// comes from the last stage of the Y pipe. The outputs hold their value
// on the remaining clocks.
//
always @ (posedge clk) begin
  if (rst) begin
    Y_out  <= 0;
    Cr_out <= 0;
    Cb_out <= 0;
  end
  else if (select_real_CrCb) begin
    Y_out  <= Y_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
    Cb_out <= CrCb_pipe[(TAPS/2)+FILTER_PIPE_LENGTH];
    Cr_out <= CrCb_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
  end
  else if (select_filt_CrCb) begin
    Y_out  <= Y_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
    Cb_out <= Cb_filt;
    Cr_out <= Cr_filt;
  end
end

endmodule
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -