📄 idct.v
字号:
if (RST)
begin
prod_en1 <= 4'b0000;
end
else if(rdy_in == 1'b1)
begin
if (prod_en1 < 4'b1001)
begin
prod_en1 <= prod_en1 + 1;
end
else
begin
prod_en1 <= 4'b1001;
end
end
end
/* Input holding registers.
   On RST all eight xaN_reg registers are cleared. On a rising CLK edge with
   cntr8 equal to 8, xa0_in..xa7_in are copied into xa0_reg..xa7_reg.
   In every other cycle the registers keep their previous contents. */
always @ (posedge CLK or posedge RST)
begin
    if (RST) begin
        xa0_reg <= 12'b0;
        xa1_reg <= 12'b0;
        xa2_reg <= 12'b0;
        xa3_reg <= 12'b0;
        xa4_reg <= 12'b0;
        xa5_reg <= 12'b0;
        xa6_reg <= 12'b0;
        xa7_reg <= 12'b0;
    end
    else if (cntr8 == 4'd8) begin
        xa0_reg <= xa0_in;
        xa1_reg <= xa1_in;
        xa2_reg <= xa2_in;
        xa3_reg <= xa3_in;
        xa4_reg <= xa4_in;
        xa5_reg <= xa5_in;
        xa6_reg <= xa6_in;
        xa7_reg <= xa7_in;
    end
    // no final else: the registers hold their value when cntr8 != 8
end
/* take absolute value of signals */
/* Sign/magnitude split of the eight registered inputs.
   xaN_reg_sign captures bit 11 of xaN_reg. xaN_reg_comp[10:0] captures the
   low 11 bits of xaN_reg when bit 11 is 0, or the low 11 bits of the
   negated value when bit 11 is 1. Everything is cleared on RST. */
always @ (posedge CLK or posedge RST)
begin
    if (RST) begin
        // sign flags
        xa0_reg_sign <= 1'b0;
        xa1_reg_sign <= 1'b0;
        xa2_reg_sign <= 1'b0;
        xa3_reg_sign <= 1'b0;
        xa4_reg_sign <= 1'b0;
        xa5_reg_sign <= 1'b0;
        xa6_reg_sign <= 1'b0;
        xa7_reg_sign <= 1'b0;
        // magnitudes
        xa0_reg_comp <= 11'b0;
        xa1_reg_comp <= 11'b0;
        xa2_reg_comp <= 11'b0;
        xa3_reg_comp <= 11'b0;
        xa4_reg_comp <= 11'b0;
        xa5_reg_comp <= 11'b0;
        xa6_reg_comp <= 11'b0;
        xa7_reg_comp <= 11'b0;
    end
    else begin
        // sign flags: copy of the MSB of each input register
        xa0_reg_sign <= xa0_reg[11];
        xa1_reg_sign <= xa1_reg[11];
        xa2_reg_sign <= xa2_reg[11];
        xa3_reg_sign <= xa3_reg[11];
        xa4_reg_sign <= xa4_reg[11];
        xa5_reg_sign <= xa5_reg[11];
        xa6_reg_sign <= xa6_reg[11];
        xa7_reg_sign <= xa7_reg[11];

        // magnitudes: negate when the MSB is set; the result is truncated
        // to the 11 bits of the destination part-select
        xa0_reg_comp[10:0] <= (xa0_reg[11]) ? (-xa0_reg) : xa0_reg[10:0];
        xa1_reg_comp[10:0] <= (xa1_reg[11]) ? (-xa1_reg) : xa1_reg[10:0];
        xa2_reg_comp[10:0] <= (xa2_reg[11]) ? (-xa2_reg) : xa2_reg[10:0];
        xa3_reg_comp[10:0] <= (xa3_reg[11]) ? (-xa3_reg) : xa3_reg[10:0];
        xa4_reg_comp[10:0] <= (xa4_reg[11]) ? (-xa4_reg) : xa4_reg[10:0];
        xa5_reg_comp[10:0] <= (xa5_reg[11]) ? (-xa5_reg) : xa5_reg[10:0];
        xa6_reg_comp[10:0] <= (xa6_reg[11]) ? (-xa6_reg) : xa6_reg[10:0];
        xa7_reg_comp[10:0] <= (xa7_reg[11]) ? (-xa7_reg) : xa7_reg[10:0];
    end
end
/* multiply the outputs of the add/sub block with the 8 sets of stored coefficients */
/* The inputs are shifted through 8 registers in 8 clk cycles. The output of the shift
registers is registered at the 9th clk. The values are then added or subtracted at the 10th
clk. The first multiplier output is obtained at the 11th clk. Memoryx[0] should be accessed
at the 11th clk */
/*wait state counter */
// First valid add_sub appears at the 10th clk (8 clks for shifting inputs,
// 9th clk for registering shifted input and 10th clk for add_sub
// to synchronize the i value to the add_sub value, i value is incremented
// only after 10 clks using i_wait
/* max value for p1a = 2047*126. = 18 bits */
/* Unsigned magnitude products: the low 7 bits of each coefficient word
   multiplied by the 11-bit input magnitude. Inputs are paired in reverse
   order (coefficient 1 with xa7, ... coefficient 8 with xa0).
   11 bits * 7 bits needs at most 18 bits. */
assign p1a_all = memory1a[6:0] * xa7_reg_comp[10:0];
assign p2a_all = memory2a[6:0] * xa6_reg_comp[10:0];
assign p3a_all = memory3a[6:0] * xa5_reg_comp[10:0];
assign p4a_all = memory4a[6:0] * xa4_reg_comp[10:0];
assign p5a_all = memory5a[6:0] * xa3_reg_comp[10:0];
assign p6a_all = memory6a[6:0] * xa2_reg_comp[10:0];
assign p7a_all = memory7a[6:0] * xa1_reg_comp[10:0];
assign p8a_all = memory8a[6:0] * xa0_reg_comp[10:0];
/* The following instantiation can be used while targetting Virtex2 */
//MULT18X18 mult1a (.A({10'b0,xa7_reg_comp[7:0]}), .B({11'b0,memory1a[6:0]}), .P(p1a_all));
//MULT18X18 mult2a (.A({10'b0,xa6_reg_comp[7:0]}), .B({11'b0,memory2a[6:0]}), .P(p2a_all));
//MULT18X18 mult3a (.A({10'b0,xa5_reg_comp[7:0]}), .B({11'b0,memory3a[6:0]}), .P(p3a_all));
//MULT18X18 mult4a (.A({10'b0,xa4_reg_comp[7:0]}), .B({11'b0,memory4a[6:0]}), .P(p4a_all));
//MULT18X18 mult5a (.A({10'b0,xa3_reg_comp[7:0]}), .B({11'b0,memory5a[6:0]}), .P(p5a_all));
//MULT18X18 mult6a (.A({10'b0,xa2_reg_comp[7:0]}), .B({11'b0,memory6a[6:0]}), .P(p6a_all));
//MULT18X18 mult7a (.A({10'b0,xa1_reg_comp[7:0]}), .B({11'b0,memory7a[6:0]}), .P(p7a_all));
//MULT18X18 mult8a (.A({10'b0,xa0_reg_comp[7:0]}), .B({11'b0,memory8a[6:0]}), .P(p8a_all));
/* Signed product registers and coefficient index counter.
   While rdy_in is 1 and prod_en1 equals 9, each pNa register takes the
   matching pNa_all[17:0] product, negated when the input sign flag and bit 7
   of the coefficient word differ, and indexi_val steps 0..7 and wraps.
   In any other non-reset cycle the pNa registers are cleared and indexi_val
   keeps its value. RST clears everything. */
always @ (posedge RST or posedge CLK)
begin
    if (RST) begin
        p1a <= 21'b0;
        p2a <= 21'b0;
        p3a <= 21'b0;
        p4a <= 21'b0;
        p5a <= 21'b0;
        p6a <= 21'b0;
        p7a <= 21'b0;
        p8a <= 21'b0;
        indexi_val <= 3'b000;
    end
    else begin
        // Default: products are zero. The assignments inside the enable
        // branch below override these (the last non-blocking assignment wins).
        p1a <= 21'b0;
        p2a <= 21'b0;
        p3a <= 21'b0;
        p4a <= 21'b0;
        p5a <= 21'b0;
        p6a <= 21'b0;
        p7a <= 21'b0;
        p8a <= 21'b0;

        if (rdy_in == 1'b1 && prod_en1 == 4'b1001) begin
            // XOR of the two sign bits gives the sign of the product
            p1a <= (xa7_reg_sign ^ memory1a[7]) ? (-p1a_all[17:0]) : (p1a_all[17:0]);
            p2a <= (xa6_reg_sign ^ memory2a[7]) ? (-p2a_all[17:0]) : (p2a_all[17:0]);
            p3a <= (xa5_reg_sign ^ memory3a[7]) ? (-p3a_all[17:0]) : (p3a_all[17:0]);
            p4a <= (xa4_reg_sign ^ memory4a[7]) ? (-p4a_all[17:0]) : (p4a_all[17:0]);
            p5a <= (xa3_reg_sign ^ memory5a[7]) ? (-p5a_all[17:0]) : (p5a_all[17:0]);
            p6a <= (xa2_reg_sign ^ memory6a[7]) ? (-p6a_all[17:0]) : (p6a_all[17:0]);
            p7a <= (xa1_reg_sign ^ memory7a[7]) ? (-p7a_all[17:0]) : (p7a_all[17:0]);
            p8a <= (xa0_reg_sign ^ memory8a[7]) ? (-p8a_all[17:0]) : (p8a_all[17:0]);

            // index counter: 0,1,...,7,0,...
            if (indexi_val == 3'b111) begin
                indexi_val <= 3'b000;
            end
            else begin
                indexi_val <= indexi_val + 1'b1;
            end
        end
    end
end
/* Final adder. Adding the outputs of the 8 multipliers (p1a..p8a) in two stages */
/* max value for z_out_int = 2047*126*8 = 2063376 = 21 bits */
/* Two-stage registered adder tree.
   Stage 1 sums the eight product registers pairwise into z_out_int1..4.
   Stage 2 sums the four stage-1 registers into z_out_int. Because both stages
   are registered in the same block, z_out_int uses the stage-1 values from
   the previous clock. RST clears all five registers. */
always @ (posedge CLK or posedge RST)
begin
    if (RST) begin
        z_out_int1 <= 21'b0;
        z_out_int2 <= 21'b0;
        z_out_int3 <= 21'b0;
        z_out_int4 <= 21'b0;
        z_out_int  <= 21'b0;
    end
    else begin
        // stage 1: pairwise partial sums
        z_out_int1 <= p1a + p2a;
        z_out_int2 <= p3a + p4a;
        z_out_int3 <= p5a + p6a;
        z_out_int4 <= p7a + p8a;
        // stage 2: total of the previous cycle's partial sums
        z_out_int  <= (z_out_int1 + z_out_int2) + (z_out_int3 + z_out_int4);
    end
end
// rounding of the value
/* max value for a 1D-DCT output is "11111111"*126*8/256=1004.
To represent this we need only 10 bits, plus 1 bit for sign */
// Drop the 8 low bits of z_out_int (divide by 256) and keep bits [18:8].
assign z_out_rnd = z_out_int[18:8];
// Round to nearest: bump the result by one when the first discarded bit is set.
assign z_out = (z_out_int[7] == 1'b0) ? z_out_rnd : (z_out_rnd + 1'b1);
/* 1D-DCT END */
/* transpose memory to store intermediate Z coefficients */
/* store the 64 coefficients in the first 64 locations of the RAM */
/* first valid final (product) adder output is at the 13th clk: 8 clk SR
+ 1 clk reg + 1 clk comp + 1 clk prod. + 2 clks summing.
So the RAM is enabled at the 11th clk */
/* Free-running start-up counter: cleared by RST, incremented by one on
   every rising CLK edge on which rdy_in is 1, held otherwise. */
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        cntr11 <= 4'b0;
    else if (rdy_in == 1'b1)
        cntr11 <= cntr11 + 1'b1;
end
/* enable RAM at the 14th clk after RST goes inactive */
/* RAM enable.
   en_ram1 is 0 while RST is asserted, goes to 1 when cntr11 equals 12, and
   then stays at 1 until the next RST.

   The "stay at 1" memory is taken from the en_ram1reg flip-flop instead of
   feeding en_ram1 back into its own continuous assignment. The self-referencing
   form is a combinational loop (an implied latch): it is reported as a loop or
   latch by synthesis and timing tools, and a momentary decode glitch on cntr11
   could set it permanently. cntr11 only changes on CLK edges, so the value 12
   is always present for a full cycle and is captured by en_ram1reg on the
   following edge; en_ram1 and en_ram1reg therefore have the same value at
   every clock edge as before. */
assign en_ram1 = RST ? 1'b0 : (cntr11 == 4'b1100) ? 1'b1 : en_ram1reg;

/* One-clock delayed, registered copy of en_ram1; also holds the sticky state. */
always @ (posedge CLK or posedge RST)
begin
    if (RST)
    begin
        en_ram1reg <= 1'b0;
    end
    else
    begin
        en_ram1reg <= en_ram1;
    end
end
/* After the RAM is enabled, data is written into RAM1 for 64 clk cycles. Data is written into
each consecutive location. After 64 locations have been written, RAM1 goes into read mode and RAM2 goes into
write mode. The cycle then repeats.
For either RAM, data is written into each consecutive location. However, data is read in a different order. If data
is assumed to be written one row at a time in an 8x8 matrix, it is read one column at a time, i.e., after
the first data is read out, every eighth data is read out. Then the 2nd data is read out, followed by every 8th.
the write is as follows:
1w(ram_locn1) 2w(ram_locn2) 3w(ram_locn3) 4w(ram_locn4) 5w(ram_locn5) 6w(ram_locn6) 7w(ram_locn7) 8w(ram_locn8)
9w(ram_locn9) 10w(ram_locn10) 11w(ram_locn11) 12w(ram_locn12) 13w(ram_locn13) 14w(ram_locn14) 15w(ram_locn15) 16w(ram_locn16)
..................
57w(ram_locn57) 58w(ram_locn58) 59w(ram_locn59) 60w(ram_locn60) 61w(ram_locn61) 62w(ram_locn62) 63w(ram_locn63) 64w(ram_locn64)
the read is as follows:
1r(ram_locn1) 9r(ram_locn2) . . . 57r(ram_locn8)
2r(ram_locn9) 10r(ram_locn10) . . . 58r(ram_locn16)
3r(ram_locn17) 11r(ram_locn18) . . . 59r(ram_locn24)
4r(ram_locn25) 12r(ram_locn26) . . . 60r(ram_locn32)
5r(ram_locn33) 13r(ram_locn34) . . . 61r(ram_locn40)
6r(ram_locn41) 14r(ram_locn42) . . . 62r(ram_locn48)
7r(ram_locn49) 15r(ram_locn50) . . . 63r(ram_locn56)
8r(ram_locn57) 16r(ram_locn58) . . . 64r(ram_locn64)
where "xw" is the xth write, "ram_locnx" is the xth RAM location and "xr" is the xth read. Reading
is advanced by the read counter rd_cntr, and writing by the write counter wr_cntr. */
/* Read counter, bits [5:3].
   Reset to 7, so the first enabled increment wraps the field to 0.
   Advances by one on every rising CLK edge while en_ram1reg is 1. */
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        rd_cntr[5:3] <= 3'b111;
    else if (en_ram1reg == 1'b1)
        rd_cntr[5:3] <= rd_cntr[5:3] + 1'b1;
end
/* Read counter, bits [2:0].
   Reset to 7. Advances by one only on enabled cycles in which bits [5:3]
   are at 7, i.e. once for every full pass of the [5:3] field. */
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        rd_cntr[2:0] <= 3'b111;
    else if (en_ram1reg == 1'b1 && rd_cntr[5:3] == 3'd7)
        rd_cntr[2:0] <= rd_cntr[2:0] + 1'b1;
end
/* Read counter, bit 6.
   Reset to 1. Inverted on enabled cycles in which the low six bits are all
   ones, i.e. once every 64 enabled clocks. */
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        rd_cntr[6] <= 1'b1;
    else if (en_ram1reg == 1'b1 && rd_cntr[5:0] == 6'd63)
        rd_cntr[6] <= ~rd_cntr[6];
end
always @ (posedge CLK or posedge RST)
begin
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -