📄 idct.v

📁 离散余弦变换及反离散余弦变换的HDL代码及测试文件。包括VHDL及Verilog版本。可用于JPEG及MPEG压缩算法。
💻 V
📖 第 1 页 / 共 3 页
字号:
   if (RST)
       begin
       prod_en1 <= 4'b0000;
       end
   else if(rdy_in == 1'b1)
       begin
           if (prod_en1 < 4'b1001)
           begin
           prod_en1 <= prod_en1 + 1;
           end
       else 
           begin
           prod_en1 <= 4'b1001;
           end
       end
   end

// Input capture stage: the eight xaN_in values are copied into the xaN_reg
// registers on the clock edge where cntr8 equals 8. On every other edge the
// registers keep their value. RST clears them asynchronously.
always @ (posedge CLK or posedge RST)
begin
    if (RST) begin
        xa0_reg <= 12'd0;
        xa1_reg <= 12'd0;
        xa2_reg <= 12'd0;
        xa3_reg <= 12'd0;
        xa4_reg <= 12'd0;
        xa5_reg <= 12'd0;
        xa6_reg <= 12'd0;
        xa7_reg <= 12'd0;
    end
    else if (cntr8 == 4'd8) begin
        xa0_reg <= xa0_in;
        xa1_reg <= xa1_in;
        xa2_reg <= xa2_in;
        xa3_reg <= xa3_in;
        xa4_reg <= xa4_in;
        xa5_reg <= xa5_in;
        xa6_reg <= xa6_in;
        xa7_reg <= xa7_in;
    end
    // no else branch: the registers hold when cntr8 != 8
end
/* take absolute value of signals */

// Sign/magnitude split of the eight registered inputs.
//   xaN_reg_sign : bit 11 of xaN_reg (1 when the value is negative).
//   xaN_reg_comp : 11-bit magnitude; the two's complement negation of xaN_reg
//                  when bit 11 is set, otherwise xaN_reg[10:0].
// The most negative 12-bit value (12'h800 = -2048) has no 11-bit magnitude:
// its negation is 12'h800 again, and truncating that to 11 bits gives 0, so
// the input would be treated as zero. The magnitude is therefore saturated to
// 11'h7FF (2047) for that one code.
// RST clears all sign and magnitude registers asynchronously.
always @ (posedge CLK or posedge RST)
   begin
   if (RST)
       begin
       xa0_reg_comp <= 11'b0; xa1_reg_comp <= 11'b0; xa2_reg_comp <= 11'b0; xa3_reg_comp <= 11'b0;
       xa4_reg_comp <= 11'b0; xa5_reg_comp <= 11'b0; xa6_reg_comp <= 11'b0; xa7_reg_comp <= 11'b0;
       xa0_reg_sign <= 1'b0; xa1_reg_sign <= 1'b0; xa2_reg_sign <= 1'b0; xa3_reg_sign <= 1'b0;
       xa4_reg_sign <= 1'b0; xa5_reg_sign <= 1'b0; xa6_reg_sign <= 1'b0; xa7_reg_sign <= 1'b0;
       end
   else
       begin
       xa0_reg_sign <= xa0_reg[11];
       xa0_reg_comp[10:0] <= (xa0_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa0_reg[11]) ? (-xa0_reg) : xa0_reg[10:0];
       xa1_reg_sign <= xa1_reg[11];
       xa1_reg_comp[10:0] <= (xa1_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa1_reg[11]) ? (-xa1_reg) : xa1_reg[10:0];
       xa2_reg_sign <= xa2_reg[11];
       xa2_reg_comp[10:0] <= (xa2_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa2_reg[11]) ? (-xa2_reg) : xa2_reg[10:0];
       xa3_reg_sign <= xa3_reg[11];
       xa3_reg_comp[10:0] <= (xa3_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa3_reg[11]) ? (-xa3_reg) : xa3_reg[10:0];
       xa4_reg_sign <= xa4_reg[11];
       xa4_reg_comp[10:0] <= (xa4_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa4_reg[11]) ? (-xa4_reg) : xa4_reg[10:0];
       xa5_reg_sign <= xa5_reg[11];
       xa5_reg_comp[10:0] <= (xa5_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa5_reg[11]) ? (-xa5_reg) : xa5_reg[10:0];
       xa6_reg_sign <= xa6_reg[11];
       xa6_reg_comp[10:0] <= (xa6_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa6_reg[11]) ? (-xa6_reg) : xa6_reg[10:0];
       xa7_reg_sign <= xa7_reg[11];
       xa7_reg_comp[10:0] <= (xa7_reg[11:0] == 12'h800) ? 11'h7FF :
                             (xa7_reg[11]) ? (-xa7_reg) : xa7_reg[10:0];
       end
   end


/* multiply the outputs of the add/sub block with the 8 sets of stored coefficients */
/* The inputs are shifted through 8 registers in 8 clk cycles. The outputs of the shift
registers are registered at the 9th clk. The values are then added or subtracted at the 10th
clk. The first multiplier output is obtained at the 11th clk. Memoryx[0] should be accessed
at the 11th clk. */

/*wait state counter */
// First valid add_sub appears at the 10th clk (8 clks for shifting inputs,
// 9th clk for registering shifted input and 10th clk for add_sub
// to synchronize the i value to the add_sub value, i value is incremented
// only after 10 clks using i_wait
/* max value for p1a = 2047*126. = 18 bits */

     // Unsigned magnitude multipliers: 11-bit input magnitude times the low
     // 7 bits of the coefficient word. Input 7 pairs with coefficient 1,
     // input 6 with coefficient 2, and so on down to input 0 with coefficient 8.
     // Bit 7 of each memoryNa word is not used here; it is combined with the
     // input sign where the products are registered.
     assign p8a_all = xa0_reg_comp[10:0] * memory8a[6:0];
     assign p7a_all = xa1_reg_comp[10:0] * memory7a[6:0];
     assign p6a_all = xa2_reg_comp[10:0] * memory6a[6:0];
     assign p5a_all = xa3_reg_comp[10:0] * memory5a[6:0];
     assign p4a_all = xa4_reg_comp[10:0] * memory4a[6:0];
     assign p3a_all = xa5_reg_comp[10:0] * memory3a[6:0];
     assign p2a_all = xa6_reg_comp[10:0] * memory2a[6:0];
     assign p1a_all = xa7_reg_comp[10:0] * memory1a[6:0];

/* The following instantiation can be used while targetting Virtex2 */
//MULT18X18 mult1a (.A({10'b0,xa7_reg_comp[7:0]}), .B({11'b0,memory1a[6:0]}), .P(p1a_all));
//MULT18X18 mult2a (.A({10'b0,xa6_reg_comp[7:0]}), .B({11'b0,memory2a[6:0]}), .P(p2a_all));
//MULT18X18 mult3a (.A({10'b0,xa5_reg_comp[7:0]}), .B({11'b0,memory3a[6:0]}), .P(p3a_all));
//MULT18X18 mult4a (.A({10'b0,xa4_reg_comp[7:0]}), .B({11'b0,memory4a[6:0]}), .P(p4a_all));
//MULT18X18 mult5a (.A({10'b0,xa3_reg_comp[7:0]}), .B({11'b0,memory5a[6:0]}), .P(p5a_all));
//MULT18X18 mult6a (.A({10'b0,xa2_reg_comp[7:0]}), .B({11'b0,memory6a[6:0]}), .P(p6a_all));
//MULT18X18 mult7a (.A({10'b0,xa1_reg_comp[7:0]}), .B({11'b0,memory7a[6:0]}), .P(p7a_all));
//MULT18X18 mult8a (.A({10'b0,xa0_reg_comp[7:0]}), .B({11'b0,memory8a[6:0]}), .P(p8a_all));

// Signed product registers and coefficient index counter.
// While rdy_in is 1 and prod_en1 has reached 9, each pNa takes the 18-bit
// unsigned product pNa_all, negated when the input sign and the coefficient
// sign (bit 7 of memoryNa) differ, and indexi_val steps 0..7 and wraps.
// In every other clocked cycle the products are forced to zero and
// indexi_val holds. RST clears everything asynchronously.
always @ (posedge RST or posedge CLK)
begin
    if (RST) begin
        p1a <= 21'd0;
        p2a <= 21'd0;
        p3a <= 21'd0;
        p4a <= 21'd0;
        p5a <= 21'd0;
        p6a <= 21'd0;
        p7a <= 21'd0;
        p8a <= 21'd0;
        indexi_val <= 3'b000;
    end
    else if (rdy_in == 1'b1 && prod_en1 == 4'd9) begin
        // XOR of the two sign bits selects the negated or the plain product.
        p1a <= (xa7_reg_sign ^ memory1a[7]) ? (-p1a_all[17:0]) : (p1a_all[17:0]);
        p2a <= (xa6_reg_sign ^ memory2a[7]) ? (-p2a_all[17:0]) : (p2a_all[17:0]);
        p3a <= (xa5_reg_sign ^ memory3a[7]) ? (-p3a_all[17:0]) : (p3a_all[17:0]);
        p4a <= (xa4_reg_sign ^ memory4a[7]) ? (-p4a_all[17:0]) : (p4a_all[17:0]);
        p5a <= (xa3_reg_sign ^ memory5a[7]) ? (-p5a_all[17:0]) : (p5a_all[17:0]);
        p6a <= (xa2_reg_sign ^ memory6a[7]) ? (-p6a_all[17:0]) : (p6a_all[17:0]);
        p7a <= (xa1_reg_sign ^ memory7a[7]) ? (-p7a_all[17:0]) : (p7a_all[17:0]);
        p8a <= (xa0_reg_sign ^ memory8a[7]) ? (-p8a_all[17:0]) : (p8a_all[17:0]);

        // Explicit wrap from 7 back to 0.
        if (indexi_val == 3'b111) begin
            indexi_val <= 3'b000;
        end
        else begin
            indexi_val <= indexi_val + 1'b1;
        end
    end
    else begin
        // Not producing: flush the product registers, leave indexi_val alone.
        p1a <= 21'd0;
        p2a <= 21'd0;
        p3a <= 21'd0;
        p4a <= 21'd0;
        p5a <= 21'd0;
        p6a <= 21'd0;
        p7a <= 21'd0;
        p8a <= 21'd0;
    end
end


/* Final adder. Adding the outputs of the 8 multipliers */
/* max value for z_out_int = 2047*126*8 = 2063376 = 21 bits */
// Two-stage registered adder tree over the eight signed products.
// Stage 1 forms four pairwise sums; stage 2 adds those four partial sums
// (as registered on the previous clock) into z_out_int.
// RST clears all five registers asynchronously.
always @ (posedge CLK or posedge RST)
begin
    if (RST) begin
        z_out_int1 <= 21'd0;
        z_out_int2 <= 21'd0;
        z_out_int3 <= 21'd0;
        z_out_int4 <= 21'd0;
        z_out_int  <= 21'd0;
    end
    else begin
        // stage 1: pairwise sums
        z_out_int1 <= p1a + p2a;
        z_out_int2 <= p3a + p4a;
        z_out_int3 <= p5a + p6a;
        z_out_int4 <= p7a + p8a;
        // stage 2: total of the previous cycle's partial sums
        z_out_int  <= z_out_int1 + z_out_int2 + z_out_int3 + z_out_int4;
    end
end

// rounding of the value
/* max value for a 1D-DCT output is "11111111"*126*8/256=1004.
To represent this we need only 10 bits, plus 1 bit for sign */

// Drop the low 8 bits of the accumulated sum and round to nearest:
// bit 7 (the highest discarded bit) decides whether one is added.
assign z_out_rnd = z_out_int[18:8];
assign z_out     = (z_out_int[7]) ? (z_out_rnd + 1'b1)
                                  : z_out_rnd;

/* 1D-DCT END */

/* transpose memory to store intermediate Z coefficients */
/* store the 64 coefficients in the first 64 locations of the RAM */
/* first valid final (product) adder output is at the 13th clk. 8clk SR
+ 1 clk reg + 1 clk comp + 1 clk prod. + 2 clks summing.
So the RAM is enabled at the 11th clk) */

// Free-running counter cntr11: cleared asynchronously by RST, incremented on
// every clock edge where rdy_in is 1, held otherwise.
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        cntr11 <= 4'd0;
    else if (rdy_in == 1'b1)
        cntr11 <= cntr11 + 1'b1;
end

/* enable RAM at the 14th clk after RST goes inactive */

// en_ram1 is 0 while RST is asserted, goes high as soon as cntr11 equals 12,
// and then stays high until the next RST.
// The "stay high" state is kept in a clocked register (en_ram1_hold) rather
// than by assigning en_ram1 to itself, which formed a combinational feedback
// loop (an inferred latch). The combinational set term is kept so en_ram1
// still rises in the same cycle that cntr11 reaches 12.
reg en_ram1_hold;

always @ (posedge CLK or posedge RST)
   begin
   if (RST)
       en_ram1_hold <= 1'b0;
   else if (cntr11 == 4'b1100)
       en_ram1_hold <= 1'b1;
   end

assign en_ram1 = RST ? 1'b0 : (cntr11 == 4'b1100) ? 1'b1 : en_ram1_hold;

// en_ram1reg is en_ram1 delayed by one clock; cleared asynchronously by RST.
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        en_ram1reg <= 1'b0;
    else
        en_ram1reg <= en_ram1;
end

/* After the RAM is enabled, data is written into RAM1 for 64 clk cycles. Data is written into
each consecutive location. After 64 locations have been written, RAM1 goes into read mode and RAM2 goes into
write mode. The cycle then repeats.
For either RAM, data is written into each consecutive location. However, data is read in a different order. If data
is assumed to be written one row at a time in an 8x8 matrix, data is read one column at a time, i.e., after
the first data is read out, every eighth data is read out. Then the 2nd data is read out, followed by every 8th.

the write is as follows:
1w(ram_locn1) 2w(ram_locn2) 3w(ram_locn3) 4w(ram_locn4) 5w(ram_locn5) 6w(ram_locn6) 7w(ram_locn7) 8w(ram_locn8)
9w(ram_locn9) 10w(ram_locn10) 11w(ram_locn11) 12w(ram_locn12) 13w(ram_locn13) 14w(ram_locn14) 15w(ram_locn15) 16w(ram_locn16)
..................
57w(ram_locn57) 58w(ram_locn58) 59w(ram_locn59) 60w(ram_locn60) 61w(ram_locn61) 62w(ram_locn62) 63w(ram_locn63) 64w(ram_locn64)

the read is as follows:
1r(ram_locn1)  9r(ram_locn2) . . . 57r(ram_locn8)
2r(ram_locn9) 10r(ram_locn10) . . . 58r(ram_locn16) 
3r(ram_locn17) 11r(ram_locn18) . . . 59r(ram_locn24)
4r(ram_locn25) 12r(ram_locn26) . . . 60r(ram_locn32)
5r(ram_locn33) 13r(ram_locn34) . . . 61r(ram_locn40)
6r(ram_locn41) 14r(ram_locn42) . . . 62r(ram_locn48)
7r(ram_locn49) 15r(ram_locn50) . . . 63r(ram_locn56)
8r(ram_locn57) 16r(ram_locn58) . . . 64r(ram_locn64)

where "xw" is the xth write, "ram_locnx" is the xth ram location and "xr" is the xth read. Reading
is advanced by the read counter rd_cntr, and writing by the write counter wr_cntr. */

// Read counter bits [5:3]: preset to 7 by RST so the first enabled increment
// wraps them to 0; they then advance on every clock while en_ram1reg is 1.
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        rd_cntr[5:3] <= 3'd7;
    else if (en_ram1reg == 1'b1)
        rd_cntr[5:3] <= rd_cntr[5:3] + 3'd1;
end

// Read counter bits [2:0]: preset to 7 by RST; they advance by one only on
// clocks where en_ram1reg is 1 and bits [5:3] are at 7, i.e. once per full
// pass of the upper field.
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        rd_cntr[2:0] <= 3'd7;
    else if ((rd_cntr[5:3] == 3'd7) && (en_ram1reg == 1'b1))
        rd_cntr[2:0] <= rd_cntr[2:0] + 3'd1;
end

// Read counter bit [6]: preset to 1 by RST; toggles on each clock where
// en_ram1reg is 1 and the low six bits are all ones (end of a 64-count pass).
always @ (posedge CLK or posedge RST)
begin
    if (RST)
        rd_cntr[6] <= 1'b1;
    else if ((rd_cntr[5:0] == 6'd63) && (en_ram1reg == 1'b1))
        rd_cntr[6] <= ~rd_cntr[6];
end


always @ (posedge CLK or posedge RST)
   begin
💿 文件大小 30 K
👤 上传用户 xingxian
📂 所属分类 VHDL/FPGA/Verilog
🏷️ 相关标签

#Verilog #VHDL #JPEG #MEPG
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -