// File: dct_1d.v
// (code-viewer label left over from the web page: "Font size:")
`timescale 1ns/10ps
`define bw 12
//==========================================================
// dct_1d : shared 1-D DCT / IDCT datapath.
//
// Inputs
//   nrst    - active-low asynchronous reset (clears fodd_d)
//   clk     - clock; fodd_d is updated on the rising edge
//   dcten   - DCT mode enable (checked before idcten in the mode selects)
//   idcten  - IDCT mode enable
//   din     - bw-bit input sample; routed to scc4 in both modes and to
//             scc2 in IDCT mode while scc2en is high
//   cntr    - 4-bit step counter; values 1..8 are decoded into the
//             per-step enables a0en..a7en
// Outputs
//   dctout  - bw-bit DCT result for the current step
//   idctout - {idctout1, idctout2}: two bw-bit IDCT results per step
//
// The sub-modules scc4, scc2, csa4, csa5, csa51, add and sub are defined
// outside this file section.  Comments below that describe what they
// compute are assumptions based on their names and on how their results
// are used here - confirm against their sources.
//==========================================================
module dct_1d(//input
              nrst, clk,
              dcten, idcten,
              din, cntr,
              //output
              dctout, idctout
              );
input               nrst, clk;
input               dcten, idcten;
input  [`bw-1:0]    din;
input  [3:0]        cntr;
output [`bw-1:0]    dctout;
output [2*`bw-1:0]  idctout;

wire [`bw-1:0] scc4_o, scc2_o;

//==========================================================
// Step decoding.
// The original note here reads: "while in dct mode need to do
// reordering pre-processing".
//==========================================================
// scc2 is enabled on steps 1,2 in DCT mode and 5,6 in IDCT mode;
// scc2en_d covers the same windows shifted one step later.
wire scc2en   = (dcten) ? ((cntr==1) | (cntr==2)) : (idcten) ? ((cntr==5) | (cntr==6)) : 0;
wire scc2en_d = (dcten) ? ((cntr==2) | (cntr==3)) : (idcten) ? ((cntr==6) | (cntr==7)) : 0;

// One-hot step strobes.  Note the non-sequential mapping of index to step.
wire a0en = (cntr==1);
wire a1en = (cntr==2);
wire a2en = (cntr==5);
wire a3en = (cntr==3);
wire a4en = (cntr==8);
wire a5en = (cntr==7);
wire a6en = (cntr==4);
wire a7en = (cntr==6);

// subNen: steps 1,2,3,4 in DCT mode; steps 5,6,7,8 in IDCT mode.
wire sub1en = dcten ? a0en : idcten ? a2en : 0;
wire sub2en = dcten ? a1en : idcten ? a7en : 0;
wire sub3en = dcten ? a3en : idcten ? a5en : 0;
wire sub4en = dcten ? a6en : idcten ? a4en : 0;

// scc4en:   DCT steps 1..4, IDCT steps 5..8.
// scc4en_d: DCT steps 2..7, IDCT steps 5..8 and 1..3.
wire scc4en   = dcten  ? (a0en|a1en|a3en|a6en) :
                idcten ? (a2en|a7en|a5en|a4en) : 0;
wire scc4en_d = dcten  ? (a1en|a3en|a6en|a2en|a7en|a5en) :
                idcten ? (a2en|a7en|a5en|a4en|a0en|a1en|a3en) : 0;

// din is passed to scc4 in either mode, zero when neither mode is enabled.
wire [`bw-1:0] scc4_in = dcten  ? din :
                         idcten ? din : 0;

// a0..a7 have no driver in this module, so they are presumably driven by
// scc4 even though the instance below lists them under "//input" -
// TODO confirm against the scc4 port directions.
wire [`bw-1:0] a0, a1, a2, a3, a4, a5, a6, a7;

//==========================================================
// skew circular convolution 4x4
// idct
//==========================================================
scc4 scc4(
          //input
          .nrst     (nrst),
          .clk      (clk),
          .dcten    (dcten),
          .idcten   (idcten),
          .scc4en   (scc4en),
          .scc4en_d (scc4en_d),
          .a0en     (a0en),
          .a1en     (a1en),
          .a2en     (a2en),
          .a3en     (a3en),
          .a4en     (a4en),
          .a5en     (a5en),
          .a6en     (a6en),
          .a7en     (a7en),
          .sub1en   (sub1en),
          .sub2en   (sub2en),
          .sub3en   (sub3en),
          .sub4en   (sub4en),
          .di       (scc4_in),
          .a0       (a0),
          .a1       (a1),
          .a2       (a2),
          .a3       (a3),
          .a4       (a4),
          .a5       (a5),
          .a6       (a6),
          .a7       (a7),
          //output
          .do       (scc4_o)); //s2(1),s2(2),s2(3),s2(4)

//==========================================================
//skew circular convolution 2x2
//==========================================================
wire [`bw+2:0] f0, f1;
wire f0_en = (cntr==1);   // same decode as a0en
wire f1_en = (cntr==2);   // same decode as a1en
wire f01en = f0_en | f1_en; // not referenced elsewhere in this module

//==========================================================
// post processing for IDCT
// fodd_d is a bw+3-bit holding register, written in the always block
// further down (DCT: fodd, IDCT: scc2_o captured on step 7).
reg  [`bw+2:0] fodd_d;
wire [`bw-1:0] b1 = a5; //fodd_d;
wire [`bw-1:0] b3 = a4; //scc2_o;
wire [`bw-1:0] m1 = b1 + b3;
wire [`bw-1:0] m2 = b1 - b3;

// Sign-extended right shifts of m1 / m2; the suffix is the shift amount.
wire [`bw-1:0] m1_2 = (m1[`bw-1]==1) ? {2'b11,      m1[`bw-1:2]} : {2'b0, m1[`bw-1:2]};
wire [`bw-1:0] m1_3 = (m1[`bw-1]==1) ? {{3{1'b1}},  m1[`bw-1:3]} : {3'b0, m1[`bw-1:3]};
wire [`bw-1:0] m1_5 = (m1[`bw-1]==1) ? {{5{1'b1}},  m1[`bw-1:5]} : {5'b0, m1[`bw-1:5]};
wire [`bw-1:0] m1_7 = (m1[`bw-1]==1) ? {{7{1'b1}},  m1[`bw-1:7]} : {7'b0, m1[`bw-1:7]};
wire [`bw-1:0] m2_2 = (m2[`bw-1]==1) ? {2'b11,      m2[`bw-1:2]} : {2'b0, m2[`bw-1:2]};
wire [`bw-1:0] m2_3 = (m2[`bw-1]==1) ? {{3{1'b1}},  m2[`bw-1:3]} : {3'b0, m2[`bw-1:3]};
wire [`bw-1:0] m2_5 = (m2[`bw-1]==1) ? {{5{1'b1}},  m2[`bw-1:5]} : {5'b0, m2[`bw-1:5]};
wire [`bw-1:0] m2_7 = (m2[`bw-1]==1) ? {{7{1'b1}},  m2[`bw-1:7]} : {7'b0, m2[`bw-1:7]};

// Operands of the two shared 5-input adders.
//   DCT  (step 1 -> csa51, step 2 -> csa52): two operands are bit-inverted
//         and the constant 2 is added, which is two's-complement
//         subtraction of those two operands, e.g. a0 + a4 - a6 - a2.
//   IDCT: m + m>>2 + m>>3 + m>>5 + m>>7.
// (Both readings assume csa5 returns the plain sum of its five inputs.)
wire [`bw-1:0] ae1 = (dcten&f0_en) ?  a0 : idcten ? m1   : 0;
wire [`bw-1:0] ae2 = (dcten&f1_en) ?  a1 : idcten ? m2   : 0;
wire [`bw-1:0] ae3 = (dcten&f0_en) ? ~a2 : idcten ? m1_5 : 0;
wire [`bw-1:0] ae4 = (dcten&f1_en) ? ~a3 : idcten ? m2_5 : 0;
wire [`bw-1:0] ae5 = (dcten&f0_en) ?  a4 : idcten ? m1_2 : 0;
wire [`bw-1:0] ae6 = (dcten&f1_en) ?  a5 : idcten ? m2_2 : 0;
wire [`bw-1:0] ae7 = (dcten&f0_en) ? ~a6 : idcten ? m1_3 : 0;
wire [`bw-1:0] ae8 = (dcten&f1_en) ? ~a7 : idcten ? m2_3 : 0;
// Constant 2 is padded to bw bits from the macro instead of a literal 10'b0,
// so the width follows bw.
wire [`bw-1:0] sgn1 = dcten ? {{(`bw-2){1'b0}}, 2'b10} : idcten ? m1_7 : 0;
wire [`bw-1:0] sgn2 = dcten ? {{(`bw-2){1'b0}}, 2'b10} : idcten ? m2_7 : 0;

//both dct & idct will use csa51 csa52
csa5 csa51(.a1(ae1), .a2(ae5), .a3(ae7), .a4(ae3), .a5(sgn1), .sum(f0));
csa5 csa52(.a1(ae2), .a2(ae6), .a3(ae4), .a4(ae8), .a5(sgn2), .sum(f1));

// for idct only
// Bits [bw+1:2] of the sums, i.e. the sum divided by 4, as bw-bit values.
// The select is exactly bw bits wide (previously [bw+2:2], whose top bit
// was silently dropped by the assignment to a bw-bit net).
wire [`bw-1:0] m1035 = f0[`bw+1:2];
wire [`bw-1:0] m2035 = f1[`bw+1:2];

// scc2 input: DCT feeds the low bw bits of f0 (step 1) / f1 (step 2);
// IDCT feeds din while scc2en is high.
wire [`bw-1:0] scc2_in = dcten ? (f0_en ? f0[`bw-1:0] : f1_en ? f1[`bw-1:0] : 0) :
                         (idcten&scc2en) ? din : 0;
scc2 scc2(
          //input
          .nrst     (nrst),
          .clk      (clk),
          .scc2en   (scc2en),
          .scc2en_d (scc2en_d),
          .di       (scc2_in),
          //output
          .do       (scc2_o));

//==========================================================
// while in dct mode
// need to do post-processing
//==========================================================
// csa41 / csa42 operands.
//   DCT : a0..a3 into csa41 and a4..a7 into csa42.
//   IDCT: m1035 with fodd_d (low bw bits), and m2035 with scc2_o.
wire [`bw+1:0] feven1, feven2;
wire [`bw-1:0] csa41_a1 = dcten ? a0 : idcten ? m1035  : 0;
wire [`bw-1:0] csa41_a2 = dcten ? a1 : idcten ? fodd_d : 0; //s1_tmp(1)
wire [`bw-1:0] csa41_a3 = dcten ? a2 : 0;
wire [`bw-1:0] csa41_a4 = dcten ? a3 : 0;
wire [`bw-1:0] csa42_a1 = dcten ? a4 : idcten ? m2035  : 0;
wire [`bw-1:0] csa42_a2 = dcten ? a5 : idcten ? scc2_o : 0; //s1_tmp(2)
wire [`bw-1:0] csa42_a3 = dcten ? a6 : 0;
wire [`bw-1:0] csa42_a4 = dcten ? a7 : 0;
csa4 csa41(.a1(csa41_a1), .a2(csa41_a2), .a3(csa41_a3), .a4(csa41_a4), .sum(feven1)); //s11
csa4 csa42(.a1(csa42_a1), .a2(csa42_a2), .a3(csa42_a3), .a4(csa42_a4), .sum(feven2)); //s12
// Sign-extend both partial sums by one bit before adding them.
wire [`bw+2:0] feven1_tmp = feven1[`bw+1] ? {1'b1, feven1} : {1'b0, feven1};
wire [`bw+2:0] feven2_tmp = feven2[`bw+1] ? {1'b1, feven2} : {1'b0, feven2};
wire [`bw+2:0] feven = feven1_tmp + feven2_tmp;

// csa43 / csa44 operands.
//   DCT : a0,a4,a6,a2 into csa43 and a1,a5,a3,a7 into csa44.
//   IDCT: m1035 + ~fodd_d + 1 and m2035 + ~scc2_o + 1, i.e. the
//         two's-complement differences m1035 - fodd_d and m2035 - scc2_o
//         (assuming csa4 returns the plain sum of its inputs).
// The constant 1 is padded to bw bits from the macro instead of a literal 11'b0.
wire [`bw+1:0] fodd1, fodd2;
wire [`bw-1:0] csa43_a1 = dcten ? a0 : idcten ? m1035   : 0;
wire [`bw-1:0] csa43_a2 = dcten ? a4 : idcten ? ~fodd_d : 0;
wire [`bw-1:0] csa43_a3 = dcten ? a6 : idcten ? {{(`bw-1){1'b0}}, 1'b1} : 0;
wire [`bw-1:0] csa43_a4 = dcten ? a2 : idcten ? 0 : 0;
wire [`bw-1:0] csa44_a1 = dcten ? a1 : idcten ? m2035   : 0;
wire [`bw-1:0] csa44_a2 = dcten ? a5 : idcten ? ~scc2_o : 0;
wire [`bw-1:0] csa44_a3 = dcten ? a3 : idcten ? {{(`bw-1){1'b0}}, 1'b1} : 0;
wire [`bw-1:0] csa44_a4 = dcten ? a7 : idcten ? 0 : 0;
csa4 csa43(.a1(csa43_a1), .a2(csa43_a2), .a3(csa43_a3), .a4(csa43_a4), .sum(fodd1)); //s14
csa4 csa44(.a1(csa44_a1), .a2(csa44_a2), .a3(csa44_a3), .a4(csa44_a4), .sum(fodd2)); //s13
// Sign-extend both partial sums by one bit before subtracting them.
wire [`bw+2:0] fodd1_tmp = fodd1[`bw+1] ? {1'b1, fodd1} : {1'b0, fodd1};
wire [`bw+2:0] fodd2_tmp = fodd2[`bw+1] ? {1'b1, fodd2} : {1'b0, fodd2};
wire [`bw+2:0] fodd = fodd1_tmp - fodd2_tmp;
//wire [`bw-1:0] fodd=fodd_tmp[`bw-1:0];

// Holding register:
//   reset           -> 0
//   DCT mode        -> fodd on every clock
//   IDCT, cntr==7   -> scc2_o (zero-extended to bw+3 bits)
//   otherwise       -> holds its value
always @(posedge clk or negedge nrst)
  if (~nrst)            fodd_d <= 0;
  else if (dcten)       fodd_d <= fodd;
  else if (idcten&a5en) fodd_d <= scc2_o;

// DCT only: value to be scaled - feven on step 1, the registered fodd on
// step 2, zero otherwise.
wire [`bw+2:0] tmp = dcten ? (a0en ? feven  :
                              a1en ? fodd_d : 0) : 0;

//==========================================================
// multiplier 0.35355
// Built as tmp + tmp>>2 + tmp>>3 + tmp>>5 + tmp>>7 (sign-extended shifts),
// then bits [bw+1:2] of the sum are kept, i.e. a further divide by 4:
// (1 + 1/4 + 1/8 + 1/32 + 1/128) / 4 = 0.353515625.
// (Assumes csa51 returns the plain sum of its five inputs.)
// Note: csa5_4 and csa5_5 are shifts by 5 and 7; the names are just indices.
wire [`bw+5:0] csa0353_tmp;
wire [`bw+2:0] csa5_2 = (tmp[`bw+2]==1) ? {2'b11,     tmp[`bw+2:2]} : {2'b0, tmp[`bw+2:2]};
wire [`bw+2:0] csa5_3 = (tmp[`bw+2]==1) ? {{3{1'b1}}, tmp[`bw+2:3]} : {3'b0, tmp[`bw+2:3]};
wire [`bw+2:0] csa5_4 = (tmp[`bw+2]==1) ? {{5{1'b1}}, tmp[`bw+2:5]} : {5'b0, tmp[`bw+2:5]};
wire [`bw+2:0] csa5_5 = (tmp[`bw+2]==1) ? {{7{1'b1}}, tmp[`bw+2:7]} : {7'b0, tmp[`bw+2:7]};
csa51 csa55(.a1(tmp), .a2(csa5_2), .a3(csa5_3), .a4(csa5_4), .a5(csa5_5), .sum(csa0353_tmp));
wire [`bw-1:0] csa053 = csa0353_tmp[`bw+1:2];

//==========================================================
// The final hardware output order of the DCT is
// 1->5->3->7->2->4->8->6 or say
// 0->4->2->6->1->3->7->5
// Output select by step: steps 1,2 -> scaled csa053; steps 3,4 -> scc2_o;
// step 7 -> negated scc4_o; any other step -> scc4_o.
wire [`bw-1:0] dctout = (a0en|a1en) ? csa053 :
                        (a3en|a6en) ? scc2_o :
                        a5en ? ~scc4_o+1 : scc4_o;

//==========================================================
// IDCT POST Processing
// see fidct.m file
// t(1)=s11+s2(1);
// t(2)=s12+s2(2);
// t(3)=s13+s2(3);
// t(4)=s14+s2(4);
// t(5)=s14-s2(4);
// t(6)=s13-s2(3);
// t(7)=s12-s2(2);
// t(8)=s11-s2(1);
// s11..s14 are the low bw bits of the csa4 sums, forced to 0 outside IDCT mode.
wire [`bw-1:0] s11 = idcten ? feven1[`bw-1:0] : 0;
wire [`bw-1:0] s12 = idcten ? feven2[`bw-1:0] : 0;
wire [`bw-1:0] s14 = idcten ? fodd1[`bw-1:0]  : 0;
wire [`bw-1:0] s13 = idcten ? fodd2[`bw-1:0]  : 0;
// scc4_o is negated on step 3 only.
wire [`bw-1:0] s2tmp = (cntr==3) ? (~scc4_o+1) : scc4_o;
// Steps 1,2,3,4 select s11, s12, s14, s13 (note 14 before 13).
wire [`bw-1:0] stmp = (cntr==1) ? s11 :
                      (cntr==2) ? s12 :
                      (cntr==3) ? s14 :
                      (cntr==4) ? s13 : 0;
wire [`bw-1:0] idctout1, idctout2;
add add(.a1(stmp), .a2(s2tmp), .sum(idctout1));
sub sub(.a1(stmp), .a2(s2tmp), .sum(idctout2));

//==========================================================
// idctout output order is FX(1,8)->(2,7)->(4,5)->(3,6)
// This output format is not ideal, because data is written to the SRAM
// one value at a time; if two values are output together, they have to
// be held in flops.
wire [2*`bw-1:0] idctout = {idctout1, idctout2};
endmodule
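// ---- Code-viewer keyboard shortcuts (web page residue, not part of the design) ----
// Copy code:           Ctrl + C
// Search code:         Ctrl + F
// Full-screen mode:    F11
// Toggle theme:        Ctrl + Shift + D
// Show shortcuts:      ?
// Increase font size:  Ctrl + =
// Decrease font size:  Ctrl + -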