📄 time.c.svn-base
字号:
if (numstack>5) numstack = 5;
/*dt: The *8 is there because above we mysteriously divide the capacity in BYTES by the number of BITS per wordline */
l_predec_nor_v = rows*8;
/*dt: If we follow the original drawings from the TRs, then there are almost no horizontal wires, only the poly for contacting
the nor gates. We don't model the poly part right now. */
l_predec_nor_h = 0;
/* Calculate rise time. Consider two inverters */
if (NSubbanks > 2) {
Ceq = draincap(Waddrdrvp1,PCH,1)+draincap(Waddrdrvn1,NCH,1) +
gatecap(Wdecdrivep_first+Wdecdriven_first,0.0);
tf = Ceq*transreson(Waddrdrvn1,NCH,1);
nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
(VTHINV360x240);
}
else {
Ceq = draincap(Wdecdrivep_first,PCH,1)+draincap(Wdecdriven_first,NCH,1) +
gatecap(Wdecdrivep_first+Wdecdriven_first,0.0);
tf = Ceq*transreson(Wdecdriven_first,NCH,1);
nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
(VTHINV360x240);
}
lkgCurrent += addr_bits_routed*0.5*cmos_ileakage(Wdecdriven_first,Wdecdrivep_first,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)
*1.0/(Ndwl*Ndbl);
*Tdecdrive = 0;
/*dt: the first inverter driving a bigger inverter*/
Ceq = draincap(Wdecdrivep_first,PCH,1)+draincap(Wdecdriven_first,NCH,1) +
gatecap(Wdecdrivep_second+Wdecdriven_second,0.0);
tf = Ceq*transreson(Wdecdriven_first,NCH,1);
this_delay = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,RISE);
*Tdecdrive += this_delay;
inrisetime = this_delay/(1.0-VTHINV360x240);
if(nr_dectreesegments) {
Ceq = draincap(Wdecdrivep_second,PCH,1)+draincap(Wdecdriven_second,NCH,1) +
gatecap(3*WdecdrivetreeN[nr_dectreesegments-1],0) + Cdectreesegments[nr_dectreesegments-1];
Req = transreson(Wdecdriven_second,NCH,1) + Rdectreesegments[nr_dectreesegments-1];
tf = Ceq*Req;
this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);
*Tdecdrive += this_delay;
inrisetime = this_delay/(1.0-VTHINV360x240);
dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += addr_bits_routed*0.5*cmos_ileakage(Wdecdriven_second,Wdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)
*1.0/(Ndwl*Ndbl);
}
/*dt: doing all the H-tree segments*/
for(i=nr_dectreesegments; i>2;i--) {
/*dt: this too should alternate...*/
Ceq = (Cdectreesegments[i-2] + draincap(2*WdecdrivetreeN[i-1],PCH,1)+ draincap(WdecdrivetreeN[i-1],NCH,1) +
gatecap(3*WdecdrivetreeN[i-2],0.0));
Req = (Rdectreesegments[i-2] + transreson(WdecdrivetreeN[i-1],NCH,1));
tf = Req*Ceq;
/*dt: These stages shouldn't all use the same edge (RISE below), but be interleaved. Have to fix that at some point.*/
this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);
*Tdecdrive += this_delay;
inrisetime = this_delay/(1.0 - VTHINV360x240);
dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += 1.0/(Ndwl*Ndbl)*pow(2,nr_dectreesegments - i)*addr_bits_routed*0.5*
cmos_ileakage(WdecdrivetreeN[i-1],2*WdecdrivetreeN[i-1],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
}
if(nr_dectreesegments) {
Ceq = 4*gatecap(Wdec3to8n+Wdec3to8p,10.0 /FUDGEFACTOR) + Cdectreesegments[0] +
draincap(2*WdecdrivetreeN[0],PCH,1)+ draincap(WdecdrivetreeN[0],NCH,1);
Rwire = Rdectreesegments[0];
tf = (Rwire + transreson(2*WdecdrivetreeN[0],PCH,1))*Ceq;
dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += 1.0/(Ndwl*Ndbl)*pow(2,nr_dectreesegments)*addr_bits_routed*0.5*cmos_ileakage(WdecdrivetreeN[0],2*WdecdrivetreeN[0],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
}
else {
Ceq = 4*gatecap(Wdec3to8n+Wdec3to8p,10.0 /FUDGEFACTOR) + Cdectreesegments[0] +
draincap(Wdecdrivep_second,PCH,1)+ draincap(Wdecdriven_second,NCH,1);
Rwire = Rdectreesegments[0];
tf = (Rwire + transreson(Wdecdrivep_second,PCH,1))*Ceq;
dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += 1.0/(Ndwl*Ndbl)*addr_bits_routed*0.5*cmos_ileakage(Wdecdriven_second,Wdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
}
// there are 8 nand gates in each 3-8 decoder. since these transistors are
// stacked, we use a stacking factor of 1/5 (0.2). 0.5 signifies that we
// are taking the average of both nmos and pmos transistors.
this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHNAND60x120,FALL);
*Tdecdrive += this_delay;
lkgCurrent += 8*0.2*0.5*cmos_ileakage(Wdec3to8n,Wdec3to8p,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)*
// For all the 3-8 decoders per quad:
ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ndbl*Nspd))))
/*for all the quads*/
*0.25;
nextinputtime = this_delay/VTHNAND60x120;
Ceq = 3*draincap(Wdec3to8p,PCH,1) + draincap(Wdec3to8n,NCH,3) +
gatecap(WdecNORn+WdecNORp,((numstack*40 / FUDGEFACTOR)+20.0 / FUDGEFACTOR))*rows/8 +
GlobalCbitmetal*(l_predec_nor_v)+ GlobalCwordmetal*(l_predec_nor_h);
Rwire = GlobalRbitmetal*(l_predec_nor_v)/2 + GlobalRwordmetal*(l_predec_nor_h)/2;
tf = Ceq*(Rwire+transreson(Wdec3to8n,NCH,3));
// 0.2 is the stacking factor, 0.5 for averaging of nmos and pmos leakage
// and since there are rows number of nor gates:
lkgCurrent += 0.5*0.2* rows * cmos_ileakage(WdecNORn,WdecNORp,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
// number of active blocks among Ndwl modules
if (Ndwl/Nspd < 1) {
Nact = 1;
}
else {
//v4.1: Fixing double->int type conversion problems. EPSILON is added below to make sure
//the final int value is the correct one
//Nact = Ndwl/Nspd;
Nact = (int) (Ndwl/Nspd + EPSILON);
}
//dynPower+=Ndwl*Ndbl*Ceq*VddPow*VddPow*4*ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ndbl*Nspd))));
dynPower+=0.5*Nact*Ceq*VddPow*VddPow*4*ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ndbl*Nspd))));
/* we only want to charge the output to the threshold of the
nor gate. But the threshold depends on the number of inputs
to the nor. */
switch(numstack) {
case 1: vth = VTHNOR12x4x1; break;
case 2: vth = VTHNOR12x4x2; break;
case 3: vth = VTHNOR12x4x3; break;
case 4: vth = VTHNOR12x4x4; break;
case 5: vth = VTHNOR12x4x4; break;
default: printf("error:numstack=%d\n",numstack);
printf("Cacti does not support a series stack of %d transistors !\n",numstack);
exit(0);
break;
}
*Tdecoder1 = horowitz(nextinputtime,tf,VTHNAND60x120,vth,RISE);
nextinputtime = *Tdecoder1/(1.0-vth);
/* Final stage: driving an inverter with the nor */
Req = transreson(WdecNORp,PCH,numstack);
Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0 / FUDGEFACTOR)+
numstack * draincap(WdecNORn,NCH,1)+draincap(WdecNORp,PCH,numstack));
lkgCurrent += 0.5* rows * cmos_ileakage(Wdecinvn,Wdecinvp,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
tf = Req*Ceq;
*Tdecoder2 = horowitz(nextinputtime,tf,vth,VSINV,FALL);
*outrisetime = *Tdecoder2/(VSINV);
*nor_inputs=numstack;
dynPower+=Ceq*VddPow*VddPow;
//printf("%g %g %g %d %d %d\n",*Tdecdrive,*Tdecoder1,*Tdecoder2,Ndwl, Ndbl,Nspd);
//fprintf(stderr, "%f %f %f %f %d %d %d\n", (*Tdecdrive+*Tdecoder1+*Tdecoder2)*1e3, *Tdecdrive*1e3, *Tdecoder1*1e3, *Tdecoder2*1e3, Ndwl, Ndbl, Nspd);
power->readOp.dynamic = dynPower;
power->readOp.leakage = (lkgCurrent * VddPow) * Ndwl * Ndbl;
power->writeOp.dynamic = dynPower;
power->writeOp.leakage = (lkgCurrent * VddPow) * Ndwl * Ndbl;
return(*Tdecdrive+*Tdecoder1+*Tdecoder2);
}
/*----------------------------------------------------------------------*/
/* Decoder delay in the tag array (see section 6.1 of tech report) */
/*dt: incorporating leakage code from eCacti, see decoder_delay for more comments */
double decoder_tag_delay(int C, int B,int A,int Ntwl,int Ntbl, int Ntspd,double NSubbanks,
double *Tdecdrive, double *Tdecoder1, double *Tdecoder2,double inrisetime,double *outrisetime, int *nor_inputs,powerDef *power)
{
//double Ceq,Req,Rwire,tf,nextinputtime,vth,tstep;
double Ceq,Req,Rwire,tf,nextinputtime,vth;
int numstack,tagbits, Nact;
int rows, cols;
//int l_inv_predecode,l_predec_nor_v,l_predec_nor_h;
int l_predec_nor_v,l_predec_nor_h;
//double wire_cap, wire_res;
double lkgCurrent=0.0, dynPower = 0.0;
//v4.1: Fixing double->int type conversion problems. EPSILON is added below to make sure
//the final int value is the correct one
//int addr_bits=(int)logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd)));
int addr_bits=(int) (logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd))) + EPSILON);
int horizontal_edge = 0;
int nr_subarrays_left = 0, v_or_h = 0;
int horizontal_step = 0, vertical_step = 0;
int h_inv_predecode = 0, v_inv_predecode = 0;
double this_delay;
int i = 0;
//v4.1: Fixing double->int type conversion problems. EPSILON is added below to make sure
//the final int value is the correct one
//int routing_bits = (int)logtwo( (double)((double)C/(double)B));
int routing_bits = (int) (logtwo( (double)((double)C/(double)B)) + EPSILON);
int tag_bits_routed;
rows = C/(8*B*A*Ntbl*Ntspd);
if(!force_tag) {
//tagbits = ADDRESS_BITS + EXTRA_TAG_BITS-(int)logtwo((double)C)+(int)logtwo((double)A)-(int)(logtwo(NSubbanks));
tagbits = (int) (ADDRESS_BITS + EXTRA_TAG_BITS-(int)logtwo((double)C)+(int)logtwo((double)A)-(int)(logtwo(NSubbanks)) + EPSILON);
}
else {
tagbits = force_tag_size;
}
tag_bits_routed = routing_bits + tagbits;
cols = tagbits*A*Ntspd/Ntwl ;
numstack =
(int)ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd))));
if (numstack==0) numstack = 1;
if (numstack>5) numstack = 5;
/*dt: see comments in compute_device_widths*/
/*dt: The *8 is there because above we mysteriously divide the capacity in BYTES by the number of BITS per wordline */
l_predec_nor_v = rows*8;
/*dt: If we follow the original drawings from the TRs, then there are almost no horizontal wires, only the poly for contacting
the nor gates. We don't model the poly part right now. */
l_predec_nor_h = 0;
/* Calculate rise time. Consider two inverters */
if (NSubbanks > 2) {
Ceq = draincap(Waddrdrvp1,PCH,1)+draincap(Waddrdrvn1,NCH,1) +
gatecap(Wtdecdrivep_first+Wtdecdriven_first,0.0);
tf = Ceq*transreson(Waddrdrvn1,NCH,1);
nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
(VTHINV360x240);
}
else {
Ceq = draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1) +
gatecap(Wtdecdrivep_first+Wtdecdriven_first,0.0);
tf = Ceq*transreson(Wdecdriven_first,NCH,1);
nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
(VTHINV360x240);
}
lkgCurrent = 0.5*cmos_ileakage(Wtdecdriven_first,Wtdecdrivep_first,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)*1.0/(Ntwl*Ntbl);
*Tdecdrive = 0;
/*dt: the first inverter driving a bigger inverter*/
Ceq = draincap(Wtdecdrivep_first,PCH,1)+draincap(Wtdecdriven_first,NCH,1) +
gatecap(Wtdecdrivep_second+Wtdecdriven_second,0.0);
tf = Ceq*transreson(Wtdecdriven_first,NCH,1);
this_delay = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,RISE);
*Tdecdrive += this_delay;
inrisetime = this_delay/(1.0-VTHINV360x240);
if(nr_tdectreesegments) {
Ceq = draincap(Wtdecdrivep_second,PCH,1)+draincap(Wtdecdriven_second,NCH,1) +
gatecap(3*WtdecdrivetreeN[nr_tdectreesegments-1],0) + Ctdectreesegments[nr_tdectreesegments-1];
Req = transreson(Wtdecdriven_second,NCH,1) + Rtdectreesegments[nr_tdectreesegments-1];
tf = Ceq*Req;
this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);
*Tdecdrive += this_delay;
inrisetime = this_delay/(1.0-VTHINV360x240);
dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += tag_bits_routed*0.5*cmos_ileakage(Wtdecdriven_second,Wtdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)
*1.0/(Ntwl*Ntbl);
}
/*dt: doing all the H-tree segments*/
for(i=nr_tdectreesegments; i>2;i--) {
/*dt: this too should alternate...*/
Ceq = (Ctdectreesegments[i-2] + draincap(2*WtdecdrivetreeN[i-1],PCH,1)+ draincap(WtdecdrivetreeN[i-1],NCH,1) +
gatecap(3*WtdecdrivetreeN[i-2],0.0));
Req = (Rtdectreesegments[i-2] + transreson(WtdecdrivetreeN[i-1],NCH,1));
tf = Req*Ceq;
/*dt: These stages shouldn't all use the same edge (RISE below), but be interleaved. Have to fix that at some point.*/
this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);
*Tdecdrive += this_delay;
inrisetime = this_delay/(1.0 - VTHINV360x240);
dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += 1.0/(Ntwl*Ntbl)*pow(2,nr_tdectreesegments - i)*tag_bits_routed*0.5*cmos_ileakage(WtdecdrivetreeN[i-1],2*WtdecdrivetreeN[i-1],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
}
if(nr_tdectreesegments) {
//v4.1: Change below, gatecap(Wtdec3to8n+Wdec3to8p,10.0) -> gatecap(Wtdec3to8n+Wtdec3to8p,10.0)
//Ceq = 4*gatecap(Wtdec3to8n+Wdec3to8p,10.0) + Ctdectreesegments[0] +
//draincap(2*WtdecdrivetreeN[0],PCH,1)+ draincap(WtdecdrivetreeN[0],NCH,1);
Ceq = 4*gatecap(Wtdec3to8n+Wtdec3to8p,10.0 / FUDGEFACTOR) + Ctdectreesegments[0] +
draincap(2*WtdecdrivetreeN[0],PCH,1)+ draincap(WtdecdrivetreeN[0],NCH,1);
Rwire = Rtdectreesegments[0];
tf = (Rwire + transreson(2*WtdecdrivetreeN[0],PCH,1))*Ceq;
dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += 1.0/(Ntwl*Ntbl)*pow(2,nr_tdectreesegments)*tag_bits_routed*0.5*cmos_ileakage(WtdecdrivetreeN[0],2*WtdecdrivetreeN[0],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
}
else {
//v4.1: Change below, gatecap(Wtdec3to8n+Wdec3to8p,10.0) -> gatecap(Wtdec3to8n+Wtdec3to8p,10.0)
//Ceq = 4*gatecap(Wtdec3to8n+Wdec3to8p,10.0) + Ctdectreesegments[0] +
//draincap(Wtdecdrivep_second,PCH,1)+ draincap(Wtdecdriven_second,NCH,1);
Ceq = 4*gatecap(Wtdec3to8n+Wtdec3to8p,10.0 / FUDGEFACTOR) + Ctdectreesegments[0] +
draincap(Wtdecdrivep_second,PCH,1)+ draincap(Wtdecdriven_second,NCH,1);
Rwire = Rtdectreesegments[0];
tf = (Rwire + transreson(Wtdecdrivep_second,PCH,1))*Ceq;
dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
lkgCurrent += 1.0/(Ntwl*Ntbl)*tag_bits_routed*0.5*cmos_ileakage(Wtdecdriven_second,Wtdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
}
this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHNAND60x120,FALL);
*Tdecdrive += this_delay;
nextinputtime = this_delay/VTHNAND60x120;
// there are 8 nand gates in each 3-8 decoder. since these transistors are
// stacked, we use a stacking factor of 1/5 (0.2). 0.5 signifies that we
// are taking the average of both nmos and pmos transistors.
lkgCurrent += 8*0.2*0.5* cmos_ileakage(Wtdec3to8n,Wtdec3to8p,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)*
// For all the 3-8 decoders per quad:
ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd))))
/*for all the quads*/
*0.25;
/* se
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -