⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 time.c.svn-base

📁 模拟多核状态下龙芯处理器的功能
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
  if (numstack>5) numstack = 5;

  /*dt: The *8 is there because above we mysteriously divide the capacity in BYTES by the number of BITS per wordline */
  l_predec_nor_v = rows*8; 
  /*dt: If we follow the original drawings from the TRs, there are almost no horizontal wires, only the poly for contacting
    the nor gates. We don't model the poly part right now */
  l_predec_nor_h = 0;


  /* Calculate rise time.  Consider two inverters */


  if (NSubbanks > 2) {
    Ceq = draincap(Waddrdrvp1,PCH,1)+draincap(Waddrdrvn1,NCH,1) +
      gatecap(Wdecdrivep_first+Wdecdriven_first,0.0);

    tf = Ceq*transreson(Waddrdrvn1,NCH,1);
    nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
      (VTHINV360x240);
  }
  else {
    Ceq = draincap(Wdecdrivep_first,PCH,1)+draincap(Wdecdriven_first,NCH,1) +
      gatecap(Wdecdrivep_first+Wdecdriven_first,0.0);

    tf = Ceq*transreson(Wdecdriven_first,NCH,1);
    nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
      (VTHINV360x240);
  }


  lkgCurrent += addr_bits_routed*0.5*cmos_ileakage(Wdecdriven_first,Wdecdrivep_first,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)
    *1.0/(Ndwl*Ndbl);

  *Tdecdrive = 0;

  /*dt: the first inverter driving a bigger inverter*/
  Ceq = draincap(Wdecdrivep_first,PCH,1)+draincap(Wdecdriven_first,NCH,1) +
    gatecap(Wdecdrivep_second+Wdecdriven_second,0.0);

  tf = Ceq*transreson(Wdecdriven_first,NCH,1);

  this_delay = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,RISE);

  *Tdecdrive += this_delay;
  inrisetime = this_delay/(1.0-VTHINV360x240);


  if(nr_dectreesegments) {
    Ceq = draincap(Wdecdrivep_second,PCH,1)+draincap(Wdecdriven_second,NCH,1) + 
      gatecap(3*WdecdrivetreeN[nr_dectreesegments-1],0) + Cdectreesegments[nr_dectreesegments-1];
    Req = transreson(Wdecdriven_second,NCH,1) + Rdectreesegments[nr_dectreesegments-1]; 

    tf = Ceq*Req;
    this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);

    *Tdecdrive += this_delay;
    inrisetime = this_delay/(1.0-VTHINV360x240);

    dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += addr_bits_routed*0.5*cmos_ileakage(Wdecdriven_second,Wdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)
      *1.0/(Ndwl*Ndbl);
  }



  /*dt: doing all the H-tree segments*/

  for(i=nr_dectreesegments; i>2;i--) {
    /*dt: this too should alternate...*/
    Ceq = (Cdectreesegments[i-2] + draincap(2*WdecdrivetreeN[i-1],PCH,1)+ draincap(WdecdrivetreeN[i-1],NCH,1) + 
        gatecap(3*WdecdrivetreeN[i-2],0.0));
    Req = (Rdectreesegments[i-2] + transreson(WdecdrivetreeN[i-1],NCH,1));
    tf = Req*Ceq;
    /*dt: This shouldn't be all falling, but interleaved. Have to fix that at some point.*/
    this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);
    *Tdecdrive += this_delay;
    inrisetime = this_delay/(1.0 - VTHINV360x240);

    dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += 1.0/(Ndwl*Ndbl)*pow(2,nr_dectreesegments - i)*addr_bits_routed*0.5*
      cmos_ileakage(WdecdrivetreeN[i-1],2*WdecdrivetreeN[i-1],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
  }

  if(nr_dectreesegments) {
    Ceq = 4*gatecap(Wdec3to8n+Wdec3to8p,10.0 /FUDGEFACTOR) + Cdectreesegments[0] + 
      draincap(2*WdecdrivetreeN[0],PCH,1)+ draincap(WdecdrivetreeN[0],NCH,1); 
    Rwire = Rdectreesegments[0];
    tf = (Rwire + transreson(2*WdecdrivetreeN[0],PCH,1))*Ceq;

    dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += 1.0/(Ndwl*Ndbl)*pow(2,nr_dectreesegments)*addr_bits_routed*0.5*cmos_ileakage(WdecdrivetreeN[0],2*WdecdrivetreeN[0],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
  }
  else {
    Ceq = 4*gatecap(Wdec3to8n+Wdec3to8p,10.0 /FUDGEFACTOR) + Cdectreesegments[0] + 
      draincap(Wdecdrivep_second,PCH,1)+ draincap(Wdecdriven_second,NCH,1); 
    Rwire = Rdectreesegments[0];
    tf = (Rwire + transreson(Wdecdrivep_second,PCH,1))*Ceq;

    dynPower+=addr_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += 1.0/(Ndwl*Ndbl)*addr_bits_routed*0.5*cmos_ileakage(Wdecdriven_second,Wdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
  }

  // there are 8 nand gates in each 3-8 decoder. since these transistors are
  // stacked, we use a stacking factor of 1/5 (0.2). 0.5 signifies that we
  // are taking the average of both nmos and pmos transistors.



  this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHNAND60x120,FALL);
  *Tdecdrive += this_delay;

  lkgCurrent += 8*0.2*0.5*cmos_ileakage(Wdec3to8n,Wdec3to8p,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)*
    // For all the 3-8 decoders per quad:
    ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ndbl*Nspd))))
    /*for all the quads*/
    *0.25;

  nextinputtime = this_delay/VTHNAND60x120;

  Ceq = 3*draincap(Wdec3to8p,PCH,1) + draincap(Wdec3to8n,NCH,3) +
    gatecap(WdecNORn+WdecNORp,((numstack*40 / FUDGEFACTOR)+20.0 / FUDGEFACTOR))*rows/8 +
    GlobalCbitmetal*(l_predec_nor_v)+ GlobalCwordmetal*(l_predec_nor_h);
  Rwire = GlobalRbitmetal*(l_predec_nor_v)/2 + GlobalRwordmetal*(l_predec_nor_h)/2;

  tf = Ceq*(Rwire+transreson(Wdec3to8n,NCH,3));

  // 0.2 is the stacking factor, 0.5 averages the nmos and pmos leakage,
  // and the result is scaled by rows because there are `rows` nor gates:

  lkgCurrent += 0.5*0.2* rows * cmos_ileakage(WdecNORn,WdecNORp,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp); 

  // number of active blocks among Ndwl modules
  if (Ndwl/Nspd < 1) {
    Nact = 1;
  }
  else {
    //v4.1: Fixing double->int type conversion problems. EPSILON is added below to make sure
    //the final int value is the correct one 
    //Nact = Ndwl/Nspd;
    Nact = (int) (Ndwl/Nspd + EPSILON);
  }

  //dynPower+=Ndwl*Ndbl*Ceq*VddPow*VddPow*4*ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ndbl*Nspd))));
  dynPower+=0.5*Nact*Ceq*VddPow*VddPow*4*ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ndbl*Nspd))));

  /* we only want to charge the output to the threshold of the
     nor gate.  But the threshold depends on the number of inputs
     to the nor.  */

  switch(numstack) {
    case 1: vth = VTHNOR12x4x1; break;
    case 2: vth = VTHNOR12x4x2; break;
    case 3: vth = VTHNOR12x4x3; break;
    case 4: vth = VTHNOR12x4x4; break;
    case 5: vth = VTHNOR12x4x4; break;
    default: printf("error:numstack=%d\n",numstack);
             printf("Cacti does not support a series stack of %d transistors !\n",numstack);
             exit(0);
             break;
  }

  *Tdecoder1 = horowitz(nextinputtime,tf,VTHNAND60x120,vth,RISE);

  nextinputtime = *Tdecoder1/(1.0-vth);

  /* Final stage: driving an inverter with the nor */

  Req = transreson(WdecNORp,PCH,numstack);

  Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0 / FUDGEFACTOR)+
      numstack * draincap(WdecNORn,NCH,1)+draincap(WdecNORp,PCH,numstack));


  lkgCurrent += 0.5* rows * cmos_ileakage(Wdecinvn,Wdecinvp,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
  tf = Req*Ceq;

  *Tdecoder2 = horowitz(nextinputtime,tf,vth,VSINV,FALL);

  *outrisetime = *Tdecoder2/(VSINV);
  *nor_inputs=numstack;
  dynPower+=Ceq*VddPow*VddPow;

  //printf("%g %g %g %d %d %d\n",*Tdecdrive,*Tdecoder1,*Tdecoder2,Ndwl, Ndbl,Nspd);

  //fprintf(stderr, "%f %f %f %f %d %d %d\n", (*Tdecdrive+*Tdecoder1+*Tdecoder2)*1e3, *Tdecdrive*1e3, *Tdecoder1*1e3, *Tdecoder2*1e3, Ndwl, Ndbl, Nspd);
  power->readOp.dynamic = dynPower;
  power->readOp.leakage = (lkgCurrent * VddPow) * Ndwl * Ndbl;

  power->writeOp.dynamic = dynPower;
  power->writeOp.leakage = (lkgCurrent * VddPow) * Ndwl * Ndbl;


  return(*Tdecdrive+*Tdecoder1+*Tdecoder2);
}

/*----------------------------------------------------------------------*/

/* Decoder delay in the tag array (see section 6.1 of tech report) */
/*dt: incorporating leakage code from eCacti, see decoder_delay for more comments */
double decoder_tag_delay(int C, int B,int A,int Ntwl,int Ntbl, int Ntspd,double NSubbanks,
    double *Tdecdrive, double *Tdecoder1, double *Tdecoder2,double inrisetime,double *outrisetime, int *nor_inputs,powerDef *power)
{
  //double Ceq,Req,Rwire,tf,nextinputtime,vth,tstep;
  double Ceq,Req,Rwire,tf,nextinputtime,vth;
  int numstack,tagbits, Nact;
  int rows, cols;
  //int l_inv_predecode,l_predec_nor_v,l_predec_nor_h;
  int l_predec_nor_v,l_predec_nor_h;
  //double wire_cap, wire_res;
  double lkgCurrent=0.0, dynPower = 0.0;
  //v4.1: Fixing double->int type conversion problems. EPSILON is added below to make sure
  //the final int value is the correct one 
  //int addr_bits=(int)logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd)));
  int addr_bits=(int) (logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd))) + EPSILON);
  int horizontal_edge = 0;
  int nr_subarrays_left = 0, v_or_h = 0;
  int horizontal_step = 0, vertical_step = 0;
  int h_inv_predecode = 0, v_inv_predecode = 0;
  double this_delay;
  int i = 0;

  //v4.1: Fixing double->int type conversion problems. EPSILON is added below to make sure
  //the final int value is the correct one 
  //int routing_bits = (int)logtwo( (double)((double)C/(double)B));
  int routing_bits = (int) (logtwo( (double)((double)C/(double)B)) + EPSILON);
  int tag_bits_routed;

  rows = C/(8*B*A*Ntbl*Ntspd);
  if(!force_tag) {
    //tagbits = ADDRESS_BITS + EXTRA_TAG_BITS-(int)logtwo((double)C)+(int)logtwo((double)A)-(int)(logtwo(NSubbanks));
    tagbits = (int) (ADDRESS_BITS + EXTRA_TAG_BITS-(int)logtwo((double)C)+(int)logtwo((double)A)-(int)(logtwo(NSubbanks)) + EPSILON);
  }
  else {
    tagbits = force_tag_size;
  }
  tag_bits_routed = routing_bits + tagbits;


  cols = tagbits*A*Ntspd/Ntwl ;

  numstack =
    (int)ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd))));
  if (numstack==0) numstack = 1;
  if (numstack>5) numstack = 5;

  /*dt: see comments in compute_device_widths*/
  /*dt: The *8 is there because above we mysteriously divide the capacity in BYTES by the number of BITS per wordline */
  l_predec_nor_v = rows*8; 
  /*dt: If we follow the original drawings from the TRs, there are almost no horizontal wires, only the poly for contacting
    the nor gates. We don't model the poly part right now */
  l_predec_nor_h = 0;


  /* Calculate rise time.  Consider two inverters */
  if (NSubbanks > 2) {
    Ceq = draincap(Waddrdrvp1,PCH,1)+draincap(Waddrdrvn1,NCH,1) +
      gatecap(Wtdecdrivep_first+Wtdecdriven_first,0.0);

    tf = Ceq*transreson(Waddrdrvn1,NCH,1);

    nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
      (VTHINV360x240);
  } 
  else {
    Ceq = draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1) +
      gatecap(Wtdecdrivep_first+Wtdecdriven_first,0.0);

    tf = Ceq*transreson(Wdecdriven_first,NCH,1);

    nextinputtime = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,FALL)/
      (VTHINV360x240);
  }

  lkgCurrent = 0.5*cmos_ileakage(Wtdecdriven_first,Wtdecdrivep_first,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)*1.0/(Ntwl*Ntbl);

  *Tdecdrive = 0;

  /*dt: the first inverter driving a bigger inverter*/
  Ceq = draincap(Wtdecdrivep_first,PCH,1)+draincap(Wtdecdriven_first,NCH,1) +
    gatecap(Wtdecdrivep_second+Wtdecdriven_second,0.0);

  tf = Ceq*transreson(Wtdecdriven_first,NCH,1);

  this_delay = horowitz(0.0,tf,VTHINV360x240,VTHINV360x240,RISE);

  *Tdecdrive += this_delay;
  inrisetime = this_delay/(1.0-VTHINV360x240);


  if(nr_tdectreesegments) {
    Ceq = draincap(Wtdecdrivep_second,PCH,1)+draincap(Wtdecdriven_second,NCH,1) + 
      gatecap(3*WtdecdrivetreeN[nr_tdectreesegments-1],0) + Ctdectreesegments[nr_tdectreesegments-1];
    Req = transreson(Wtdecdriven_second,NCH,1) + Rtdectreesegments[nr_tdectreesegments-1]; 

    tf = Ceq*Req;
    this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);

    *Tdecdrive += this_delay;
    inrisetime = this_delay/(1.0-VTHINV360x240);

    dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += tag_bits_routed*0.5*cmos_ileakage(Wtdecdriven_second,Wtdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)
      *1.0/(Ntwl*Ntbl);
  }



  /*dt: doing all the H-tree segments*/

  for(i=nr_tdectreesegments; i>2;i--) {
    /*dt: this too should alternate...*/
    Ceq = (Ctdectreesegments[i-2] + draincap(2*WtdecdrivetreeN[i-1],PCH,1)+ draincap(WtdecdrivetreeN[i-1],NCH,1) + 
        gatecap(3*WtdecdrivetreeN[i-2],0.0));
    Req = (Rtdectreesegments[i-2] + transreson(WtdecdrivetreeN[i-1],NCH,1));
    tf = Req*Ceq;
    /*dt: This shouldn't be all falling, but interleaved. Have to fix that at some point.*/
    this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHINV360x240,RISE);
    *Tdecdrive += this_delay;
    inrisetime = this_delay/(1.0 - VTHINV360x240);

    dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += 1.0/(Ntwl*Ntbl)*pow(2,nr_tdectreesegments - i)*tag_bits_routed*0.5*cmos_ileakage(WtdecdrivetreeN[i-1],2*WtdecdrivetreeN[i-1],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);

  }

  if(nr_tdectreesegments) {
    //v4.1: Change below, gatecap(Wtdec3to8n+Wdec3to8p,10.0) -> gatecap(Wtdec3to8n+Wtdec3to8p,10.0)
    //Ceq = 4*gatecap(Wtdec3to8n+Wdec3to8p,10.0) + Ctdectreesegments[0] + 
    //draincap(2*WtdecdrivetreeN[0],PCH,1)+ draincap(WtdecdrivetreeN[0],NCH,1); 
    Ceq = 4*gatecap(Wtdec3to8n+Wtdec3to8p,10.0 / FUDGEFACTOR) + Ctdectreesegments[0] + 
      draincap(2*WtdecdrivetreeN[0],PCH,1)+ draincap(WtdecdrivetreeN[0],NCH,1); 
    Rwire = Rtdectreesegments[0];
    tf = (Rwire + transreson(2*WtdecdrivetreeN[0],PCH,1))*Ceq;

    dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += 1.0/(Ntwl*Ntbl)*pow(2,nr_tdectreesegments)*tag_bits_routed*0.5*cmos_ileakage(WtdecdrivetreeN[0],2*WtdecdrivetreeN[0],Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
  }
  else {
    //v4.1: Change below, gatecap(Wtdec3to8n+Wdec3to8p,10.0) -> gatecap(Wtdec3to8n+Wtdec3to8p,10.0)
    //Ceq = 4*gatecap(Wtdec3to8n+Wdec3to8p,10.0) + Ctdectreesegments[0] + 
    //draincap(Wtdecdrivep_second,PCH,1)+ draincap(Wtdecdriven_second,NCH,1); 
    Ceq = 4*gatecap(Wtdec3to8n+Wtdec3to8p,10.0 / FUDGEFACTOR) + Ctdectreesegments[0] + 
      draincap(Wtdecdrivep_second,PCH,1)+ draincap(Wtdecdriven_second,NCH,1); 
    Rwire = Rtdectreesegments[0];
    tf = (Rwire + transreson(Wtdecdrivep_second,PCH,1))*Ceq;

    dynPower+= tag_bits_routed*Ceq*.5*VddPow*VddPow;
    lkgCurrent += 1.0/(Ntwl*Ntbl)*tag_bits_routed*0.5*cmos_ileakage(Wtdecdriven_second,Wtdecdrivep_second,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp);
  }
  this_delay = horowitz(inrisetime,tf,VTHINV360x240,VTHNAND60x120,FALL);
  *Tdecdrive += this_delay;
  nextinputtime = this_delay/VTHNAND60x120;

  // there are 8 nand gates in each 3-8 decoder. since these transistors are
  // stacked, we use a stacking factor of 1/5 (0.2). 0.5 signifies that we
  // are taking the average of both nmos and pmos transistors.

  lkgCurrent += 8*0.2*0.5* cmos_ileakage(Wtdec3to8n,Wtdec3to8p,Vt_bit_nmos_low,Vthn,Vt_bit_pmos_low,Vthp)*
    // For all the 3-8 decoders per quad:
    ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd))))
    /*for all the quads*/
    *0.25;


  /* se

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -