Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2022-11-16 12:44:06 -06:00 · 2022-11-16 12:44:06 -06:00 · d1ce84d172
commit d1ce84d172
parent 3fbacc2339 cf964e30fb
8 changed files with 142 additions and 29 deletions
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@ -117,13 +117,15 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
  /////////////////////////////////////////////////////////////////////////////////////////////

  // Choose read address (RAdr).  Normally use NextAdr, but use PAdr during stalls
-  // and FlushAdr when handling D$ flushes  
+  // and FlushAdr when handling D$ flushes
+  // The icache must update to the newest PCNextF on flush as it is probably a trap.  Trap
+  // sets PCNextF to XTVEC and the icache must start reading the instruction.
  mux3 #(SETLEN) AdrSelMux(
    .d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr),
-    .s({SelFlush, (SelAdr | SelHPTW)}), .y(RAdr));
+    .s({SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}), .y(RAdr));

  // Array of cache ways, along with victim, hit, dirty, and read merging logic
-  cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) 
+  cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) 
    CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .LineWriteData, .LineByteMask,
    .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
    .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .FlushStage,
--- a/pipelined/src/cache/cachereplacementpolicy.sv
+++ b/pipelined/src/cache/cachereplacementpolicy.sv
@ -46,10 +46,115 @@ module cachereplacementpolicy
  logic [SETLEN-1:0]                   RAdrD;
  logic                                LRUWriteEnD;

+
+  localparam                           LOGNUMWAYS = $clog2(NUMWAYS);
+  localparam                           LEN = NUMWAYS-1;
+
+  logic [LOGNUMWAYS-1:0]               HitWayEnc;
+  logic [LEN-1:0]                      HitWayExpand;
+  genvar                               row;
+
+  logic [NUMWAYS-2:0]                  cEn;
+  
+/* -----\/----- EXCLUDED -----\/-----
+  // proposed generic solution
+  
+  binencoder #(NUMWAYS) encoder(HitWay, HitWayEnc);
+
+  // bit duplication
+  // expand HitWay as HitWay[3], {{2}{HitWay[2]}}, {{4}{HitWay[1]}, {{8{HitWay[0]}}, ...
+  for(row = 0; row < LOGNUMWAYS; row++) begin
+    localparam integer DuplicationFactor = 2**(LOGNUMWAYS-row-1);
+    localparam integer StartIndex = NUMWAYS-2 - DuplicationFactor + 1;
+    localparam integer EndIndex = NUMWAYS-2 - 2 * DuplicationFactor + 2;
+    assign HitWayExpand[StartIndex : EndIndex] = {{DuplicationFactor}{HitWayEnc[row]}};
+  end
+
+
+  genvar               r, a,s;
+  //localparam           s = NUMWAYS-2;
+
+
+  assign cEn[NUMWAYS-2] = '1;
+  for(s = NUMWAYS-2; s >= NUMWAYS/2; s--) begin : enables
+    localparam p = NUMWAYS - s;
+    localparam g = $clog2(p);
+    localparam t0 = s - g;
+    localparam t1 = t0 - 1;
+    localparam r = LOGNUMWAYS - g;
+    assign cEn[t0] = cEn[s] & ~HitWayEnc[r];
+    assign cEn[t1] = cEn[s] & HitWayEnc[r];
+  end
+
+  mux2 #(1) LRUMuxes[NUMWAYS-2:0](LineReplacementBits, HitWayExpand, cEn, NewReplacement);
+
+  assign VictimWay[0] = ~LineReplacementBits[2] & ~LineReplacementBits[0];
+  assign VictimWay[1] = ~LineReplacementBits[2] & LineReplacementBits[0];
+  assign VictimWay[2] = LineReplacementBits[2] & ~LineReplacementBits[1];
+  assign VictimWay[3] = LineReplacementBits[2] & LineReplacementBits[1];      
+ -----/\----- EXCLUDED -----/\----- */
+
+  
+
+/* -----\/----- EXCLUDED -----\/-----
+//  logic [NUMWAYS/2-1:0]                rawEn [LOGNUMWAYS-1:0];
+  for(r = LOGNUMWAYS-1; r >= 0; r--) begin
+    localparam integer g = 2**(LOGNUMWAYS-r-1);
+    for(a = g-1; a > 0; a--) begin
+      localparam t0 = s - 2**(g-1);
+      localparam t1 = t0 - 1;
+      localparam s = s - 1;
+      assign cEn[t0] = cEn[s] & ~HitWayEnc[r];
+      assign cEn[t1] = cEn[s] & HitWayEnc[r];
+    end
+ -----/\----- EXCLUDED -----/\----- */
+/* -----\/----- EXCLUDED -----\/-----
+      for(a = g-1; a > 0; a--) begin
+        localparam t0 = s - 2**(g-1);
+        localparam t1 = t0 - 1;
+        s = s - 1;
+      end
+  end
+ -----/\----- EXCLUDED -----/\----- */
+  
+/* -----\/----- EXCLUDED -----\/-----
+  always_comb begin
+    for(r = LOGNUMWAYS-1; r > 0; r--) begin
+      localparam g = 2**(LOGNUMWAYS-r-1);
+      for(a = g-1; a > 0; a--) begin
+        localparam t0 = s - 2**(g-1);
+        localparam t1 = t0 - 1;
+        s = s - 1;
+      end
+    end
+  end
+ -----/\----- EXCLUDED -----/\----- */
+
+/* -----\/----- EXCLUDED -----\/-----
+
+  genvar row2;
+  logic [LOGNUMWAYS-1:0] indices [LOGNUMWAYS-1:0];
+  integer                jindex;
+  always_comb begin
+    rawEn[LOGNUMWAYS-1] = 1;
+    for(jindex = 0; jindex < LOGNUMWAYS-1; jindex++) begin
+      rawEn[jindex] = 0;
+      rawEn[jindex][~(HitWayEnc>>(jindex+1))] = 1;
+
+      //cEn[2**(LOGNUMWAYS-jindex)-1+jindex:0] = rawEn[jindex][2**(LOGNUMWAYS-jindex)-1:0];
+      
+    end
+  end
+ -----/\----- EXCLUDED -----/\----- */
+
+  
+    
  // *** high priority to clean up
+/* -----\/----- EXCLUDED -----\/-----
  initial begin
      assert (NUMWAYS == 2 || NUMWAYS == 4) else $error("Only 2 or 4 ways supported");
  end
+ -----/\----- EXCLUDED -----/\----- */
  
  // Replacement Bits: Register file
  // Needs to be resettable for simulation, but could omit reset for synthesis ***
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@ -47,7 +47,7 @@ module fctrl (
  output logic 		         FRegWriteM, FRegWriteW, // FP register write enable
  output logic [2:0] 	      FrmM,                   // FP rounding mode
  output logic [`FMTBITS-1:0] FmtE, FmtM,             // FP format
-  output logic 		         DivStartE,             // Start division or squareroot
+  output logic 		         FDivStartE, IDivStartE,             // Start division or squareroot
  output logic              XEnE, YEnE, ZEnE,
  output logic              YEnForwardE, ZEnForwardE,
  output logic 		         FWriteIntE, FCvtIntE, FWriteIntM,                         // Write to integer register
@ -62,7 +62,7 @@ module fctrl (
  logic [`FCTRLW-1:0] ControlsD;
  logic       IllegalFPUInstrD, IllegalFPUInstrE;
  logic 		  FRegWriteD; // FP register write enable
-  logic 		  FDivStartD, FDivStartE, IDivStartE; // integer register write enable
+  logic 		  FDivStartD; // integer register write enable
  logic 		  FWriteIntD; // integer register write enable
  logic 		         FRegWriteE; // FP register write enable
  logic [2:0] 	      OpCtrlD;       // Select which opperation to do in each component
@ -266,10 +266,8 @@ module fctrl (
  flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                           {Adr1E, Adr2E, Adr3E});
  flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
-  if (`M_SUPPORTED) begin
-    assign IDivStartE = MDUE & Funct3E[2];
-    assign DivStartE = FDivStartE | IDivStartE; // integer or floating-point division
-  end else assign DivStartE = FDivStartE;
+  if (`M_SUPPORTED) assign IDivStartE = MDUE & Funct3E[2];
+  else              assign IDivStartE = 0; 

  assign FCvtIntE = (FResSelE == 2'b01);

--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@ -40,7 +40,7 @@ module fdivsqrt(
  input  logic XInfE, YInfE, 
  input  logic XZeroE, YZeroE, 
  input  logic XNaNE, YNaNE, 
-  input  logic DivStartE, 
+  input  logic FDivStartE, IDivStartE,
  input  logic StallM,
  input  logic StallE,
  input  logic SqrtE, SqrtM,
@ -48,7 +48,7 @@ module fdivsqrt(
 	input  logic [2:0] 	Funct3E, Funct3M,
 	input  logic MDUE, W64E,
  output logic DivSM,
-  output logic DivBusy,
+  output logic FDivBusyE,
  output logic DivDone,
  output logic [`NE+1:0] QeM,
  output logic [`DIVb:0] QmM
@ -66,6 +66,7 @@ module fdivsqrt(
  logic SpecialCaseM;
  logic [`DIVBLEN:0] n, m;
  logic OTFCSwap, ALTB, BZero, As;
+  logic DivStartE;

  fdivsqrtpreproc fdivsqrtpreproc(
    .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
@ -74,14 +75,14 @@ module fdivsqrt(
    .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
  fdivsqrtfsm fdivsqrtfsm(
    .clk, .reset, .FmtE, .XsE, .SqrtE, 
-    .DivBusy, .DivStartE,.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, 
+    .FDivBusyE, .FDivStartE, .IDivStartE, .DivStartE, .StallE, .StallM, .DivDone, .XZeroE, .YZeroE, 
    .XNaNE, .YNaNE, .MDUE, .n,
    .XInfE, .YInfE, .WZero, .SpecialCaseM);
  fdivsqrtiter fdivsqrtiter(
    .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, 
    .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
    .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
-    .DivBusy);
+    .FDivBusyE);
  fdivsqrtpostproc fdivsqrtpostproc(
    .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, 
    .SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]),
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@ -37,7 +37,7 @@ module fdivsqrtfsm(
  input  logic XInfE, YInfE, 
  input  logic XZeroE, YZeroE, 
  input  logic XNaNE, YNaNE, 
-  input  logic DivStartE, 
+  input  logic FDivStartE, IDivStartE,
  input  logic XsE,
  input  logic SqrtE,
  input  logic StallE,
@ -45,8 +45,9 @@ module fdivsqrtfsm(
  input  logic WZero,
  input  logic MDUE,
  input  logic [`DIVBLEN:0] n,
+  output logic DivStartE,
  output logic DivDone,
-  output logic DivBusy,
+  output logic FDivBusyE,
  output logic SpecialCaseM
 );
  
@ -57,6 +58,15 @@ module fdivsqrtfsm(
  logic [`DURLEN-1:0] cycles;
  logic SpecialCaseE;

+  // *** start logic is presently in fctrl.  Make it look more like integer division start logic
+  // FDivStartE and IDivStartE come from fctrl; DivStartE combines them here, reflecting the start of floating-point and possibly integer division
+  assign DivStartE = (FDivStartE | IDivStartE) & (state == IDLE) & ~StallM;
+  assign DivDone = (state == DONE) | (WZero & (state == BUSY)); // *** used in postprocess.sv and round.sv.  This doesn't seem proper.  They break when removed.
+  assign FDivBusyE = (state == BUSY & ~DivDone); // *** want to add | DivStartE but it creates comb loop
+
+    // Divider control signals from MDU
+  //assign DivBusyE = (state == BUSY) | DivStartE;
+
  // terminate immediately on special cases
  assign SpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE);
  flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
@ -120,8 +130,5 @@ module fdivsqrtfsm(
      end 
  end

-  // *** start logic is presently in fctl.  Make it look more like integer division start logic
-  assign DivDone = (state == DONE) | (WZero & (state == BUSY));
-  assign DivBusy = (state == BUSY & ~DivDone);

 endmodule
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
@ -33,7 +33,7 @@
 module fdivsqrtiter(
  input  logic clk,
  input  logic DivStartE, 
-  input  logic DivBusy, 
+  input  logic FDivBusyE, 
  input  logic [`NE-1:0] Xe, Ye,
  input  logic XZeroE, YZeroE, 
  input  logic SqrtE,
@ -85,8 +85,8 @@ module fdivsqrtiter(
  // Residual WS/WC registers/initialization mux
  mux2   #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN);
  mux2   #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN);
-  flopen   #(`DIVb+4) wsflop(clk, DivStartE|DivBusy, WSN, WS[0]);
-  flopen   #(`DIVb+4) wcflop(clk, DivStartE|DivBusy, WCN, WC[0]);
+  flopen   #(`DIVb+4) wsflop(clk, DivStartE|FDivBusyE, WSN, WS[0]);
+  flopen   #(`DIVb+4) wcflop(clk, DivStartE|FDivBusyE, WCN, WC[0]);

  // UOTFC Result U and UM registers/initialization mux
  // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
@ -94,8 +94,8 @@ module fdivsqrtiter(
  assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
  mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux);
  mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux);
-  flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]);
-  flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]);
+  flopen #(`DIVb+1) UReg(clk, DivStartE|FDivBusyE, UMux, U[0]);
+  flopen #(`DIVb+1) UMReg(clk, DivStartE|FDivBusyE, UMMux, UM[0]);

  // C register/initialization mux
  // Initialize C to -1 for sqrt and -R for division
@ -103,7 +103,7 @@ module fdivsqrtiter(
  assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
  assign initC = {initCUpper, {`DIVb{1'b0}}};
  mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStartE, CMux); 
-  flopen #(`DIVb+2) cflop(clk, DivStartE|DivBusy, CMux, C[0]);
+  flopen #(`DIVb+2) cflop(clk, DivStartE|FDivBusyE, CMux, C[0]);

   // Divisor register
  flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D);
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -67,7 +67,7 @@ module fpu (
   logic 		         FRegWriteW; // FP register write enable
   logic [2:0] 	      FrmM;                   // FP rounding mode
   logic [`FMTBITS-1:0] FmtE, FmtM;             // FP precision 0-single 1-double
-   logic 		         DivStartE;             // Start division or squareroot
+   logic 		         FDivStartE, IDivStartE;             // Start division or squareroot
   logic 		         FWriteIntM;                         // Write to integer register
   logic [1:0] 	      ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
   logic [2:0] 	      OpCtrlE, OpCtrlM;       // Select which opperation to do in each component
@ -167,7 +167,7 @@ module fpu (
               .Funct3E, .MDUE, .InstrD,
               .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
               .reset, .clk, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE,
-               .DivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,
+               .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,
               .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, .Adr1E, .Adr2E, .Adr3E);

   // FP register file
@ -261,9 +261,9 @@ module fpu (
   //    - fsqrt
   // *** add other operations
   fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
-                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .XsE,
+                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
                  .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
-                  .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
+                  .StallE, .StallM, .DivSM, .FDivBusyE, .QeM, 
                  .QmM, .DivDone(DivDoneM));

                  //
--- a/pipelined/src/fpu/postproc/postprocess.sv
+++ b/pipelined/src/fpu/postproc/postprocess.sv
@ -129,7 +129,7 @@ module postprocess (
    assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
    assign CvtOp = (PostProcSel == 2'b00);
    assign FmaOp = (PostProcSel == 2'b10);
-    assign DivOp = (PostProcSel == 2'b01)&DivDone;
+    assign DivOp = (PostProcSel == 2'b01) & DivDone;
    assign Sqrt =  OpCtrl[0];

    // is there an input of infinity or NaN being used