From d64cd715f9d3d3957086ab00993a1f26d6963161 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Fri, 2 Dec 2022 11:30:49 -0800
Subject: [PATCH] Renamed DivStartE to IFDivStartE

---
 pipelined/src/fpu/fdivsqrt/fdivsqrt.sv        |  8 ++--
 pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv     | 38 +++----------------
 pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv    | 20 +++++-----
 pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 10 ++---
 pipelined/src/fpu/fpu.sv                      | 14 +++----
 5 files changed, 29 insertions(+), 61 deletions(-)

diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index 4f3dcf6ff..6ec0cb746 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -49,7 +49,7 @@ module fdivsqrt(
 	input  logic [2:0] 	Funct3E, Funct3M,
 	input  logic MDUE, W64E,
   output logic DivSM,
-  output logic FDivBusyE, DivStartE, FDivDoneE,
+  output logic FDivBusyE, IFDivStartE, FDivDoneE,
 //  output logic DivDone,
   output logic [`NE+1:0] QeM,
   output logic [`DIVb:0] QmM
@@ -69,19 +69,19 @@ module fdivsqrt(
   logic OTFCSwap, ALTB, BZero, As;
 
   fdivsqrtpreproc fdivsqrtpreproc(
-    .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
+    .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
     .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, 
     .n, .m, .OTFCSwap, .ALTB, .BZero, .As,
     .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
   fdivsqrtfsm fdivsqrtfsm(
     .clk, .reset, .FmtE, .XsE, .SqrtE, 
-    .FDivBusyE, .FDivStartE, .IDivStartE, .DivStartE, .FDivDoneE, .StallE, .StallM, .TrapM, /*.DivDone, */ .XZeroE, .YZeroE, 
+    .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallE, .StallM, .TrapM, /*.DivDone, */ .XZeroE, .YZeroE, 
     .XNaNE, .YNaNE, .MDUE, .n,
     .XInfE, .YInfE, .WZero, .SpecialCaseM);
   fdivsqrtiter fdivsqrtiter(
     .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM, 
     .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
-    .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
+    .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
     .FDivBusyE);
   fdivsqrtpostproc fdivsqrtpostproc(
     .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, 
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index aa13b7da9..a1e09d4d0 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -46,8 +46,7 @@ module fdivsqrtfsm(
   input  logic WZero,
   input  logic MDUE,
   input  logic [`DIVBLEN:0] n,
-  output logic DivStartE,
-//  output logic DivDone,
+  output logic IFDivStartE,
   output logic FDivBusyE, FDivDoneE,
   output logic SpecialCaseM
 );
@@ -60,15 +59,10 @@ module fdivsqrtfsm(
   logic SpecialCaseE;
 
   // *** start logic is presently in fctl.  Make it look more like integer division start logic
-  // DivStartE comes from fctrl, reflecitng the start of floating-point and possibly integer division
-  assign DivStartE = (FDivStartE | IDivStartE) & (state == IDLE) & ~StallM;
+  // FDivStartE and IDivStartE come from fctrl, reflecting the start of floating-point and possibly integer division
+  assign IFDivStartE = (FDivStartE | IDivStartE) & (state == IDLE) & ~StallM;
   assign FDivDoneE = (state == DONE);
- // assign DivDone = (state == DONE) | (WZero & (state == BUSY)); // *** used in postprocess.sv and round.sv.  This doesn't seem proper.  They break when removed.
-  //assign FDivBusyE = (state == BUSY & ~DivDone); // *** want to add | DivStartE but it creates comb loop
-  assign FDivBusyE = (state == BUSY) | DivStartE; 
-
-    // Divider control signals from MDU
-  //assign DivBusyE = (state == BUSY) | DivStartE;
+  assign FDivBusyE = (state == BUSY) | IFDivStartE; 
 
   // terminate immediately on special cases
   assign SpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE);
@@ -116,7 +110,7 @@ module fdivsqrtfsm(
   always_ff @(posedge clk) begin
       if (reset | TrapM) begin
           state <= #1 IDLE; 
-      end else if (DivStartE) begin 
+      end else if (IFDivStartE) begin 
           step <= cycles; 
           if (SpecialCaseE) state <= #1 DONE;
           else             state <= #1 BUSY;
@@ -129,26 +123,4 @@ module fdivsqrtfsm(
       end 
   end
 
-/*
-  always_ff @(posedge clk) begin
-      if (reset) begin
-          state <= #1 IDLE; 
-      end else if (DivStartE&~StallE) begin 
-          step <= cycles; 
-//          $display("Setting Nf = %d fbits %d cycles = %d FmtE %d FPSIZES = %d Q_NF = %d num = %d denom = %d\n", Nf, fbits, cycles, FmtE, `FPSIZES, `Q_NF,
-//          (fbits +(`LOGR*`DIVCOPIES)-1), (`LOGR*`DIVCOPIES));
-          if (SpecialCaseE) state <= #1 DONE;
-          else             state <= #1 BUSY;
-      end else if (DivDone) begin
-        if (StallM) state <= #1 DONE;
-        else        state <= #1 IDLE;
-      end else if (state == BUSY) begin
-          if (step == 1) begin
-              state <= #1 DONE;
-          end
-          step <= step - 1;
-      end 
-  end
-*/
-
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
index df8dd2c7f..72cde3943 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -32,7 +32,7 @@
 
 module fdivsqrtiter(
   input  logic clk,
-  input  logic DivStartE, 
+  input  logic IFDivStartE, 
   input  logic FDivBusyE, 
   input  logic [`NE-1:0] Xe, Ye,
   input  logic XZeroE, YZeroE, 
@@ -83,8 +83,8 @@ module fdivsqrtiter(
   // are fed back for the next iteration.
  
   // Residual WS/SC registers/initializaiton mux
-  mux2   #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN);
-  mux2   #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN);
+  mux2   #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
+  mux2   #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
   flopen   #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]);
   flopen   #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]);
 
@@ -92,21 +92,21 @@ module fdivsqrtiter(
   // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
   assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
   assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
-  mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux);
-  mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux);
-  flopen #(`DIVb+1) UReg(clk, DivStartE|FDivBusyE, UMux, U[0]);
-  flopen #(`DIVb+1) UMReg(clk, DivStartE|FDivBusyE, UMMux, UM[0]);
+  mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
+  mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
+  flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]);
+  flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]);
 
   // C register/initialization mux
   // Initialize C to -1 for sqrt and -R for division
   logic [1:0] initCUpper;
   assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
   assign initC = {initCUpper, {`DIVb{1'b0}}};
-  mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStartE, CMux); 
-  flopen #(`DIVb+2) cflop(clk, DivStartE|FDivBusyE, CMux, C[0]);
+  mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux); 
+  flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
 
    // Divisior register
-  flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D);
+  flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D);
 
   // Divisor Selections
   //  - choose the negitive version of what's being selected
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index af6a86179..483ce3f92 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -32,7 +32,7 @@
 
 module fdivsqrtpreproc (
   input  logic clk,
-  input  logic DivStartE, 
+  input  logic IFDivStartE, 
   input  logic [`NF:0] Xm, Ym,
   input  logic [`NE-1:0] Xe, Ye,
   input  logic [`FMTBITS-1:0] Fmt,
@@ -115,10 +115,10 @@ module fdivsqrtpreproc (
   // DIVRESLEN = DIVLEN or DIVLEN+2
   // r = 1 or 2
   // DIVRESLEN/(r*`DIVCOPIES)
-  flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM);
-  flopen #(1) swapflop(clk, DivStartE, OTFCSwapTemp, OTFCSwap);
-  flopen #(`DIVBLEN+1) nflop(clk, DivStartE, Calcn, n);
-  flopen #(`DIVBLEN+1) mflop(clk, DivStartE, Calcm, m);
+  flopen #(`NE+2) expflop(clk, IFDivStartE, Qe, QeM);
+  flopen #(1) swapflop(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap);
+  flopen #(`DIVBLEN+1) nflop(clk, IFDivStartE, Calcn, n);
+  flopen #(`DIVBLEN+1) mflop(clk, IFDivStartE, Calcm, m);
   expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe);
 
 endmodule
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 0aa549991..bd4053dcb 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -131,7 +131,7 @@ module fpu (
    logic [`NE+1:0]      QeE, QeM; 
    logic                DivSE, DivSM;
 //   logic                DivDoneM;
-   logic                FDivDoneE, DivStartE;
+   logic                FDivDoneE, IFDivStartE;
 
    // result and flag signals
    logic [`XLEN-1:0] ClassResE;               // classify result
@@ -151,7 +151,7 @@ module fpu (
    logic [`FLEN-1:0] 	 AlignedSrcAE;                       // align SrcA to the floating point format
    logic [`FLEN-1:0]     BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
    logic [`FLEN-1:0]     BoxedOneE;                         // Zero value for Z for multiplication, with NaN boxing if needed
-   logic             EMRegEn;
+   logic             StallUnpackedM;
 
    // DECODE STAGE
 
@@ -266,7 +266,7 @@ module fpu (
    fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
                   .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
-                  .StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .DivStartE, .FDivDoneE, .QeM, 
+                  .StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, 
                   .QmM /*, .DivDone(DivDoneM) */);
 
                   //
@@ -340,20 +340,16 @@ module fpu (
 
    // E/M pipe registers
 
-   assign EMRegEn = ~StallM & (~FDivBusyE & ~FDivDoneE | DivStartE);
+   assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE); // Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
 
-   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, EMRegEn, XE, FSrcXM);
    flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
    flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
    flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
    flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
    flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
-   flopenr #(15) EMFpReg5 (clk, reset, EMRegEn, 
+   flopenr #(15) EMFpReg5 (clk, reset, ~StallUnpackedM, 
             {XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
             {XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});     
-   /* flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, 
-            {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
-            {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});   */   
    flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); 
    flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);