From 930fcbe956efd2362400a04ae463fb59b3dfc88e Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Sat, 10 Dec 2022 21:56:35 +0000
Subject: [PATCH 1/5] Fixed D sizing issues across fdivsqrt. Fixed preproc to
 accept either int or float inputs

---
 pipelined/src/fpu/fdivsqrt/fdivsqrt.sv        | 12 ++--
 pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv    | 72 +++++++++----------
 .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv      | 14 ++--
 pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 56 +++++++--------
 pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv  |  4 +-
 pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv  |  6 +-
 pipelined/testbench/tests.vh                  |  1 +
 7 files changed, 78 insertions(+), 87 deletions(-)

diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index 63ffab713..bdbfbde00 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -56,11 +56,11 @@ module fdivsqrt(
 //   output logic [`XLEN-1:0] RemM,
 );
 
-  logic [`DIVb+3:0]  WS, WC;
+  logic [`DIVb+3:0] WS, WC;
   logic [`DIVb+3:0] X;
-  logic [`DIVN-2:0]  D; // U0.N-1
-  logic [`DIVN-2:0] Dpreproc;
-  logic [`DIVb:0] FirstU, FirstUM;
+  logic [`DIVb-1:0] D;
+  logic [`DIVb-1:0] DPreproc;
+  logic [`DIVb:0]   FirstU, FirstUM;
   logic [`DIVb+1:0] FirstC;
   logic Firstun;
   logic WZero;
@@ -71,7 +71,7 @@ module fdivsqrt(
 
   fdivsqrtpreproc fdivsqrtpreproc(
     .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
-    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, 
+    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .DPreproc, 
     .n, .m, .OTFCSwap, .ALTBM, .BZero, .As,
     .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
   fdivsqrtfsm fdivsqrtfsm(
@@ -81,7 +81,7 @@ module fdivsqrt(
     .XInfE, .YInfE, .WZero, .SpecialCaseM);
   fdivsqrtiter fdivsqrtiter(
     .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM, 
-    .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
+    .X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
     .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
     .FDivBusyE);
   fdivsqrtpostproc fdivsqrtpostproc(
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
index 72cde3943..2948713b0 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -40,42 +40,34 @@ module fdivsqrtiter(
 //  input  logic SqrtM,
   input  logic OTFCSwap,
   input  logic [`DIVb+3:0] X,
-  input  logic [`DIVN-2:0] Dpreproc,
-  output logic [`DIVN-2:0]  D, // U0.N-1
-  output logic [`DIVb:0] FirstU, FirstUM,
+  input  logic [`DIVb-1:0] DPreproc,
+  output logic [`DIVb-1:0] D,
+  output logic [`DIVb:0]   FirstU, FirstUM,
   output logic [`DIVb+1:0] FirstC,
   output logic             Firstun,
-  output logic [`DIVb+3:0]  FirstWS, FirstWC
+  output logic [`DIVb+3:0] FirstWS, FirstWC
 );
 
-//QLEN = 1.(number of bits created for division)
-// N is NF+1 or XLEN
-// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift
-// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0
-// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0
-// U/UM should be 1.b so b+1 bits or b:0
-// C needs to be the lenght of the final fraction 0.b so b or b-1:0
- /* verilator lint_off UNOPTFLAT */
-  logic [`DIVb+3:0]  WSNext[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]  WCNext[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]  WS[`DIVCOPIES:0]; // Q4.b
-  logic [`DIVb+3:0]  WC[`DIVCOPIES:0]; // Q4.b
-  logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
-  logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b
-  logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b
-  logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b
-  logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
-  logic [`DIVb+1:0] initC; // Q2.b
+  /* verilator lint_off UNOPTFLAT */
+  logic [`DIVb+3:0]      WSNext[`DIVCOPIES-1:0]; // Q4.b
+  logic [`DIVb+3:0]      WCNext[`DIVCOPIES-1:0]; // Q4.b
+  logic [`DIVb+3:0]      WS[`DIVCOPIES:0];       // Q4.b
+  logic [`DIVb+3:0]      WC[`DIVCOPIES:0];       // Q4.b
+  logic [`DIVb:0]        U[`DIVCOPIES:0];        // U1.b
+  logic [`DIVb:0]        UM[`DIVCOPIES:0];       // U1.b
+  logic [`DIVb:0]        UNext[`DIVCOPIES-1:0];  // U1.b
+  logic [`DIVb:0]        UMNext[`DIVCOPIES-1:0]; // U1.b
+  logic [`DIVb+1:0]      C[`DIVCOPIES:0];        // Q2.b
+  logic [`DIVb+1:0]      initC;                  // Q2.b
   logic [`DIVCOPIES-1:0] un; 
 
- /* verilator lint_on UNOPTFLAT */
-  logic [`DIVb+3:0]  WSN, WCN; // Q4.N-1
-  logic [`DIVb+3:0]  DBar, D2, DBar2; // Q4.N-1
-  logic [`DIVb+1:0] NextC;
-  logic [`DIVb+1:0] CMux;
-  logic [`DIVb:0] UMux, UMMux;
-  logic [`DIVb:0] initU, initUM;
-
+  logic [`DIVb+3:0]      WSN, WCN;               // Q4.b
+  logic [`DIVb+3:0]      DBar, D2, DBar2;        // Q4.b
+  logic [`DIVb+1:0]      NextC;
+  logic [`DIVb+1:0]      CMux;
+  logic [`DIVb:0]        UMux, UMMux;
+  logic [`DIVb:0]        initU, initUM;
+  /* verilator lint_on UNOPTFLAT */
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -85,15 +77,15 @@ module fdivsqrtiter(
   // Residual WS/SC registers/initializaiton mux
   mux2   #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
   mux2   #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
-  flopen   #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]);
-  flopen   #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]);
+  flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
+  flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
 
   // UOTFC Result U and UM registers/initialization mux
   // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
   assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
   assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
-  mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
-  mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
+  mux2   #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
+  mux2   #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
   flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]);
   flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]);
 
@@ -103,18 +95,18 @@ module fdivsqrtiter(
   assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
   assign initC = {initCUpper, {`DIVb{1'b0}}};
   mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux); 
-  flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
+  flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
 
    // Divisior register
-  flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D);
+  flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
 
   // Divisor Selections
   //  - choose the negitive version of what's being selected
-  //  - D is only the fraction
-  assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}};
+  //  - D holds only the b fraction bits (0.b); the integer bit is prepended explicitly where D is used (see D2)
+  assign DBar    = {3'b111, 1'b0, ~D};
   if(`RADIX == 4) begin : d2
-    assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}};
-    assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
+    assign DBar2 = {2'b11, 1'b0, ~D, 1'b1};
+    assign D2    = {2'b0, 1'b1, D, 1'b0};
   end
 
   // k=DIVCOPIES of the recurrence logic
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
index f009cfd8b..7ac8229b9 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -32,8 +32,8 @@
 
 module fdivsqrtpostproc(
   input  logic [`DIVb+3:0] WS, WC,
-  input  logic [`DIVN-2:0]  D, // U0.N-1
-  input  logic [`DIVb:0] FirstU, FirstUM, 
+  input  logic [`DIVb-1:0] D, 
+  input  logic [`DIVb:0]   FirstU, FirstUM, 
   input  logic [`DIVb+1:0] FirstC,
   input  logic Firstun,
   input  logic SqrtM,
@@ -41,12 +41,12 @@ module fdivsqrtpostproc(
 	input  logic [`XLEN-1:0] ForwardedSrcAE,
   input  logic RemOpM, ALTBM, BZero, As,
   input  logic [`DIVBLEN:0] n, m,
-  output logic [`DIVb:0] QmM, 
+  output logic [`DIVb:0]    QmM, 
   output logic WZero,
   output logic DivSM
 );
   
-  logic [`DIVb+3:0] W, Sum, RemD;
+  logic [`DIVb+3:0] W, Sum, RemDM;
   logic [`DIVb:0] PreQmM;
   logic NegStickyM, PostIncM;
   logic weq0;
@@ -78,14 +78,14 @@ module fdivsqrtpostproc(
   assign Sum = WC + WS;
   assign W = $signed(Sum) >>> `LOGR;
   assign NegStickyM = W[`DIVb+3];
-  assign RemD = {4'b0000, D, {(`DIVb-`DIVN+1){1'b0}}};
+  assign RemDM = {4'b0000, D};
 
   // Integer division: sign handling for div and rem
   always_comb 
     if (~As)
       if (NegStickyM) begin
         NormQuotM = FirstUM;
-        NormRemM  = W + RemD;
+        NormRemM  = W + RemDM;
         PostIncM  = 0;
       end else begin
         NormQuotM = FirstU;
@@ -99,7 +99,7 @@ module fdivsqrtpostproc(
         PostIncM  = 0;
       end else begin 
         NormQuotM = FirstU;
-        NormRemM  = W - RemD;
+        NormRemM  = W - RemDM;
         PostIncM  = 1;
       end
 
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index b06780996..e74934e5e 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -45,22 +45,21 @@ module fdivsqrtpreproc (
   output logic OTFCSwap, ALTBM, BZero, As,
   output logic [`NE+1:0] QeM,
   output logic [`DIVb+3:0] X,
-  output logic [`DIVN-2:0] Dpreproc
+  output logic [`DIVb-1:0] DPreproc
 );
-  // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
-  logic  [`NF-1:0] PreprocA, PreprocX;
-  logic  [`NF-1:0] PreprocB, PreprocY;
-  logic  [`NF+1:0] SqrtX;
+
+  logic  [`DIVb-1:0] XPreproc;
+  logic  [`DIVb:0] SqrtX;
   logic  [`DIVb+3:0] DivX;
-  logic  [`NE+1:0] Qe;
+  logic  [`NE+1:0] QeE;
   // Intdiv signals
-  logic  [`DIVb-1:0] ZeroBufX, ZeroBufY;
+  logic  [`DIVb-1:0] IFNormLenX, IFNormLenD;
   logic  [`XLEN-1:0] PosA, PosB;
-  logic  Bs, OTFCSwapTemp, ALTBE;
+  logic  Bs, CalcOTFCSwap, ALTBE;
   logic  [`XLEN-1:0] A64, B64;
   logic  [`DIVBLEN:0] Calcn, Calcm;
   logic  [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
-  logic  [`DIVBLEN:0] pPlusr, pPrCeil, p, L;
+  logic  [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
   logic  [`LOGRK-1:0] pPrTrunc;
   logic  [`DIVb+3:0] PreShiftX;
 
@@ -72,39 +71,38 @@ module fdivsqrtpreproc (
   assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
   assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;
 
-  assign OTFCSwapTemp = (As ^ Bs) & MDUE;
+  assign CalcOTFCSwap = (As ^ Bs) & MDUE;
   
   assign PosA = As ? -A64 : A64;
   assign PosB = Bs ? -B64 : B64;
   assign BZero = |ForwardedSrcBE;
 
-  assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
-  assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
-  lzc #(`DIVb) lzcX (ZeroBufX, L);
-  lzc #(`DIVb) lzcY (ZeroBufY, Calcm);
+  assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}};
+  assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}};
+  lzc #(`DIVb) lzcX (IFNormLenX, ell);
+  lzc #(`DIVb) lzcY (IFNormLenD, Calcm);
 
-  assign PreprocX = Xm[`NF-1:0]<<L;
-  assign PreprocY = Ym[`NF-1:0]<<Calcm;
+  assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, ~MDUE}); // shift by ell, plus 1 more when ~MDUE; ~MDUE is zero-extended by concatenation because the (`DIVBLEN+1)'(~MDUE) cast had issues
+  assign DPreproc = IFNormLenD << (Calcm + {{`DIVBLEN{1'b0}}, ~MDUE});
 
-  assign ZeroDiff = Calcm - L;
+  assign ZeroDiff = Calcm - ell;
   assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B
   assign p = ALTBE ? '0 : ZeroDiff;
 
   assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
   assign pPrTrunc = pPlusr[`LOGRK-1:0];
-  assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)};
+  assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)};
   assign Calcn = (pPrCeil << `LOGK) - 1;
   assign IntBits = (`DIVBLEN)'(`RK) + p;
   assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};
 
-  assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
-  assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
+  assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~XZero, XPreproc[`DIVb-1:1]} : {~XZero, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
+  assign DivX = {3'b000, ~XZero, XPreproc};
 
   // *** explain why X is shifted between radices (initial assignment of WS=RX)
-  if (`RADIX == 2)  assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
-  else              assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
+  if (`RADIX == 2)  assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
+  else              assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
   assign X = MDUE ? DivX >> RightShiftX : PreShiftX;
-  assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
 
   //           radix 2     radix 4
   // 1 copies  DIVLEN+2    DIVLEN+2/2
@@ -116,12 +114,12 @@ module fdivsqrtpreproc (
   // r = 1 or 2
   // DIVRESLEN/(r*`DIVCOPIES)
 
-  flopen #(`NE+2)    expreg(clk, IFDivStartE, Qe, QeM);
-  flopen #(1)       swapreg(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap);
+  flopen #(`NE+2)    expreg(clk, IFDivStartE, QeE, QeM);
+  flopen #(1)       swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap);
   flopen #(1)       altbreg(clk, IFDivStartE, ALTBE, ALTBM);
   flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n);
   flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m);
-  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe);
+  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .ell, .m(Calcm), .Qe(QeE));
 
 endmodule
 
@@ -130,7 +128,7 @@ module expcalc(
   input  logic [`NE-1:0] Xe, Ye,
   input  logic Sqrt,
   input  logic XZero, 
-  input  logic [`DIVBLEN:0] L, m,
+  input  logic [`DIVBLEN:0] ell, m,
   output logic [`NE+1:0] Qe
   );
   logic [`NE-2:0] Bias;
@@ -162,10 +160,10 @@ module expcalc(
             2'h2: Bias =  (`NE-1)'(`H_BIAS);
         endcase
   end
-  assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS);
+  assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
   assign SExp  = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
   // correct exponent for denormalized input's normalization shifts
-  assign DExp  = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
+  assign DExp  = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
   
   assign Qe = Sqrt ? SExp : DExp;
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
index b4c2527d3..088aff3a7 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
@@ -32,7 +32,7 @@
 
 /* verilator lint_off UNOPTFLAT */
 module fdivsqrtstage2 (
-  input  logic [`DIVN-2:0] D,
+  input  logic [`DIVb-1:0] D,
   input  logic [`DIVb+3:0]  DBar, 
   input  logic [`DIVb:0] U, UM,
   input  logic [`DIVb+3:0]  WS, WC,
@@ -69,7 +69,7 @@ module fdivsqrtstage2 (
   always_comb
     if      (up) Dsel = DBar;
     else if (uz) Dsel = '0; // qz
-    else         Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; // un
+    else         Dsel = {3'b000, 1'b1, D}; // un
 
   // Partial Product Generation
   //  WSA, WCA = WS + WC - qD
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
index fb203fd72..f006b0478 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@@ -31,7 +31,7 @@
 `include "wally-config.vh"
 
 module fdivsqrtstage4 (
-  input  logic [`DIVN-2:0] D,
+  input  logic [`DIVb-1:0] D,
   input  logic [`DIVb+3:0]  DBar, D2, DBar2,
   input  logic [`DIVb:0] U, UM,
   input  logic [`DIVb+3:0]  WS, WC,
@@ -61,7 +61,7 @@ module fdivsqrtstage4 (
 	// 0010 = -1
 	// 0001 = -2
   assign Smsbs = U[`DIVb:`DIVb-4];
-  assign Dmsbs = D[`DIVN-2:`DIVN-4];
+  assign Dmsbs = D[`DIVb-1:`DIVb-3];
   assign WCmsbs = WC[`DIVb+3:`DIVb-4];
   assign WSmsbs = WS[`DIVb+3:`DIVb-4];
 
@@ -77,7 +77,7 @@ module fdivsqrtstage4 (
       4'b1000: Dsel = DBar2;
       4'b0100: Dsel = DBar;
       4'b0000: Dsel = '0;
-      4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
+      4'b0010: Dsel = {3'b0, 1'b1, D};
       4'b0001: Dsel = D2;
       default: Dsel = 'x;
     endcase
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index 2d32ea394..9bd99aeef 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -1413,6 +1413,7 @@ string imperas32f[] = '{
 
   string arch32f[] = '{
     `RISCVARCHTEST,
+    "rv32i_m/F/src/fdiv_b20-01.S",
     "rv32i_m/F/src/fadd_b10-01.S",
     "rv32i_m/F/src/fadd_b1-01.S",
     "rv32i_m/F/src/fadd_b11-01.S",

From b69aa39f30548b1db9ae5941447103c559e1ef48 Mon Sep 17 00:00:00 2001
From: Ross Thompson <ross1728@gmail.com>
Date: Wed, 14 Dec 2022 09:34:29 -0600
Subject: [PATCH 2/5] Reduced complexity of linebytemask.

---
 pipelined/src/cache/cache.sv | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv
index 6c7aa8994..6145749e8 100644
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@@ -163,7 +163,8 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
   end
 
   assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask;  // If load miss set all muxes to 1.
-  assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes.
+  logic [LINELEN/8-1:0]       LineByteMask2;
+  assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
 
   for(index = 0; index < LINELEN/8; index++) begin
     mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),

From 4a0e4aed99a7b5c0a2c47b6e39867f291b999bb1 Mon Sep 17 00:00:00 2001
From: Ross Thompson <ross1728@gmail.com>
Date: Wed, 14 Dec 2022 09:49:15 -0600
Subject: [PATCH 3/5] Signal renames to reflect figures.

---
 pipelined/src/cache/cache.sv    | 16 +++++++--------
 pipelined/src/cache/cacheLRU.sv |  4 ++--
 pipelined/src/cache/cachefsm.sv |  8 ++++----
 pipelined/src/cache/cacheway.sv | 35 ++++++++++++++++-----------------
 4 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv
index 6145749e8..4e34eb02b 100644
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@@ -94,14 +94,14 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
   logic [NUMWAYS-1:0]         NextFlushWay;
   logic                       FlushWayCntEn;
   logic                       FlushWayCntRst;  
-  logic                       SelEvict;
+  logic                       SelWriteback;
   logic                       LRUWriteEn;
   logic                       SelFlush;
   logic                       ResetOrFlushAdr, ResetOrFlushWay;
   logic [LINELEN-1:0]         ReadDataLine, ReadDataLineCache;
   logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0]          WordOffsetAddr;
   logic                       SelFetchBuffer;
-  logic                       ce;
+  logic                       CacheEn;
 
   localparam                  LOGLLENBYTES = $clog2(WORDLEN/8);
   localparam                  CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
@@ -124,12 +124,12 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
 
   // Array of cache ways, along with victim, hit, dirty, and read merging logic
   cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) 
-    CacheWays[NUMWAYS-1:0](.clk, .reset, .ce, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
-    .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .VictimWay,
+    CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
+    .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
     .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
   if(NUMWAYS > 1) begin:vict
     cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
-      .clk, .reset, .ce, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
+      .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
       .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
   end else assign VictimWay = 1'b1; // one hot.
   assign CacheHit = | HitWay;
@@ -174,7 +174,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
   mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
 		.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
 		.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
-		.s({SelFlush, SelEvict}), .y(CacheBusAdr));
+		.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
   
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Flush address and way generation during flush
@@ -199,10 +199,10 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
  		.CacheHit, .LineDirty, .CacheStall, .CacheCommitted, 
 		.CacheMiss, .CacheAccess, .SelAdr, 
 		.ClearValid, .ClearDirty, .SetDirty,
-		.SetValid, .SelEvict, .SelFlush,
+		.SetValid, .SelWriteback, .SelFlush,
 		.FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst,
 		.FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
         .InvalidateCache,
-        .ce,
+        .CacheEn,
         .LRUWriteEn);
 endmodule 
diff --git a/pipelined/src/cache/cacheLRU.sv b/pipelined/src/cache/cacheLRU.sv
index f6d5adf57..fe22ec2c8 100644
--- a/pipelined/src/cache/cacheLRU.sv
+++ b/pipelined/src/cache/cacheLRU.sv
@@ -32,7 +32,7 @@
 
 module cacheLRU
   #(parameter NUMWAYS = 4, SETLEN = 9, OFFSETLEN = 5, NUMLINES = 128)(
-   input logic                clk, reset, ce, FlushStage,
+   input logic                clk, reset, CacheEn, FlushStage,
    input logic [NUMWAYS-1:0]  HitWay,
    input logic [NUMWAYS-1:0]  ValidWay,
    output logic [NUMWAYS-1:0] VictimWay,
@@ -120,7 +120,7 @@ module cacheLRU
   // LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice.
   always_ff @(posedge clk) begin
     if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
-    if(ce) begin
+    if(CacheEn) begin
       if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
       else if (LRUWriteEn & ~FlushStage) begin 
         LRUMemory[CAdr] <= NextLRU; ///***** RT: This is not right. Logically should be PAdr, but it breaks linux.
diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv
index 860432c82..1396adf43 100644
--- a/pipelined/src/cache/cachefsm.sv
+++ b/pipelined/src/cache/cachefsm.sv
@@ -64,7 +64,7 @@ module cachefsm
    output logic       ClearDirty,
    output logic       SetDirty,
    output logic       SetValid,
-   output logic       SelEvict,
+   output logic       SelWriteback,
    output logic       LRUWriteEn,
    output logic       SelFlush,
    output logic       FlushAdrCntEn,
@@ -72,7 +72,7 @@ module cachefsm
    output logic       FlushAdrCntRst,
    output logic       FlushWayCntRst,
    output logic       SelFetchBuffer, 
-   output logic       ce);
+   output logic       CacheEn);
   
   logic               resetDelay;
   logic               AMO, StoreAMO;
@@ -170,7 +170,7 @@ module cachefsm
   assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) |
                       (CurrState == STATE_MISS_WRITE_CACHE_LINE);
   // Flush and eviction controls
-  assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) |
+  assign SelWriteback = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) |
                     (CurrState == STATE_READY & AnyMiss & LineDirty);
   assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) |
                     (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK);
@@ -201,6 +201,6 @@ module cachefsm
                   resetDelay;
 
   assign SelFetchBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE | CurrState == STATE_MISS_READ_DELAY;
-  assign ce = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset;
+  assign CacheEn = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset;
                        
 endmodule // cachefsm
diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv
index 1fcc2566f..5f9869547 100644
--- a/pipelined/src/cache/cacheway.sv
+++ b/pipelined/src/cache/cacheway.sv
@@ -33,7 +33,7 @@
 module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
 				  parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) (
   input logic                        clk,
-  input logic                        ce,
+  input logic                        CacheEn,
   input logic                        reset,
   input logic [$clog2(NUMLINES)-1:0] CAdr,
   input logic [`PA_BITS-1:0]         PAdr,
@@ -42,7 +42,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   input logic                        ClearValid,
   input logic                        SetDirty,
   input logic                        ClearDirty,
-  input logic                        SelEvict,
+  input logic                        SelWriteback,
   input logic                        SelFlush,
   input logic                        VictimWay,
   input logic                        FlushWay,
@@ -76,8 +76,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   logic                              ClearValidWay;
   logic                              SetDirtyWay;
   logic                              ClearDirtyWay;
-  logic                              SelectedWay;
-  logic                              SelWriteback;
+  logic                              SelNonHit;
   logic                              SelData;
   logic                              FlushWayEn, VictimWayEn;
   
@@ -85,28 +84,28 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   // FlushWay and VictimWay are part of a one hot way selection.  Must clear them if FlushWay not selected
   // or VictimWay not selected.
   assign FlushWayEn = FlushWay & SelFlush;
-  assign VictimWayEn = VictimWay & SelEvict;
+  assign VictimWayEn = VictimWay & SelWriteback;
   
-  assign SelWriteback = FlushWayEn | SetValid | SelEvict;
+  assign SelNonHit = FlushWayEn | SetValid | SelWriteback;
   
   mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag);
   //assign SelTag = VictimWay | FlushWay;
-  assign SelData = HitWay | FlushWayEn | VictimWayEn;
+  //assign SelData = HitWay | FlushWayEn | VictimWayEn;
   
-  mux2 #(1) selectedwaymux(HitWay, SelTag, SelWriteback , SelectedWay);
+  mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData);
 
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Write Enable demux
   /////////////////////////////////////////////////////////////////////////////////////////////
 
   // RT: Can we merge these two muxes?  This is also shared in cacheLRU.
-  //mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay,     {SelFlush, SetValid}, SelectedWay);
-  //mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelEvict}, SelData);
+  //mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay,     {SelFlush, SetValid}, SelData);
+  //mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelNonHit}, SelData);
 
-  assign SetValidWay = SetValid & SelectedWay;
-  assign ClearValidWay = ClearValid & SelectedWay;
-  assign SetDirtyWay = SetDirty & SelectedWay;
-  assign ClearDirtyWay = ClearDirty & SelectedWay;
+  assign SetValidWay = SetValid & SelData;
+  assign ClearValidWay = ClearValid & SelData;
+  assign SetDirtyWay = SetDirty & SelData;
+  assign ClearDirtyWay = ClearDirty & SelData;
   
   // If writing the whole line set all write enables to 1, else only set the correct word.
   assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage;
@@ -117,7 +116,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   // Tag Array
   /////////////////////////////////////////////////////////////////////////////////////////////
 
-  sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce,
+  sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
     .addr(CAdr), .dout(ReadTag), .bwe('1),
     .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
 
@@ -140,7 +139,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   localparam integer           LOGNUMSRAM = $clog2(NUMSRAM);
   
   for(words = 0; words < NUMSRAM; words++) begin: word
-    sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .addr(CAdr),
+    sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CAdr),
       .dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
       .din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
       .we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
@@ -155,7 +154,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   
   always_ff @(posedge clk) begin // Valid bit array, 
     if (reset) ValidBits        <= #1 '0;
-    if(ce) begin 
+    if(CacheEn) begin 
 	  ValidWay <= #1 ValidBits[CAdr];
 	  if(InvalidateCache & ~FlushStage)                    ValidBits <= #1 '0;
       else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay;
@@ -171,7 +170,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
     always_ff @(posedge clk) begin
       // reset is optional.  Consider merging with TAG array in the future.
       //if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; 
-      if(ce) begin
+      if(CacheEn) begin
         Dirty <= #1 DirtyBits[CAdr];
         if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CAdr] <= #1 SetDirtyWay;
       end

From 643a2e7cf9980aa51ff1f3b1053cedb3db80c6fc Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Wed, 14 Dec 2022 17:03:13 -0800
Subject: [PATCH 4/5] Use FPU divider for integer division when F is supported

---
 pipelined/src/fpu/fdivsqrt/fdivsqrt.sv         |  5 +++--
 pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 10 ++++++----
 pipelined/src/fpu/fpu.sv                       |  9 ++++++---
 pipelined/src/ieu/controller.sv                | 16 +++++++++-------
 pipelined/src/ieu/datapath.sv                  | 11 +++++++----
 pipelined/src/ieu/ieu.sv                       |  8 +++++---
 pipelined/src/muldiv/muldiv.sv                 | 15 +++++++++++----
 pipelined/src/wally/wallypipelinedcore.sv      |  6 ++++--
 tests/riscof/Makefile                          | 11 ++---------
 9 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index bdbfbde00..2c1aa7ed3 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -52,7 +52,8 @@ module fdivsqrt(
   output logic FDivBusyE, IFDivStartE, FDivDoneE,
 //  output logic DivDone,
   output logic [`NE+1:0] QeM,
-  output logic [`DIVb:0] QmM
+  output logic [`DIVb:0] QmM,
+  output logic [`XLEN-1:0] FPIntDivResultM
 //   output logic [`XLEN-1:0] RemM,
 );
 
@@ -88,5 +89,5 @@ module fdivsqrt(
     .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, 
     .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE,
     .n, .ALTBM, .m, .BZero, .As,
-    .QmM, .WZero, .DivSM);
+    .QmM, .WZero, .DivSM, .FPIntDivResultM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
index 7ac8229b9..925c12892 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -43,7 +43,8 @@ module fdivsqrtpostproc(
   input  logic [`DIVBLEN:0] n, m,
   output logic [`DIVb:0]    QmM, 
   output logic WZero,
-  output logic DivSM
+  output logic DivSM,
+  output logic [`XLEN-1:0] FPIntDivResultM
 );
   
   logic [`DIVb+3:0] W, Sum, RemDM;
@@ -53,7 +54,7 @@ module fdivsqrtpostproc(
   logic [`DIVBLEN:0] NormShiftM;
   logic [`DIVb:0] IntQuotM, NormQuotM;
   logic [`DIVb+3:0] IntRemM, NormRemM;
-  logic [`DIVb+3:0] PreResultM, ResultM;
+  logic [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
 
   // check for early termination on an exact result.  If the result is not exact, the sticky should be set
   aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0);
@@ -136,8 +137,9 @@ module fdivsqrtpostproc(
 
    // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
   
-  assign ResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
-
+  assign PreFPIntDivResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
+  assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
+ 
   assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit
   assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index bd4053dcb..10fa4d70e 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -55,7 +55,8 @@ module fpu (
    output logic              FCvtIntW,      // select FCvtIntRes (to IEU)
    output logic 		        FDivBusyE,     // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
    output logic 		        IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
-   output logic [4:0] 	     SetFflagsM        // FPU flags (to privileged unit)
+   output logic [4:0] 	     SetFflagsM,        // FPU flags (to privileged unit)
+   output logic [`XLEN-1:0]  FPIntDivResultW
   );
 
    // FPU specifics:
@@ -152,6 +153,7 @@ module fpu (
    logic [`FLEN-1:0]     BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
    logic [`FLEN-1:0]     BoxedOneE;                         // Zero value for Z for multiplication, with NaN boxing if needed
    logic             StallUnpackedM;
+   logic [`XLEN-1:0] FPIntDivResultM;
 
    // DECODE STAGE
 
@@ -267,7 +269,7 @@ module fpu (
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
                   .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
                   .StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, 
-                  .QmM /*, .DivDone(DivDoneM) */);
+                  .QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */);
 
                   //
    // compare
@@ -387,7 +389,8 @@ module fpu (
 
    // M/W pipe registers
    flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); 
-   flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
+   flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
+   flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FPIntDivResultM, FPIntDivResultW); 
 
    // BEGIN WRITEBACK STAGE
 
diff --git a/pipelined/src/ieu/controller.sv b/pipelined/src/ieu/controller.sv
index ab623b0e5..bf9482c80 100644
--- a/pipelined/src/ieu/controller.sv
+++ b/pipelined/src/ieu/controller.sv
@@ -65,7 +65,7 @@ module controller(
   output logic       FWriteIntM,
   // Writeback stage control signals
   input  logic       StallW, FlushW,
-  output logic 	     RegWriteW,     // for datapath and Hazard Unit
+  output logic 	     RegWriteW, DivW,    // for datapath and Hazard Unit
   output logic [2:0] ResultSrcW,
   // Stall during CSRs
   output logic       CSRWriteFencePendingDEM,
@@ -109,6 +109,7 @@ module controller(
   logic        IllegalERegAdrD;
   logic [1:0]  AtomicE;
    logic       FencePendingD, FencePendingE, FencePendingM;
+   logic       DivE, DivM;
    
 
   // Extract fields
@@ -222,16 +223,17 @@ module controller(
   assign MemReadE = MemRWE[1];
   assign SCE = (ResultSrcE == 3'b100);
   assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers
+  assign DivE = MDUE & Funct3E[2]; // Division operation
   
   // Memory stage pipeline control register
-  flopenrc #(19) controlregM(clk, reset, FlushM, ~StallM,
-                         {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE},
-                         {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM});
+  flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM,
+                         {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE, DivE},
+                         {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM, DivM});
   
   // Writeback stage pipeline control register
-  flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW,
-                         {RegWriteM, ResultSrcM},
-                         {RegWriteW, ResultSrcW});  
+  flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW,
+                         {RegWriteM, ResultSrcM, DivM},
+                         {RegWriteW, ResultSrcW, DivW});  
 
   // Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages
   assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM;
diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv
index 89ebd9b5d..abc232651 100644
--- a/pipelined/src/ieu/datapath.sv
+++ b/pipelined/src/ieu/datapath.sv
@@ -57,14 +57,15 @@ module datapath (
   output logic [`XLEN-1:0] WriteDataM, 
   // Writeback stage signals
   input  logic             StallW, FlushW,
-(* mark_debug = "true" *)  input  logic             RegWriteW, 
+(* mark_debug = "true" *)  input  logic             RegWriteW, DivW,
   input  logic             SquashSCW,
   input  logic [2:0]       ResultSrcW,
   input logic [`XLEN-1:0]  FCvtIntResW,
   input logic [`XLEN-1:0] ReadDataW,
   // input  logic [`XLEN-1:0] PCLinkW,
   input  logic [`XLEN-1:0] CSRReadValW, MDUResultW, 
-  // Hazard Unit signals 
+  input logic [`XLEN-1:0] FPIntDivResultW,
+   // Hazard Unit signals 
   output logic [4:0]       Rs1D, Rs2D, Rs1E, Rs2E,
   output logic [4:0]       RdE, RdM, RdW 
 );
@@ -85,7 +86,7 @@ module datapath (
   // Writeback stage signals
   logic [`XLEN-1:0] SCResultW;
   logic [`XLEN-1:0] ResultW;
-  logic [`XLEN-1:0] IFResultW, IFCvtResultW;
+  logic [`XLEN-1:0] IFResultW, IFCvtResultW, MulDivResultW;
 
   // Decode stage
   assign Rs1D      = InstrD[19:15];
@@ -125,10 +126,12 @@ module datapath (
   if (`F_SUPPORTED) begin:fpmux
     mux2  #(`XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
     mux2  #(`XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
+    mux2  #(`XLEN)  divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW);
   end else begin:fpmux
     assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW;
+    assign MulDivResultW = MDUResultW;
   end
-  mux5  #(`XLEN)  resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); 
+  mux5  #(`XLEN)  resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); 
  
   // handle Store Conditional result if atomic extension supported
   if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
diff --git a/pipelined/src/ieu/ieu.sv b/pipelined/src/ieu/ieu.sv
index 014d24f67..40d91a409 100644
--- a/pipelined/src/ieu/ieu.sv
+++ b/pipelined/src/ieu/ieu.sv
@@ -58,6 +58,7 @@ module ieu (
   output logic       InvalidateICacheM, FlushDCacheM,
 
   // Writeback stage
+  input logic [`XLEN-1:0] FPIntDivResultW,
   input logic [`XLEN-1:0]  CSRReadValW, MDUResultW,
   input logic [`XLEN-1:0]  FCvtIntResW,
   output logic [4:0]       RdW,
@@ -83,6 +84,7 @@ module ieu (
   logic        SCE;
   logic [4:0]  RdE;
   logic        FWriteIntM;
+  logic        DivW;
 
   // forwarding signals
   logic [4:0]       Rs1D, Rs2D, Rs1E, Rs2E;
@@ -99,15 +101,15 @@ module ieu (
     .Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
     .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
     .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
-    .StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
+    .StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
 
   datapath   dp(
     .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
     .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, 
     .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, 
     .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
-    .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
-    .CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);             
+    .StallW, .FlushW, .RegWriteW, .DivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
+    .CSRReadValW, .MDUResultW, .FPIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);             
   
   forward    fw(
     .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,
diff --git a/pipelined/src/muldiv/muldiv.sv b/pipelined/src/muldiv/muldiv.sv
index 5fa717e5f..c41ec4592 100644
--- a/pipelined/src/muldiv/muldiv.sv
+++ b/pipelined/src/muldiv/muldiv.sv
@@ -59,10 +59,17 @@ module muldiv (
 
 	// Divide
 	// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
-	assign DivE = MDUE & Funct3E[2];
-	assign DivSignedE = ~Funct3E[0];
-	intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE, 
-	                    .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
+	// When F extensions are supported, use the FPU divider instead
+	if (`F_SUPPORTED) begin  
+	  assign QuotM = 0;
+	  assign RemM = 0;
+	  assign DivBusyE = 0;
+	end else begin
+		assign DivE = MDUE & Funct3E[2];
+		assign DivSignedE = ~Funct3E[0];
+		intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE, 
+							.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
+	end
 		
 	// Result multiplexer
 	always_comb
diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv
index 076088857..a34a088e6 100644
--- a/pipelined/src/wally/wallypipelinedcore.sv
+++ b/pipelined/src/wally/wallypipelinedcore.sv
@@ -99,6 +99,7 @@ module wallypipelinedcore (
   logic             FpLoadStoreM;
   logic [1:0]       FResSelW;
   logic [4:0]             SetFflagsM;
+  logic [`XLEN-1:0] FPIntDivResultW;
 
   // memory management unit signals
   logic             ITLBWriteF;
@@ -228,7 +229,7 @@ module wallypipelinedcore (
      .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
 
      // Writeback stage
-     .CSRReadValW, .MDUResultW,
+     .CSRReadValW, .MDUResultW, .FPIntDivResultW,
      .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
      .InstrValidM, 
      .FCvtIntResW,
@@ -405,7 +406,8 @@ module wallypipelinedcore (
          .FCvtIntW,   // fpu result selection
          .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
          .IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
-         .SetFflagsM        // FPU flags (to privileged unit)
+         .SetFflagsM,        // FPU flags (to privileged unit)
+         .FPIntDivResultW
       ); // floating point unit
    end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
       assign FStallD = 0;
diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index aba3983c3..8292b9888 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -8,8 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
 current_dir = $(shell pwd)
 #XLEN    ?= 64
 
-all: root fsd_fld_tempfix arch32 wally32  wally32e arch64 wally64
-#all: root fsd_fld_tempfix wally32
+all: root arch32 wally32  wally32e arch64 wally64
 
 root:
 	mkdir -p $(work_dir)
@@ -20,14 +19,8 @@ root:
 	sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini
 	sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini
 
-fsd_fld_tempfix:
-    # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests
-    # https://github.com/riscv-non-isa/riscv-arch-test/issues/266
-	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
-	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
-
 arch32:
-	riscof --verbose debug run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
+	riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
 	rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed"
 
 arch64:

From e80e84aacedb7c05d2c59858e32b3198b02b0298 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Thu, 15 Dec 2022 06:37:55 -0800
Subject: [PATCH 5/5] Added IDIV_ON_FPU flag to control whether integer
 division uses FPU

---
 pipelined/config/buildroot/wally-config.vh  | 1 +
 pipelined/config/fpga/wally-config.vh       | 1 +
 pipelined/config/rv32e/wally-config.vh      | 1 +
 pipelined/config/rv32gc/wally-config.vh     | 1 +
 pipelined/config/rv32i/wally-config.vh      | 1 +
 pipelined/config/rv32ic/wally-config.vh     | 1 +
 pipelined/config/rv64BP/wally-config.vh     | 1 +
 pipelined/config/rv64fpquad/wally-config.vh | 1 +
 pipelined/config/rv64gc/wally-config.vh     | 1 +
 pipelined/config/rv64i/wally-config.vh      | 1 +
 pipelined/src/ieu/datapath.sv               | 6 +++++-
 pipelined/src/muldiv/muldiv.sv              | 2 +-
 12 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh
index defbf458d..6b5490354 100644
--- a/pipelined/config/buildroot/wally-config.vh
+++ b/pipelined/config/buildroot/wally-config.vh
@@ -72,6 +72,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 16
diff --git a/pipelined/config/fpga/wally-config.vh b/pipelined/config/fpga/wally-config.vh
index 8d9ff7e15..97b43c5b4 100644
--- a/pipelined/config/fpga/wally-config.vh
+++ b/pipelined/config/fpga/wally-config.vh
@@ -74,6 +74,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh
index 8b6dea7e7..99f61c12a 100644
--- a/pipelined/config/rv32e/wally-config.vh
+++ b/pipelined/config/rv32e/wally-config.vh
@@ -73,6 +73,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 1
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 0
diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh
index 69b256ad1..bafe35e6d 100644
--- a/pipelined/config/rv32gc/wally-config.vh
+++ b/pipelined/config/rv32gc/wally-config.vh
@@ -72,6 +72,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh
index 4eaa116a0..46e493cbf 100644
--- a/pipelined/config/rv32i/wally-config.vh
+++ b/pipelined/config/rv32i/wally-config.vh
@@ -73,6 +73,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh
index ba47915de..d6ca045ea 100644
--- a/pipelined/config/rv32ic/wally-config.vh
+++ b/pipelined/config/rv32ic/wally-config.vh
@@ -72,6 +72,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 0
diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh
index 8591ab1c9..85456e2c9 100644
--- a/pipelined/config/rv64BP/wally-config.vh
+++ b/pipelined/config/rv64BP/wally-config.vh
@@ -76,6 +76,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Address space
 `define RESET_VECTOR 64'h0000000000001000
diff --git a/pipelined/config/rv64fpquad/wally-config.vh b/pipelined/config/rv64fpquad/wally-config.vh
index 13df811c4..eeba56990 100644
--- a/pipelined/config/rv64fpquad/wally-config.vh
+++ b/pipelined/config/rv64fpquad/wally-config.vh
@@ -74,6 +74,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh
index 630dfdb6f..a469a2552 100644
--- a/pipelined/config/rv64gc/wally-config.vh
+++ b/pipelined/config/rv64gc/wally-config.vh
@@ -74,6 +74,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh
index 098755cd2..5c34b96a1 100644
--- a/pipelined/config/rv64i/wally-config.vh
+++ b/pipelined/config/rv64i/wally-config.vh
@@ -74,6 +74,7 @@
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
+`define IDIV_ON_FPU 0
 
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 0
diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv
index abc232651..7c05986d5 100644
--- a/pipelined/src/ieu/datapath.sv
+++ b/pipelined/src/ieu/datapath.sv
@@ -126,7 +126,11 @@ module datapath (
   if (`F_SUPPORTED) begin:fpmux
     mux2  #(`XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
     mux2  #(`XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
-    mux2  #(`XLEN)  divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW);
+    if (`IDIV_ON_FPU) begin
+      mux2  #(`XLEN)  divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW);
+    end else begin 
+      assign MulDivResultW = MDUResultW;
+    end
   end else begin:fpmux
     assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW;
     assign MulDivResultW = MDUResultW;
diff --git a/pipelined/src/muldiv/muldiv.sv b/pipelined/src/muldiv/muldiv.sv
index c41ec4592..d6bc26de4 100644
--- a/pipelined/src/muldiv/muldiv.sv
+++ b/pipelined/src/muldiv/muldiv.sv
@@ -60,7 +60,7 @@ module muldiv (
 	// Divide
 	// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
-	// When F extensions are supported, use the FPU divider instead
+	// When IDIV_ON_FPU is set, use the FPU divider instead
-	if (`F_SUPPORTED) begin  
+	if (`IDIV_ON_FPU) begin  
 	  assign QuotM = 0;
 	  assign RemM = 0;
 	  assign DivBusyE = 0;