From bebaf08bed640845d0e193706c2a26c0695c51ed Mon Sep 17 00:00:00 2001
From: Cedar Turek <cturek@g.hmc.edu>
Date: Mon, 26 Dec 2022 21:03:56 -0800
Subject: [PATCH] Remove OTFC swap; update postprocessing quotient/remainder
 logic for integer division.

---
 pipelined/src/fpu/fdivsqrt/fdivsqrt.sv        |  9 ++-
 pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv    |  9 ++-
 .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv      | 61 ++++++++++---------
 pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv |  8 +--
 pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv  |  4 +-
 pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv  |  4 +-
 pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv  | 11 +---
 pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv  | 14 ++---
 8 files changed, 55 insertions(+), 65 deletions(-)

diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index c21ab754..ddcf80a3 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -66,14 +66,14 @@ module fdivsqrt(
   logic WZeroE, AZeroM, BZeroM, AZeroE, BZeroE;
   logic SpecialCaseM, MDUM;
   logic [`DIVBLEN:0] nE, nM, mM;
-  logic CalcOTFCSwapE, OTFCSwapE, ALTBM, AsM;
+  logic NegQuotM, ALTBM, AsM;
   logic DivStartE;
   logic [`XLEN-1:0] ForwardedSrcAM;
 
   fdivsqrtpreproc fdivsqrtpreproc(
     .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
     .Sqrt(SqrtE), .Ym(YmE), .XZeroE, .X, .DPreproc, .ForwardedSrcAM, .MDUM,
-    .nE, .nM, .mM, .CalcOTFCSwapE, .OTFCSwapE, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .AsM,
+    .nE, .nM, .mM, .NegQuotM, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .AsM,
     .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .MDUE, .W64E);
   fdivsqrtfsm fdivsqrtfsm(
     .clk, .reset, .FmtE, .XsE, .SqrtE, .nE,
@@ -84,12 +84,11 @@ module fdivsqrt(
   fdivsqrtiter fdivsqrtiter(
     .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .MDUE, .SqrtE, // .SqrtM,
     .X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
-    .IFDivStartE, .CalcOTFCSwapE, .OTFCSwapE,
-    .FDivBusyE);
+    .IFDivStartE, .FDivBusyE);
   fdivsqrtpostproc fdivsqrtpostproc(
     .clk, .reset, .StallM,
     .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .MDUE, .Firstun, 
     .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAM,
-    .nM, .ALTBM, .mM, .BZeroM, .AsM, .OTFCSwapEM(OTFCSwapE),
+    .nM, .ALTBM, .mM, .BZeroM, .AsM, .NegQuotM,
     .QmM, .WZeroE, .DivSM, .FPIntDivResultM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
index b91728ea..659915a1 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -36,7 +36,6 @@ module fdivsqrtiter(
   input  logic FDivBusyE, 
   input  logic SqrtE, MDUE,
 //  input  logic SqrtM,
-  input  logic CalcOTFCSwapE, OTFCSwapE,
   input  logic [`DIVb+3:0] X,
   input  logic [`DIVb-1:0] DPreproc,
   output logic [`DIVb-1:0] D,
@@ -79,8 +78,8 @@ module fdivsqrtiter(
 
   // UOTFC Result U and UM registers/initialization mux
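-  // Initialize U to 1.0 and UM to 0 for square root or negative-result int division; U to 0 and UM to -1 otherwise
+  // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise (int division sign is now fixed up in postprocessing)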
-  assign initU =  ((MDUE & CalcOTFCSwapE) | (SqrtE & ~(MDUE))) ? {1'b1, {(`DIVb){1'b0}}} : 0;
-  assign initUM = ((MDUE & CalcOTFCSwapE) | (SqrtE & ~(MDUE))) ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
+  assign initU =  ((SqrtE & ~(MDUE))) ? {1'b1, {(`DIVb){1'b0}}} : 0;
+  assign initUM = ((SqrtE & ~(MDUE))) ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
   mux2   #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
   mux2   #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
   flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]);
@@ -111,13 +110,13 @@ module fdivsqrtiter(
   generate
     for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations
       if (`RADIX == 2) begin: stage
-        fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, .OTFCSwapE, .MDUE,
+        fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, .MDUE,
         .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
         .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
       end else begin: stage
         logic j1;
         assign j1 = (i == 0 & ~C[0][`DIVb-1]);
-        fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .OTFCSwapE, .MDUE,
+        fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .MDUE,
         .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), 
         .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
       end
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
index 8eaf98af..02348131 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -38,7 +38,7 @@ module fdivsqrtpostproc(
   input  logic [`DIVb:0]    FirstU, FirstUM, 
   input  logic [`DIVb+1:0]  FirstC,
   input  logic              SqrtE, MDUE,
-  input  logic              Firstun, SqrtM, SpecialCaseM, OTFCSwapEM,
+  input  logic              Firstun, SqrtM, SpecialCaseM, NegQuotM,
 	input  logic [`XLEN-1:0]  ForwardedSrcAM,
   input  logic              RemOpM, ALTBM, BZeroM, AsM, 
   input  logic [`DIVBLEN:0] nM, mM,
@@ -50,8 +50,8 @@ module fdivsqrtpostproc(
   
   logic [`DIVb+3:0] W, Sum, DM;
   logic [`DIVb:0] PreQmM;
-  logic NegStickyM, PostIncM;
-  logic weq0E;
+  logic NegStickyM;
+  logic weq0E, weq0M;
   logic [`DIVBLEN:0] NormShiftM;
   logic [`DIVb:0] IntQuotM, NormQuotM;
   logic [`DIVb+3:0] IntRemM, NormRemM;
@@ -85,6 +85,7 @@ module fdivsqrtpostproc(
   //////////////////////////
  
   flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
+  flopenr #(1) WeqZeroMReg(clk, reset, ~StallM, weq0E, weq0M);
 
   //////////////////////////
   // Memory Stage: Postprocessing
@@ -107,45 +108,43 @@ module fdivsqrtpostproc(
       if (NegStickyM) begin
         NormQuotM = FirstUM;
         NormRemM  = W + DM;
-        PostIncM  = 0;
       end else begin
         NormQuotM = FirstU;
         NormRemM  = W;
-        PostIncM  = 0;
       end
     else 
 //      if (NegStickyM | weq0) begin // *** old code, replaced by the one below in the right stage and more comprehensive
       if (NegStickyM | WZeroM) begin
-        NormQuotM = FirstU;
+        NormQuotM = FirstUM;
         NormRemM  = W;
-        PostIncM  = 0;
       end else begin 
         NormQuotM = FirstU;
         NormRemM  = W - DM;
-        PostIncM  = ~ALTBM;
       end
 
   // Integer division: Special cases
   always_comb
-    if (BZeroM) begin
-      IntQuotM = '1;
-      IntRemM  = {{(`DIVb-`XLEN+4){1'b0}}, ForwardedSrcAM};
-    end else if (ALTBM) begin
+    if (ALTBM) begin
       IntQuotM = '0;
       IntRemM  = {{(`DIVb-`XLEN+4){1'b0}}, ForwardedSrcAM};
-    end else if (WZeroM) begin
-    // *** dh: 12/26: don't understand this logic and why weq0 inside WZero check.  Need a divide by 0 check here
-/*      if (weq0) begin */
-        IntQuotM = FirstU;
-        IntRemM  = '0;
-/*      end else begin
-        IntQuotM = FirstUM;
-        IntRemM  = '0;
-      end */
-    end else begin 
-      IntQuotM = NormQuotM;
-      IntRemM  = NormRemM;
-    end 
+    end else begin
+      logic [`DIVb:0] PreIntQuotM;
+      if (WZeroM) begin
+        if (weq0M) begin
+          PreIntQuotM = FirstU;
+          IntRemM  = '0;
+        end else begin
+          PreIntQuotM = FirstUM;
+          IntRemM  = '0;
+        end 
+      end else begin 
+        PreIntQuotM = NormQuotM;
+        IntRemM  = NormRemM;
+      end 
+      // flip sign if necessary
+      if (NegQuotM) IntQuotM = -PreIntQuotM;
+      else          IntQuotM =  PreIntQuotM;
+    end
   
   always_comb
     if (RemOpM) begin
@@ -153,19 +152,21 @@ module fdivsqrtpostproc(
       PreResultM = IntRemM;
     end else begin
       NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
-      if (BZeroM | (~ALTBM & OTFCSwapEM)) begin
-        PreResultM = {3'b111, IntQuotM};
+      PreResultM = {{3{IntQuotM[`DIVb]}}, IntQuotM};
+      /*
+      if (~ALTBM & NegQuotM) begin
+        PreResultM = {3'b111, -IntQuotM};
       end else begin
         PreResultM = {3'b000, IntQuotM};
-      end
+      end*/
       //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender
     end
   
 
    // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
   
-  assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
-  assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
+  assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
+  assign FPIntDivResultM = BZeroM ? (RemOpM ? ForwardedSrcAM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases
  
   assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit
   assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index 6711441f..9dd9ccb1 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -42,7 +42,7 @@ module fdivsqrtpreproc (
 	input  logic [2:0] 	Funct3E,
 	input  logic MDUE, W64E,
   output logic [`DIVBLEN:0] nE, nM, mM,
-  output logic CalcOTFCSwapE, OTFCSwapE, ALTBM, MDUM,
+  output logic NegQuotM, ALTBM, MDUM,
   output logic AsM, AZeroM, BZeroM, AZeroE, BZeroE,
   output logic [`NE+1:0] QeM,
   output logic [`DIVb+3:0] X,
@@ -57,7 +57,7 @@ module fdivsqrtpreproc (
   // Intdiv signals
   logic  [`DIVb-1:0] IFNormLenX, IFNormLenD;
   logic  [`XLEN-1:0] PosA, PosB;
-  logic  AsE, BsE, ALTBE;
+  logic  AsE, BsE, ALTBE, NegQuotE;
   logic  [`XLEN-1:0]  A64, B64;
   logic  [`DIVBLEN:0] mE;
   logic  [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
@@ -74,7 +74,7 @@ module fdivsqrtpreproc (
   assign A64 = W64E ? {{(`XLEN-32){AsE}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
   assign B64 = W64E ? {{(`XLEN-32){BsE}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;
 
-  assign CalcOTFCSwapE = (AsE ^ BsE) & MDUE;
+  assign NegQuotE = (AsE ^ BsE) & MDUE;
   
   assign PosA = AsE ? -A64 : A64;
   assign PosB = BsE ? -B64 : B64;
@@ -127,7 +127,7 @@ module fdivsqrtpreproc (
   // DIVRESLEN/(r*`DIVCOPIES)
 
   flopen #(`NE+2)    expreg(clk, IFDivStartE, QeE, QeM);
-  flopen #(1)       swapreg(clk, IFDivStartE, CalcOTFCSwapE, OTFCSwapE); // Retain value for each iteration of divider in Execute stage
+  flopen #(1)    negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
   flopen #(1)       altbreg(clk, IFDivStartE, ALTBE, ALTBM);
   flopen #(1)      azeroreg(clk, IFDivStartE, AZeroE, AZeroM);
   flopen #(1)      bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
index 52e2780f..ed9b1a12 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
@@ -38,7 +38,7 @@ module fdivsqrtstage2 (
   input  logic [`DIVb+3:0]  WS, WC,
   input  logic [`DIVb+1:0] C,
   input  logic SqrtE,
-  input  logic OTFCSwapE, MDUE,
+  input  logic MDUE,
   output logic un,
   output logic [`DIVb+1:0] CNext,
   output logic [`DIVb:0] UNext, UMNext, 
@@ -82,7 +82,7 @@ module fdivsqrtstage2 (
   assign CNext = {1'b1, C[`DIVb+1:1]};
 
   // Unified On-The-Fly Converter to accumulate result
-  fdivsqrtuotfc2 uotfc2(.up, .un, .swap(OTFCSwapE), .C(CNext), .U, .UM, .UNext, .UMNext);
+  fdivsqrtuotfc2 uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
 endmodule
 
 
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
index 95803d9e..03795715 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@@ -36,7 +36,7 @@ module fdivsqrtstage4 (
   input  logic [`DIVb:0] U, UM,
   input  logic [`DIVb+3:0]  WS, WC,
   input  logic [`DIVb+1:0] C,
-  input  logic SqrtE, j1, OTFCSwapE, MDUE,
+  input  logic SqrtE, j1, MDUE,
   output logic [`DIVb+1:0] CNext,
   output logic un,
   output logic [`DIVb:0] UNext, UMNext, 
@@ -94,7 +94,7 @@ module fdivsqrtstage4 (
   assign CNext = {2'b11, C[`DIVb+1:2]};
  
   // On-the-fly converter to accumulate result
-  fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .swap(OTFCSwapE), .Sqrt(SqrtE), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
+  fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtE), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
 endmodule
 
 
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv
index 7298eff6..2a93f85a 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv
@@ -34,7 +34,7 @@
 // Unified OTFC, Radix 2 //
 ///////////////////////////////
 module fdivsqrtuotfc2(
-  input  logic         up, un, swap,
+  input  logic         up, un,
   input  logic [`DIVb+1:0] C,
   input logic [`DIVb:0] U, UM,
   output logic [`DIVb:0] UNext, UMNext
@@ -42,19 +42,14 @@ module fdivsqrtuotfc2(
   //  The on-the-fly converter transfers the divsqrt
   //  bits to the quotient as they come.
   logic [`DIVb:0] K;
-  logic unSwap, upSwap;
-  
-  // Check for swap (int div only)
-  assign unSwap = swap ? up : un;
-  assign upSwap = swap ? un : up;
 
   assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1));
 
   always_comb begin
-    if (upSwap) begin
+    if (up) begin
       UNext  = U | K;
       UMNext = U;
-    end else if (unSwap) begin
+    end else if (un) begin
       UNext  = UM | K;
       UMNext = UM;
     end else begin // If up and un are not true, then uz is
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv
index 156f4f96..5c05488e 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv
@@ -32,7 +32,7 @@
 
 module fdivsqrtuotfc4(
   input  logic [3:0]   udigit,
-  input  logic         Sqrt, swap,
+  input  logic         Sqrt,
   input  logic [`DIVb:0] U, UM,
   input  logic [`DIVb:0] C,
   output logic [`DIVb:0] UNext, UMNext
@@ -41,26 +41,22 @@ module fdivsqrtuotfc4(
   //  bits to the quotient as they come.
   //  Use this otfc for division and square root.
 
-  logic [3:0] udigitswap, udigitsel;
   logic [`DIVb:0] K1, K2, K3;
   assign K1 = (C&~(C << 1));        // K
   assign K2 = ((C << 1)&~(C << 2)); // 2K
   assign K3 = (C & ~(C << 2));      // 3K
 
-  assign udigitswap = {udigit[0], udigit[1], udigit[2], udigit[3]};
-  assign udigitsel = swap ? udigitswap : udigit;
-
   always_comb begin
-    if (udigitsel[3]) begin           // +2
+    if (udigit[3]) begin           // +2
       UNext  = U | K2;
       UMNext = U | K1;
-    end else if (udigitsel[2]) begin  // +1
+    end else if (udigit[2]) begin  // +1
       UNext  = U | K1;
       UMNext = U;
-    end else if (udigitsel[1]) begin  // -1
+    end else if (udigit[1]) begin  // -1
       UNext  = UM | K3;
       UMNext = UM | K2;
-    end else if (udigitsel[0]) begin  // -2
+    end else if (udigit[0]) begin  // -2
       UNext  = UM | K2;
       UMNext = UM | K1;
     end else begin                    // 0