From a36747fda04aa0f00998cf6164d5d14ff97b1596 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Mon, 19 Sep 2022 08:30:59 -0700
Subject: [PATCH] Finished unified divsqrt otfc and fgen name changes

---
 pipelined/config/shared/wally-shared.vh |  2 +-
 pipelined/src/fpu/fdivsqrtiter.sv       |  4 +-
 pipelined/src/fpu/fdivsqrtpostproc.sv   |  6 ++-
 pipelined/src/fpu/fdivsqrtstage2.sv     |  8 ++--
 pipelined/src/fpu/fdivsqrtstage4.sv     | 10 ++---
 pipelined/src/fpu/otfc.sv               | 51 ++++++++++++-------------
 pipelined/src/fpu/qsel.sv               | 16 ++++----
 7 files changed, 49 insertions(+), 48 deletions(-)

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 1f05a4f1..214c747d 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -102,7 +102,7 @@
 
 // division constants
 `define RADIX 32'h2
-`define DIVCOPIES 32'h1
+`define DIVCOPIES 32'h5
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
 // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
 `define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input
diff --git a/pipelined/src/fpu/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrtiter.sv
index 399daeb8..ea2c99bc 100644
--- a/pipelined/src/fpu/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrtiter.sv
@@ -119,13 +119,13 @@ module fdivsqrtiter(
       if (`RADIX == 2) begin: stage
         fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM,
         .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), 
-        .C(C[i]), .S(U[i]), .SM(UM[i]), .CNext(C[i+1]), .SNext(UNext[i]), .SMNext(UMNext[i]), .qn(qn[i]));
+        .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .qn(qn[i]));
       end else begin: stage
         logic j1;
         assign j1 = (i == 0 & ~C[0][`DIVb-1]);
         fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1,
         .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), 
-        .C(C[i]), .S(U[i]), .SM(UM[i]), .CNext(C[i+1]), .SNext(UNext[i]), .SMNext(UMNext[i]), .qn(qn[i]));
+        .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .qn(qn[i]));
       end
       if(i<(`DIVCOPIES-1)) begin 
         assign WS[i+1] = WSA[i] << `LOGR;
diff --git a/pipelined/src/fpu/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrtpostproc.sv
index a9015ad6..c882dffa 100644
--- a/pipelined/src/fpu/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrtpostproc.sv
@@ -71,11 +71,13 @@ module fdivsqrtpostproc(
 
    // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
   always_comb
-    if(SqrtM) // sqrt ouputs in the range (1, .5]
+    if(NegSticky) QmM = FirstUM[`DIVb-(`RADIX/4):0] << SqrtM;
+    else          QmM = FirstU[`DIVb-(`RADIX/4):0]  << SqrtM;
+/*    if(SqrtM) // sqrt outputs in the range (1, .5]
       if(NegSticky) QmM = {FirstUM[`DIVb-1-(`RADIX/4):0], 1'b0};
       else          QmM = {FirstU[`DIVb-1-(`RADIX/4):0], 1'b0};
     else  
       if(NegSticky) QmM = FirstUM[`DIVb-(`RADIX/4):0];
-      else          QmM = FirstU[`DIVb-(`RADIX/4):0];
+      else          QmM = FirstU[`DIVb-(`RADIX/4):0]; */
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrtstage2.sv
index 12f83ac0..1671ddaa 100644
--- a/pipelined/src/fpu/fdivsqrtstage2.sv
+++ b/pipelined/src/fpu/fdivsqrtstage2.sv
@@ -34,13 +34,13 @@
 module fdivsqrtstage2 (
   input logic [`DIVN-2:0] D,
   input logic [`DIVb+3:0]  DBar, D2, DBar2,
-  input logic [`DIVb:0] S, SM,
+  input logic [`DIVb:0] U, UM,
   input logic [`DIVb+3:0]  WS, WC,
   input logic [`DIVb+1:0] C,
   input logic SqrtM,
   output logic qn,
   output logic [`DIVb+1:0] CNext,
-  output logic [`DIVb:0] SNext, SMNext, 
+  output logic [`DIVb:0] UNext, UMNext, 
   output logic [`DIVb+3:0]  WSA, WCA
 );
  /* verilator lint_on UNOPTFLAT */
@@ -61,7 +61,7 @@ module fdivsqrtstage2 (
 	// 0010 = -1
 	// 0001 = -2
   qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz, qn);
-  fgen2 fgen2(.sp(qp), .sz(qz), .C(CNext), .S, .SM, .F);
+  fgen2 fgen2(.sp(qp), .sz(qz), .C(CNext), .U, .UM, .F);
 
   assign Dsel = {`DIVb+4{~qz}}&(qp ? DBar : {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}});
   // Partial Product Generation
@@ -69,7 +69,7 @@ module fdivsqrtstage2 (
   assign AddIn = SqrtM ? F : Dsel;
   csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~SqrtM, WSA, WCA);
 
-  sotfc2 sotfc2(.sp(qp), .sz(qz), .C(CNext), .S, .SM, .SNext, .SMNext);
+  uotfc2 uotfc2(.sp(qp), .sz(qz), .C(CNext), .U, .UM, .UNext, .UMNext);
 endmodule
 
 
diff --git a/pipelined/src/fpu/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrtstage4.sv
index 643c914a..9f70b9c2 100644
--- a/pipelined/src/fpu/fdivsqrtstage4.sv
+++ b/pipelined/src/fpu/fdivsqrtstage4.sv
@@ -34,13 +34,13 @@
 module fdivsqrtstage4 (
   input logic [`DIVN-2:0] D,
   input logic [`DIVb+3:0]  DBar, D2, DBar2,
-  input logic [`DIVb:0] S, SM,
+  input logic [`DIVb:0] U, UM,
   input logic [`DIVb+3:0]  WS, WC,
   input logic [`DIVb+1:0] C,
   output logic [`DIVb+1:0] CNext,
   input logic SqrtM, j1,
   output logic qn,
-  output logic [`DIVb:0] SNext, SMNext, 
+  output logic [`DIVb:0] UNext, UMNext, 
   output logic [`DIVb+3:0]  WSA, WCA
 );
  /* verilator lint_on UNOPTFLAT */
@@ -61,9 +61,9 @@ module fdivsqrtstage4 (
 	// 0000 =  0
 	// 0010 = -1
 	// 0001 = -2
-  assign Smsbs = S[`DIVb:`DIVb-4];
+  assign Smsbs = U[`DIVb:`DIVb-4];
   qsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .q);
-  fgen4 fgen4(.s(q), .C({2'b11, CNext}), .S({3'b000, S}), .SM({3'b000, SM}), .F);
+  fgen4 fgen4(.s(q), .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
 
   always_comb
   case (q)
@@ -81,7 +81,7 @@ module fdivsqrtstage4 (
   assign CarryIn = ~SqrtM & (q[3] | q[2]); // +1 for 2's complement of -D and -2D 
   csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
  
-  sotfc4 sotfc4(.s(q), .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .S, .SM, .SNext, .SMNext);
+  uotfc4 uotfc4(.s(q), .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
 
   assign qn = 0; // unused for radix 4
 endmodule
diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv
index 8f12a6d6..cc4ab534 100644
--- a/pipelined/src/fpu/otfc.sv
+++ b/pipelined/src/fpu/otfc.sv
@@ -31,45 +31,44 @@
 `include "wally-config.vh"
 
 ///////////////////////////////
-// Square Root OTFC, Radix 2 //
+// Unified OTFC, Radix 2 //
 ///////////////////////////////
-module sotfc2(
+module uotfc2(
   input  logic         sp, sz,
   input  logic [`DIVb+1:0] C,
-  input logic [`DIVb:0] S, SM,
-  output logic [`DIVb:0] SNext, SMNext
+  input logic [`DIVb:0] U, UM,
+  output logic [`DIVb:0] UNext, UMNext
 );
-  //  The on-the-fly converter transfers the square root 
+  //  The on-the-fly converter transfers the divsqrt
   //  bits to the quotient as they come.
-  //  Use this otfc for division and square root.
   logic [`DIVb:0] K;
 
   assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1));
 
   always_comb begin
     if (sp) begin
-      SNext  = S | K;
-      SMNext = S;
+      UNext  = U | K;
+      UMNext = U;
     end else if (sz) begin
-      SNext  = S;
-      SMNext = SM | K;
+      UNext  = U;
+      UMNext = UM | K;
     end else begin        // If sp and sz are not true, then sn is
-      SNext  = SM | K;
-      SMNext = SM;
+      UNext  = UM | K;
+      UMNext = UM;
     end 
   end
 
 endmodule
 
 ///////////////////////////////
-// Square Root OTFC, Radix 4 //
+// Unified OTFC, Radix 4 //
 ///////////////////////////////
-module sotfc4(
+module uotfc4(
   input  logic [3:0]   s,
   input  logic         Sqrt,
-  input  logic [`DIVb:0] S, SM,
+  input  logic [`DIVb:0] U, UM,
   input  logic [`DIVb:0] C,
-  output logic [`DIVb:0] SNext, SMNext
+  output logic [`DIVb:0] UNext, UMNext
 );
   //  The on-the-fly converter transfers the square root 
   //  bits to the quotient as they come.
@@ -82,20 +81,20 @@ module sotfc4(
 
   always_comb begin
     if (s[3]) begin
-      SNext  = S | K2;
-      SMNext = S | K1;
+      UNext  = U | K2;
+      UMNext = U | K1;
     end else if (s[2]) begin
-      SNext  = S | K1;
-      SMNext = S;
+      UNext  = U | K1;
+      UMNext = U;
     end else if (s[1]) begin
-      SNext  = SM | K3;
-      SMNext = SM | K2;
+      UNext  = UM | K3;
+      UMNext = UM | K2;
     end else if (s[0]) begin
-      SNext  = SM | K2;
-      SMNext = SM | K1;
+      UNext  = UM | K2;
+      UMNext = UM | K1;
     end else begin        // If sp and sn are not true, then sz is
-      SNext  = S;
-      SMNext = SM | K3;
+      UNext  = U;
+      UMNext = UM | K3;
     end 
   end
 
diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv
index cb1c72ef..84614197 100644
--- a/pipelined/src/fpu/qsel.sv
+++ b/pipelined/src/fpu/qsel.sv
@@ -68,14 +68,14 @@ endmodule
 module fgen2 (
   input  logic sp, sz,
   input  logic [`DIVb+1:0] C,
-  input  logic [`DIVb:0] S, SM,
+  input  logic [`DIVb:0] U, UM,
   output logic [`DIVb+3:0] F
 );
   logic [`DIVb+3:0] FP, FN, FZ;
   logic [`DIVb+3:0] SExt, SMExt, CExt;
 
-  assign SExt = {3'b0, S};
-  assign SMExt = {3'b0, SM};
+  assign SExt = {3'b0, U};
+  assign SMExt = {3'b0, UM};
   assign CExt = {2'b11, C}; // extend C from Q2.k to Q4.k
 
   // Generate for both positive and negative bits
@@ -254,17 +254,17 @@ endmodule
 ////////////////////////////////////
 module fgen4 (
   input  logic [3:0] s,
-  input  logic [`DIVb+3:0] C, S, SM,
+  input  logic [`DIVb+3:0] C, U, UM,
   output logic [`DIVb+3:0] F
 );
   logic [`DIVb+3:0] F2, F1, F0, FN1, FN2;
   
   // Generate for both positive and negative bits
-  assign F2  = (~S << 2) & (C << 2);
-  assign F1  = ~(S << 1) & C;
+  assign F2  = (~U << 2) & (C << 2);
+  assign F1  = ~(U << 1) & C;
   assign F0  = '0;
-  assign FN1 = (SM << 1) | (C & ~(C << 3));
-  assign FN2 = (SM << 2) | ((C << 2)&~(C << 4));
+  assign FN1 = (UM << 1) | (C & ~(C << 3));
+  assign FN2 = (UM << 2) | ((C << 2)&~(C << 4));
 
   // Choose which adder input will be used