From b893d9249d700ecd80ea984392a89232587551f2 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Sun, 6 Nov 2022 21:53:48 +0000
Subject: [PATCH] Added new macros for int div preprocessing, added p, n, and
 rightshiftx logic

---
 pipelined/config/shared/wally-shared.vh       |  8 +++-
 pipelined/src/fpu/fdivsqrt/fdivsqrt.sv        |  3 +-
 pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 42 +++++++++++++------
 3 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index ca93d7e7..97feac9e 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -110,7 +110,7 @@
 
 // division constants
 `define RADIX 32'h4
-`define DIVCOPIES 32'h3
+`define DIVCOPIES 32'h2
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3)
 // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
 `define DIVN (`NF<`XLEN ? `XLEN : (`NF + 3)) // length of input
@@ -118,12 +118,16 @@
 `define EXTRAINTBITS ((`NF < `XLEN) ? 0 : (`NF - `XLEN + 3))
 `define DIVRESLEN ((`NF>`XLEN) ? (`NF + 4) : `XLEN)
 `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
-// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
+`define RK (`DIVCOPIES*`LOGR) // r*k = LOGR*DIVCOPIES (not RADIX*DIVCOPIES); used for intdiv preproc
+`define LOGK ($clog2(`DIVCOPIES))
+`define LOGRK ($clog2(`RK))
+// FPDUR = ceil((DIVN+2)/(LOGR*DIVCOPIES)) + RADIX/4
 // one iteration is required for the integer bit for minimally redundent radix-4
 `define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
 `define DURLEN ($clog2(`FPDUR+1))
 `define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
 `define DIVb (`QLEN-1)
+`define DIVBLEN ($clog2(`DIVb))
 
 
 `define USE_SRAM 0
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index 604a0711..3f619993 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -64,10 +64,11 @@ module fdivsqrt(
   logic Firstun;
   logic WZero;
   logic SpecialCaseM;
+  logic [`DIVBLEN:0] n;
 
   fdivsqrtpreproc fdivsqrtpreproc(
     .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
-    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, 
+    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n,
     .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
   fdivsqrtfsm fdivsqrtfsm(
     .clk, .reset, .FmtE, .XsE, .SqrtE, 
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index f1882ad6..ae015a58 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -41,7 +41,8 @@ module fdivsqrtpreproc (
   input  logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
 	input  logic [2:0] 	Funct3E, Funct3M,
 	input  logic MDUE, W64E,
-  output logic  [`NE+1:0] QeM,
+  output logic [`DIVBLEN:0] n,
+  output logic [`NE+1:0] QeM,
   output logic [`DIVb+3:0] X,
   output logic [`DIVN-2:0] Dpreproc
 );
@@ -53,33 +54,50 @@ module fdivsqrtpreproc (
   logic  [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic  [`NE+1:0] Qe;
   // Intdiv signals
-  logic  [`DIVN-1:0] ZeroBufX, ZeroBufY;
+  logic  [`DIVb-1:0] ZeroBufX, ZeroBufY;
   logic  [`XLEN-1:0] PosA, PosB;
-  logic  Signed, Aneg, Bneg;
+  logic  As, Bs;
+  logic  [`XLEN-1:0] A64, B64;
+  logic  [`DIVBLEN:0] p, ZeroDiff, IntBits, RightShiftX;
+  logic  [`DIVBLEN:0] pPlusr, pPrTrunc, pPrCeil;
+  logic  [`DIVb+3:0] PreShiftX;
 
   // ***can probably merge X LZC with conversion
   // cout the number of leading zeros
-  // Muxes needed for Int; add after Cedar Commit
-  assign ZeroBufX = MDUE ? {ForwardedSrcAE, {`DIVN-`XLEN{1'b0}}} : {Xm, {`DIVN-`NF-1{1'b0}}};
-  assign ZeroBufY = MDUE ? {ForwardedSrcBE, {`DIVN-`XLEN{1'b0}}} : {Ym, {`DIVN-`NF-1{1'b0}}};
+
+  assign As = ForwardedSrcAE[`XLEN-1] & Funct3E[0];
+  assign Bs = ForwardedSrcBE[`XLEN-1] & Funct3E[0];
+  assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
+  assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;
+  
+  assign PosA = As ? -A64 : A64;
+  assign PosB = Bs ? -B64 : B64;
+
+  assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
+  assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
   lzc #(`NF+1) lzcX (Xm, XZeroCnt);
   lzc #(`NF+1) lzcY (Ym, YZeroCnt);
 
-  assign Signed = Funct3E[0];
-  assign Aneg = ForwardedSrcAE[`XLEN-1] & Signed;
-  assign Bneg = ForwardedSrcBE[`XLEN-1] & Signed;
-  assign PosA = Aneg ? -ForwardedSrcAE : ForwardedSrcAE;
-  assign PosB = Bneg ? -ForwardedSrcBE : ForwardedSrcBE;
-
   assign PreprocX = Xm[`NF-1:0]<<XZeroCnt;
   assign PreprocY = Ym[`NF-1:0]<<YZeroCnt;
 
+  // assign ZeroDiff = YZeroCnt - XZeroCnt;
+  // assign p = ZeroDiff[`DIVBLEN] ? '0 : ZeroDiff;
+
+  // assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
+  // assign pPrTrunc = pPlusr[`LOGRK-1:0];
+  // assign pPrCeil = (pPlusr >> `LOGRK) + |(pPrTrunc);
+  // assign n = (pPrCeil << `LOGK) - ((`DIVBLEN)'b1);
+  // assign IntBits = (`DIVBLEN)'(`RK) + p;
+  // assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};
+
   assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
   assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
 
   // *** explain why X is shifted between radices (initial assignment of WS=RX)
   if (`RADIX == 2)  assign X = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
   else              assign X = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
+  // assign X = MDUE ? PreShiftX >> RightShiftX : PreShiftX;
   assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
 
   //           radix 2     radix 4