From 1be91753fe8525667d58170bc5b4eaba8f3be3dc Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 27 May 2022 09:04:02 -0700
Subject: [PATCH] moved lzc to generic and small optimizations on fcvt

---
 pipelined/regression/fp.do   |  2 +-
 pipelined/src/fpu/fcvt.sv    | 39 +++++++++++++-----------------------
 pipelined/src/fpu/fma.sv     | 14 +------------
 pipelined/src/generic/lzc.sv | 13 ++++++++++++
 4 files changed, 29 insertions(+), 39 deletions(-)
 create mode 100644 pipelined/src/generic/lzc.sv

diff --git a/pipelined/regression/fp.do b/pipelined/regression/fp.do
index 208118fc..68c240c8 100644
--- a/pipelined/regression/fp.do
+++ b/pipelined/regression/fp.do
@@ -32,7 +32,7 @@ vlib work
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 # $num = the added words after the call
-vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697 
+vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 
 
 vsim -voptargs=+acc work.testbenchfp -G TEST=$2
 
diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index 55e6706c..dfe98a79 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -39,9 +39,10 @@ module fcvt (
 
     logic [`FPSIZES/3:0]    OutFmt;     // format of the output
     logic [`XLEN-1:0]       PosInt;     // the positive integer input
+    logic [`XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
     logic [`LGLEN-1:0]      LzcIn;      // input to the Leading Zero Counter (priority encoder)
     logic [`NE:0]           CalcExp;    // the calculated expoent
-	logic [$clog2(`LGLEN):0] ShiftAmt;  // how much to shift by
+	logic [$clog2(`LGLEN)-1:0] ShiftAmt;  // how much to shift by
     logic [`LGLEN+`NF:0]    ShiftIn;    // number to be shifted
     logic                   ResDenormUf;// does the result underflow or is denormalized
     logic                   ResUf;      // does the result underflow
@@ -71,6 +72,7 @@ module fcvt (
     logic                   Int64;      // is the integer 64 bits?
     logic                   IntToFp;       // is the opperation an int->fp conversion?
     logic                   ToInt;      // is the opperation an fp->int conversion?
+    logic [$clog2(`LGLEN)-1:0] ZeroCnt; // output from the LZC
 
 
     // seperate OpCtrl for code readability
@@ -91,18 +93,11 @@ module fcvt (
     ///////////////////////////////////////////////////////////////////////////
     // negation
     ///////////////////////////////////////////////////////////////////////////
-    // negate the input if the input is a negitive singed integer
-    //      - remove leading ones if the input is a unsigned 32-bit integer
-    //
-    //              Negitive input
-    //                      64-bit input : negate the input
-    //                      32-bit input : trim to 32-bits and negate the input
-    //              Positive input
-    //                      64-bit input : do nothing
-    //                      32-bit input : trim to 32-bits
+    // 1) negate the input if the input is a negative signed integer
+    // 2) trim the input to the proper size (zero the upper `XLEN-32 bits unless the integer is 64 bits)
 
-    assign PosInt = ResSgn ? Int64 ? -ForwardedSrcAE : {{`XLEN-32{1'b0}}, -ForwardedSrcAE[31:0]} : 
-                             Int64 ? ForwardedSrcAE : {{`XLEN-32{1'b0}}, ForwardedSrcAE[31:0]};
+    assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
+    assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
 
     ///////////////////////////////////////////////////////////////////////////
     // lzc 
@@ -111,16 +106,10 @@ module fcvt (
     // choose the input to the leading zero counter i.e. priority encoder
     //             int -> fp : | positive integer | 00000... (if needed) | 
     //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign LzcIn = IntToFp ? {PosInt, {`LGLEN-`XLEN{1'b0}}} :      // I->F
-                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; // F->F
+    assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
+                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
     
-    // lglen is the largest possible value of ZeroCnt (NF or XLEN) hence normcnt must be log2(lglen) bits
-	logic [$clog2(`LGLEN):0]	i, ZeroCnt;
-	always_comb begin
-			i = 0;
-			while (~LzcIn[`LGLEN-1-i] & i <= `LGLEN-1) i = i+1;  // search for leading one 
-			ZeroCnt = i;
-	end
+    lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
 
 
     ///////////////////////////////////////////////////////////////////////////
@@ -154,9 +143,9 @@ module fcvt (
     //              - only shift fp -> fp if the intital value is denormalized
     //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN):0]&{$clog2(`LGLEN)+1{~CalcExp[`NE]}} :
-                    ResDenormUf&~IntToFp ? ($clog2(`LGLEN)+1)'(`NF-1)+CalcExp[$clog2(`LGLEN):0] : 
-                              (ZeroCnt+1)&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}};
+    assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN)-1:0]&{$clog2(`LGLEN){~CalcExp[`NE]}} :
+                    ResDenormUf&~IntToFp ? ($clog2(`LGLEN))'(`NF-1)+CalcExp[$clog2(`LGLEN)-1:0] : 
+                              (ZeroCnt+1)&{$clog2(`LGLEN){XOrigDenormE|IntToFp}};
     
     // shift
     //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if nessisary | << CalcExp
@@ -568,7 +557,7 @@ module fcvt (
     //      - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
     //      - dont set to zero if fp input is zero but not using the fp input
     //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|PosInt&IntToFp));
+    assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
 
     if (`FPSIZES == 1) begin        
         // IEEE sends a payload while Riscv says to send a canonical quiet NaN
diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 30b352f0..179bc264 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -409,22 +409,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
 
 
 
-    lzc lzc(.f, .NormCntE);
+    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
   
 endmodule
 
-module lzc(
-    input logic  [3*`NF+6:0]            f,
-    output logic [$clog2(3*`NF+7)-1:0]    NormCntE    // normalization shift
-);
-    
-    logic [$clog2(3*`NF+7)-1:0] i;
-    always_comb begin
-        i = 0;
-        while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1;  // search for leading one
-        NormCntE = i;
-    end
-endmodule
 
 
 
diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv
new file mode 100644
index 00000000..78ac99e5
--- /dev/null
+++ b/pipelined/src/generic/lzc.sv
@@ -0,0 +1,13 @@
+// leading zero counter (priority encoder): ZeroCnt = number of 0 bits above the most significant 1 in num
+module lzc #(parameter WIDTH=1) (
+    input logic  [WIDTH-1:0]            num,      // value to scan, starting from bit WIDTH-1
+    output logic [$clog2(WIDTH)-1:0]  ZeroCnt   // leading-zero count; an all-zero num counts WIDTH, truncated to this width
+);
+    // int index can reach WIDTH and stop; a $clog2(WIDTH)-bit index wraps to 0 (endless loop) when WIDTH is a power of 2 and num is 0
+    int i;
+    always_comb begin
+        i = 0;
+        while ((i < WIDTH) && ~num[WIDTH-1-i]) i = i+1;  // bound is tested first so num is never indexed below bit 0
+        ZeroCnt = $bits(ZeroCnt)'(i);                    // explicit truncation to the port width
+    end
+endmodule