From 52ab586a9d12bf81f500aecea0bffd16db761f7a Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Thu, 15 Jun 2023 12:38:33 -0700
Subject: [PATCH] Added input gating on FPU

---
 src/fpu/fctrl.sv       | 9 +++++----
 src/fpu/fpu.sv         | 5 +++--
 src/fpu/unpack.sv      | 7 ++++---
 src/fpu/unpackinput.sv | 7 ++++++-
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv
index e10ba99c2..76855bf81 100755
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@@ -36,7 +36,7 @@ module fctrl import cvw::*;  #(parameter cvw_t P) (
   input  logic [2:0]           FRM_REGW,                           // rounding mode from CSR
   input  logic [1:0]           STATUS_FS,                          // is FPU enabled?
   input  logic                 FDivBusyE,                          // is the divider busy
-  // intruction                                                   
+  // instruction                                                   
   input  logic [31:0]          InstrD,                             // the full instruction
   input  logic [6:0]           Funct7D,                            // bits 31:25 of instruction - may contain percision
   input  logic [6:0]           OpD,                                // bits 6:0 of instruction
@@ -53,6 +53,7 @@ module fctrl import cvw::*;  #(parameter cvw_t P) (
   output logic                 FpLoadStoreM,                       // FP load or store instruction
   output logic [1:0]           PostProcSelE, PostProcSelM,         // select result in the post processing unit
   output logic [1:0]           FResSelE, FResSelM, FResSelW,       // Select one of the results that finish in the memory stage
+  output logic                 FPUActiveE,                         // FP instruction being executed
   // register control signals
   output logic                 FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable
   output logic                 FWriteIntE, FWriteIntM,             // Write to integer register
@@ -308,9 +309,9 @@ module fctrl import cvw::*;  #(parameter cvw_t P) (
   assign Adr3D = InstrD[31:27];
  
   // D/E pipleine register
-  flopenrc #(13+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
-              {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD},
-              {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE});
+  flopenrc #(14+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+              {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ~IllegalFPUInstrD},
+              {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, FPUActiveE});
   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
   flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
   flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});
diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv
index 3d4981384..f71999471 100755
--- a/src/fpu/fpu.sv
+++ b/src/fpu/fpu.sv
@@ -82,6 +82,7 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
   logic                        XEnD, YEnD, ZEnD;                   // X, Y, Z inputs used for current operation
   logic                        XEnE, YEnE, ZEnE;                   // X, Y, Z inputs used for current operation
   logic                        FRegWriteE;                         // Write floating-point register
+  logic                        FPUActiveE;                         // FP instruction being executed
 
   // regfile signals
   logic [P.FLEN-1:0]           FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
@@ -171,7 +172,7 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
               .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
               .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM,
               .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE,
-              .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, 
+              .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, 
               .Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E);
 
   // FP register file
@@ -226,7 +227,7 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
 
   // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity
   unpack #(P) unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), 
-    .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
+    .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), .FPUActive(FPUActiveE),
     .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), 
     .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), 
     .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), 
diff --git a/src/fpu/unpack.sv b/src/fpu/unpack.sv
index 14e9a6f66..145d6a701 100644
--- a/src/fpu/unpack.sv
+++ b/src/fpu/unpack.sv
@@ -30,6 +30,7 @@ module unpack import cvw::*;  #(parameter cvw_t P) (
   input  logic [P.FLEN-1:0]       X, Y, Z,              // inputs from register file
   input  logic [P.FMTBITS-1:0]    Fmt,                  // format signal 00 - single 01 - double 11 - quad 10 - half
   input  logic                    XEn, YEn, ZEn,        // input enables
+  input  logic                    FPUActive,            // Kill inputs when FPU is not active
   output logic                    Xs, Ys, Zs,           // sign bits of XYZ
   output logic [P.NE-1:0]         Xe, Ye, Ze,           // exponents of XYZ (converted to largest supported precision)
   output logic [P.NF:0]           Xm, Ym, Zm,           // mantissas of XYZ (converted to largest supported precision)
@@ -46,17 +47,17 @@ module unpack import cvw::*;  #(parameter cvw_t P) (
   logic XFracZero, YFracZero, ZFracZero;                // is the fraction zero
   logic YExpMax, ZExpMax;                               // is the exponent all 1s
   
-  unpackinput #(P) unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
+  unpackinput #(P) unpackinputX (.A(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .FPUActive,
                           .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
                           .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), 
                           .Subnorm(XSubnorm), .PostBox(XPostBox));
 
-  unpackinput #(P) unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
+  unpackinput #(P) unpackinputY (.A(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), .FPUActive,
                           .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
                           .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), 
                           .Subnorm(), .PostBox());
 
-  unpackinput #(P) unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
+  unpackinput #(P) unpackinputZ (.A(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), .FPUActive,
                           .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
                           .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), 
                           .Subnorm(), .PostBox());
diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv
index 1d429ed4a..c551e8173 100644
--- a/src/fpu/unpackinput.sv
+++ b/src/fpu/unpackinput.sv
@@ -27,9 +27,10 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 module unpackinput import cvw::*;  #(parameter cvw_t P) (
-  input  logic [P.FLEN-1:0]        In,         // inputs from register file
+  input  logic [P.FLEN-1:0]        A,          // inputs from register file
   input  logic                     En,         // enable the input
   input  logic [P.FMTBITS-1:0]     Fmt,        // format signal 00 - single 01 - double 11 - quad 10 - half
+  input  logic                     FPUActive,  // Kill inputs when FPU is not active
   output logic                     Sgn,        // sign bits of the number 
   output logic [P.NE-1:0]          Exp,        // exponent of the number  (converted to largest supported precision)
   output logic [P.NF:0]            Man,        // mantissa of the number  (converted to largest supported precision)
@@ -46,6 +47,10 @@ module unpackinput import cvw::*;  #(parameter cvw_t P) (
 
   logic [P.NF-1:0] Frac;        // Fraction of XYZ
   logic            BadNaNBox;   // incorrectly NaN Boxed
+  logic [P.FLEN-1:0] In;
+
+  // Gate input to zero when FPU is not active to save power and simulation time
+  assign In = A & {P.FLEN{FPUActive}};
 
   if (P.FPSIZES == 1) begin        // if there is only one floating point format supported
       assign BadNaNBox = 0;