From fb890d621d252202c8fb57eb3c86a3f3be7aa3bd Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 20 Jul 2022 02:27:39 +0000
Subject: [PATCH] moved ctrl signal registers into fctrl, also a lot of code
 cleaning

---
 pipelined/src/fpu/divsqrt.sv         |  20 +-
 pipelined/src/fpu/fclassify.sv       |  38 +--
 pipelined/src/fpu/fcmp.sv            | 138 ++++++-----
 pipelined/src/fpu/fctrl.sv           |  51 +++-
 pipelined/src/fpu/fcvt.sv            |  12 +-
 pipelined/src/fpu/fhazard.sv         |  28 +--
 pipelined/src/fpu/fma.sv             |  18 +-
 pipelined/src/fpu/fmashiftcalc.sv    |   8 +-
 pipelined/src/fpu/fpu.sv             | 336 +++++++++++++--------------
 pipelined/src/fpu/fsgninj.sv         |  45 ++--
 pipelined/src/fpu/otfc.sv            |   2 +-
 pipelined/src/fpu/postprocess.sv     |  20 +-
 pipelined/src/fpu/qsel.sv            |   2 +-
 pipelined/src/fpu/shiftcorrection.sv |   6 +-
 pipelined/src/fpu/srt.sv             |  26 +--
 pipelined/src/fpu/srtfsm.sv          |   6 +-
 pipelined/src/fpu/unpack.sv          |  45 ++--
 pipelined/src/fpu/unpackinput.sv     |  38 +--
 18 files changed, 439 insertions(+), 400 deletions(-)

diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index ffc60026..a2f0ba8e 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -34,20 +34,20 @@ module divsqrt(
   input  logic clk, 
   input  logic reset, 
   input  logic [`FMTBITS-1:0] FmtE,
-  input  logic [`NF:0] XManE, YManE,
-  input  logic [`NE-1:0] XExpE, YExpE,
+  input  logic [`NF:0] XmE, YmE,
+  input  logic [`NE-1:0] XeE, YeE,
   input  logic XInfE, YInfE, 
   input  logic XZeroE, YZeroE, 
   input  logic XNaNE, YNaNE, 
   input  logic DivStartE, 
   input  logic StallM,
-  input  logic StallE,
-  output logic DivStickyM,
+  input logic StallE,
+  output logic DivSM,
   output logic DivBusy,
   output logic DivDone,
-  output logic [`NE+1:0] DivCalcExpM,
+  output logic [`NE+1:0] QeM,
   output logic [`DURLEN-1:0] EarlyTermShiftM,
-  output logic [`QLEN-1-(`RADIX/4):0] QuotM
+  output logic [`QLEN-1-(`RADIX/4):0] QmM
 //   output logic [`XLEN-1:0] RemM,
 );
 
@@ -60,10 +60,10 @@ module divsqrt(
   logic [`DURLEN-1:0] Dur;
   logic NegSticky;
 
-  srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
+  srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
-  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
+  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
                .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
-  srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
-                .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
+  srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
+                .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv
index 6c7ab451..6aaec00a 100644
--- a/pipelined/src/fpu/fclassify.sv
+++ b/pipelined/src/fpu/fclassify.sv
@@ -29,29 +29,29 @@
 `include "wally-config.vh"
 
 module fclassify (
-    input logic         XSgnE,  // sign bit
-    input logic         XNaNE,  // is NaN
-    input logic         XSNaNE, // is signaling NaN
-    input logic         XDenormE, // is denormal
-    input logic         XZeroE, // is zero
-    input logic         XInfE,  // is infinity
-    output logic [`XLEN-1:0] ClassResE // classify result
-    );
+    input logic         Xs,     // sign bit
+    input logic         XNaN,   // is NaN
+    input logic         XSNaN,  // is signaling NaN
+    input logic         XDenorm,// is denormal
+    input logic         XZero,  // is zero
+    input logic         XInf,   // is infinity
+    output logic [`XLEN-1:0] ClassRes// classify result
+);
 
     logic PInf, PZero, PNorm, PDenorm;
     logic NInf, NZero, NNorm, NDenorm;
-    logic XNormE;
+    logic XNorm;
    
     // determine the sub categories
-    assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE);
-    assign PInf = ~XSgnE&XInfE;
-    assign NInf = XSgnE&XInfE;
-    assign PNorm = ~XSgnE&XNormE;
-    assign NNorm = XSgnE&XNormE;
-    assign PDenorm = ~XSgnE&XDenormE;
-    assign NDenorm = XSgnE&XDenormE;
-    assign PZero = ~XSgnE&XZeroE;
-    assign NZero = XSgnE&XZeroE;
+    assign XNorm= ~(XNaN | XInf| XDenorm| XZero);
+    assign PInf = ~Xs&XInf;
+    assign NInf = Xs&XInf;
+    assign PNorm = ~Xs&XNorm;
+    assign NNorm = Xs&XNorm;
+    assign PDenorm = ~Xs&XDenorm;
+    assign NDenorm = Xs&XDenorm;
+    assign PZero = ~Xs&XZero;
+    assign NZero = Xs&XZero;
 
     // determine sub category and combine into the result
     //  bit 0 - -Inf
@@ -64,6 +64,6 @@ module fclassify (
     //  bit 7 - +Inf
     //  bit 8 - signaling NaN
     //  bit 9 - quiet NaN
-    assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm,  PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
+    assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
 
 endmodule
diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv
index 9c675784..48ff536f 100755
--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@@ -27,9 +27,10 @@
 //   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 //   OR OTHER DEALINGS IN THE SOFTWARE.
 ////////////////////////////////////////////////////////////////////////////////////////////////
+
 `include "wally-config.vh"
 
-// FOpCtrlE values
+// OpCtrl values
 //    110   min
 //    101   max
 //    010   equal
@@ -37,36 +38,32 @@
 //    011   less than or equal
 
 module fcmp (   
-   input logic  [`FMTBITS-1:0]   FmtE,           // precision 1 = double 0 = single
-   input logic  [2:0]            FOpCtrlE,       // see above table
-   input logic                   XSgnE, YSgnE,   // input signs
-   input logic  [`NE-1:0]        XExpE, YExpE,   // input exponents
-   input logic  [`NF:0]          XManE, YManE,   // input mantissa
-   input logic                   XZeroE, YZeroE, // is zero
-   input logic                   XNaNE, YNaNE,   // is NaN
-   input logic                   XSNaNE, YSNaNE, // is signaling NaN
-   input logic  [`FLEN-1:0]      FSrcXE, FSrcYE, // original, non-converted to double, inputs
-   output logic                  CmpNVE,         // invalid flag
-   output logic [`FLEN-1:0]      CmpFpResE,         // compare resilt
-   output logic [`XLEN-1:0]      CmpIntResE         // compare resilt
+   input logic  [`FMTBITS-1:0]   Fmt,      // format of fp number
+   input logic  [2:0]            OpCtrl,   // see above table
+   input logic                   Xs, Ys,   // input signs
+   input logic  [`NE-1:0]        Xe, Ye,   // input exponents
+   input logic  [`NF:0]          Xm, Ym,   // input mantissa
+   input logic                   XZero, YZero, // is zero
+   input logic                   XNaN, YNaN,   // is NaN
+   input logic                   XSNaN, YSNaN, // is signaling NaN
+   input logic  [`FLEN-1:0]      X, Y,       // original inputs (before unpacker)
+   output logic                  CmpNV,      // invalid flag
+   output logic [`FLEN-1:0]      CmpFpRes,   // compare floating-point result
+   output logic [`XLEN-1:0]      CmpIntRes   // compare integer result
    );
 
-   logic LTabs, LT, EQ; // is X < or > or = Y
-   logic [`FLEN-1:0] NaNRes;
-   logic BothZero, EitherNaN, EitherSNaN;
+   logic LTabs, LT, EQ;         // is X < or > or = Y
+   logic [`FLEN-1:0] NaNRes;    // NaN result
+   logic BothZero;              // are both inputs zero
+   logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
    
-   assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
-   assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
-   // assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression
+   assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
+   assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
+   assign EQ = (X == Y);
 
-   //assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]});
-   //assign LT = XInt < YInt;
-//   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
-   assign EQ = (FSrcXE == FSrcYE);
-
-   assign BothZero = XZeroE&YZeroE;
-   assign EitherNaN = XNaNE|YNaNE;
-   assign EitherSNaN = XSNaNE|YSNaNE;
+   assign BothZero = XZero&YZero;
+   assign EitherNaN = XNaN|YNaN;
+   assign EitherSNaN = XSNaN|YSNaN;
 
 
    // flags
@@ -74,78 +71,91 @@ module fcmp (
    //    LT/LE - signaling - sets invalid if NaN input
    //    EQ - quiet - sets invalid if signaling NaN input
    always_comb begin
-      case (FOpCtrlE[2:0])
-         3'b110: CmpNVE = EitherSNaN;//min 
-         3'b101: CmpNVE = EitherSNaN;//max
-         3'b010: CmpNVE = EitherSNaN;//equal
-         3'b001: CmpNVE = EitherNaN;//less than
-         3'b011: CmpNVE = EitherNaN;//less than or equal
-         default: CmpNVE = 1'bx;
+      case (OpCtrl[2:0])
+         3'b110: CmpNV = EitherSNaN;//min 
+         3'b101: CmpNV = EitherSNaN;//max
+         3'b010: CmpNV = EitherSNaN;//equal
+         3'b001: CmpNV = EitherNaN;//less than
+         3'b011: CmpNV = EitherNaN;//less than or equal
+         default: CmpNV = 1'bx;
       endcase
    end 
 
-   // Min/Max
-   //    - outputs the min/max of X and Y
-   //    - -0 < 0
-   //    - if both are NaN return quiet X
-   //    - if one is a NaN output the non-NaN
-   // LT/LE/EQ
-   //    - -0 = 0
-   //    - inf = inf and -inf = -inf
-   //    - return 0 if comparison with NaN (unordered)
-
    // fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
    // for IEEE, return the payload of X
    // for RISC-V, return the canonical NaN
 
-   
+   // select the NaN result
    if (`FPSIZES == 1)
-      if(`IEEE754) assign NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
+      if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
       else         assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
 
    else if (`FPSIZES == 2) 
-      if(`IEEE754) assign NaNRes = FmtE ? {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
-      else         assign NaNRes = FmtE ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+      if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+      else         assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
    
    else if (`FPSIZES == 3)
       always_comb
-            case (FmtE)
+            case (Fmt)
                `FMT:  
-                  if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
+                  if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
                   else         NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                `FMT1:
-                  if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
                   else         NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                `FMT2:
-                  if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
                   else         NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
                default:        NaNRes = {`FLEN{1'bx}};
             endcase
 
    else if (`FPSIZES == 4)
       always_comb
-            case (FmtE)
+            case (Fmt)
                2'h3:  
-                  if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
+                  if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
                   else         NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                2'h1:  
-                  if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, XSgnE, {`D_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`D_NF]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
                   else         NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                2'h0: 
-                  if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, XSgnE, {`S_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`S_NF]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
                   else         NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
                2'h2:
-                  if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, XSgnE, {`H_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`H_NF]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
                   else         NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
             endcase
 
- // when one input is a NaN -output the non-NaN
-   assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
-                                          : YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE : 
-                                    XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
-                                          : YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
-                                    
 
-   assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
+   // Min/Max (OpCtrl[0] selects max, otherwise min - see the OpCtrl table at the top of the file)
+   //    - outputs the min/max of X and Y
+   //    - -0 < 0
+   //    - if both are NaN return NaNRes (quiet NaN with X's payload if `IEEE754, otherwise the canonical NaN)
+   //    - if one is a NaN output the non-NaN
+   always_comb
+      if(OpCtrl[0]) // MAX
+         if(XNaN)
+            if(YNaN)    CmpFpRes = NaNRes;   // X = NaN Y = NaN
+            else        CmpFpRes = Y;        // X = NaN Y != NaN
+         else
+            if(YNaN)    CmpFpRes = X; // X != NaN Y = NaN
+            else // X,Y != NaN
+               if(LT)   CmpFpRes = Y; // X < Y
+               else     CmpFpRes = X; // X >= Y (LT is not set)
+      else  // MIN
+         if(XNaN)
+            if(YNaN)    CmpFpRes = NaNRes;   // X = NaN Y = NaN
+            else        CmpFpRes = Y;        // X = NaN Y != NaN
+         else
+            if(YNaN)    CmpFpRes = X; // X != NaN Y = NaN
+            else // X,Y != NaN
+               if(LT)   CmpFpRes = X; // X < Y
+               else     CmpFpRes = Y; // X >= Y (LT is not set)
+                                    
+   // LT/LE/EQ (result is a single bit zero-extended to XLEN)
+   //    - -0 = 0
+   //    - inf = inf and -inf = -inf
+   //    - return 0 if comparison with NaN (unordered)
+   assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
    
 endmodule
diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index 5c553e86..85047248 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -29,25 +29,41 @@
 `include "wally-config.vh"
 
 module fctrl (
+  input  logic       clk,
+  input  logic       reset,
+  input  logic       StallE, StallM, StallW, // stall signals
+  input  logic       FlushE, FlushM, FlushW, // flush signals
+  input  logic [31:0] InstrD,
   input  logic [6:0] Funct7D,   // bits 31:25 of instruction - may contain percision
   input  logic [6:0] OpD,       // bits 6:0 of instruction
   input  logic [4:0] Rs2D,      // bits 24:20 of instruction
   input  logic [2:0] Funct3D,   // bits 14:12 of instruction - may contain rounding mode
   input  logic [2:0] FRM_REGW,  // rounding mode from CSR
   input  logic [1:0] STATUS_FS, // is FPU enabled?
+  input  logic       FDivBusyE,  // is the divider busy
   output logic       IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
-  output logic       FRegWriteD,  // FP register write enable
-  output logic       FDivStartD,  // Start division or squareroot
-  output logic [1:0] FResSelD, // select result to be written to fp register
-  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
-  output logic [1:0] PostProcSelD, 
-  output logic [`FMTBITS-1:0] FmtD,        // precision - single-0 double-1
-  output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-  output logic       FWriteIntD   // is the result written to the integer register
+  output logic 		         FRegWriteM, FRegWriteW, // FP register write enable
+  output logic [2:0] 	      FrmM,                   // FP rounding mode
+  output logic [`FMTBITS-1:0] FmtE, FmtM,             // FP format
+  output logic 		         DivStartE,             // Start division or square root
+  output logic 		         FWriteIntE, FWriteIntM,                         // Write to integer register
+  output logic [2:0] 	      OpCtrlE, OpCtrlM,       // Select which operation to do in each component
+  output logic [1:0] 	      FResSelE, FResSelM, FResSelW,       // Select one of the results that finish in the memory stage
+  output logic [1:0] 	      PostProcSelE, PostProcSelM, // select result in the post processing unit
+  output logic [4:0] 	      Adr1E, Adr2E, Adr3E                // addresses of each input
   );
 
   `define FCTRLW 11
   logic [`FCTRLW-1:0] ControlsD;
+  logic 		  FRegWriteD; // FP register write enable
+  logic 		  DivStartD; // Start division or square root
+  logic 		  FWriteIntD; // integer register write enable
+  logic 		         FRegWriteE; // FP register write enable
+  logic [2:0] 	      OpCtrlD;       // Select which operation to do in each component
+  logic [1:0] 	      PostProcSelD; // select result in the post processing unit
+  logic [1:0] 	      FResSelD;       // Select one of the results that finish in the memory stage
+  logic [2:0] FrmD, FrmE;                   // FP rounding mode
+  logic [`FMTBITS-1:0] FmtD;             // FP format
   //*** will putting x for don't cares reduce area in synthisis???
   // FPU Instruction Decoder
   always_comb
@@ -130,7 +146,7 @@ module fctrl (
     endcase
 
   // unswizzle control bits
-  assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
+  assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD;
   
   // rounding modes:
   //    000 - round to nearest, ties to even
@@ -168,7 +184,7 @@ module fctrl (
 //  10  fma
 
 //  Other Sel:
-//    Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
+//    Ctrl signal = {OpCtrl[2], &OpCtrl[1:0]}
 //        000 - sign            00
 //        001 - negate sign     00
 //        010 - xor sign        00
@@ -205,5 +221,20 @@ module fctrl (
 //        01 - negate sign
 //        10 - xor sign
     
+  // D/E pipeline registers: control signals, operand addresses (taken from InstrD), and divide/sqrt start
+  flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+              {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
+              {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
+   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
+                           {Adr1E, Adr2E, Adr3E});
+  flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); // 4th argument (presumably the enable - confirm against flopenrc) is also asserted while the divider is busy
+  // E/M pipeline register: control signals
+  flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
+              {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
+              {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
+  // M/W pipeline register: FP register write enable and result select
+  flopenrc #(3)  MWCtrlReg(clk, reset, FlushW, ~StallW,
+          {FRegWriteM, FResSelM},
+          {FRegWriteW, FResSelW});
 
 endmodule
diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index 9d7f2d62..d2967887 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -35,7 +35,7 @@ module fcvt (
     input logic [`NE-1:0]   Xe,          // input's exponent
     input logic [`NF:0]     Xm,          // input's fraction
     input logic [`XLEN-1:0] Int, // integer input - from IEU
-    input logic [2:0]       FOpCtrl,       // choose which opperation (look below for values)
+    input logic [2:0]       OpCtrl,       // choose which opperation (look below for values)
     input logic             ToInt,     // is fp->int (since it's writting to the integer register)
     input logic             XZero,         // is the input zero
     input logic             XDenorm,   // is the input denormalized
@@ -73,17 +73,17 @@ module fcvt (
 
 
     // seperate OpCtrl for code readability
-    assign Signed = FOpCtrl[0];
-    assign Int64 =  FOpCtrl[1];
-    assign IntToFp =   FOpCtrl[2];
+    assign Signed = OpCtrl[0];
+    assign Int64 =  OpCtrl[1];
+    assign IntToFp =   OpCtrl[2];
 
     // choose the ouptut format depending on the opperation
     //      - fp -> fp: OpCtrl contains the percision of the output
     //      - int -> fp: Fmt contains the percision of the output
     if (`FPSIZES == 2) 
-        assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); 
+        assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); 
     else if (`FPSIZES == 3 | `FPSIZES == 4) 
-        assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; 
+        assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; 
 
 
     ///////////////////////////////////////////////////////////////////////////
diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv
index ca31d904..36a0ff82 100644
--- a/pipelined/src/fpu/fhazard.sv
+++ b/pipelined/src/fpu/fhazard.sv
@@ -31,20 +31,20 @@
 `include "wally-config.vh"
 
 module fhazard(
-    input logic [4:0]   Adr1E, Adr2E, Adr3E,    // read data adresses
-    input logic         FRegWriteM, FRegWriteW, // is the fp register being written to
-	  input logic [4:0]   RdM, RdW,               // the adress being written to
-    input logic [1:0]   FResSelM,            // the result being selected
+    input  logic [4:0]  Adr1E, Adr2E, Adr3E,    // read data adresses
+    input  logic        FRegWriteM, FRegWriteW, // is the fp register being written to
+	  input  logic [4:0]  RdM, RdW,               // the adress being written to
+    input  logic [1:0]  FResSelM,            // the result being selected
     output logic        FStallD,                // stall the decode stage
-    output logic [1:0]  FForwardXE, FForwardYE, FForwardZE // select a forwarded value
+    output logic [1:0]  ForwardXE, ForwardYE, ForwardZE // select a forwarded value
 );
 
 
   always_comb begin
     // set defaults
-    FForwardXE = 2'b00; // choose FRD1E
-    FForwardYE = 2'b00; // choose FRD2E
-    FForwardZE = 2'b00; // choose FRD3E
+    ForwardXE = 2'b00; // choose FRD1E
+    ForwardYE = 2'b00; // choose FRD2E
+    ForwardZE = 2'b00; // choose FRD3E
     FStallD = 0;
 
     //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
@@ -52,28 +52,28 @@ module fhazard(
     // if the needed value is in the memory stage - input 1
     if ((Adr1E == RdM) & FRegWriteM) 
       // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
       else FStallD = 1;                             // otherwise stall
     // if the needed value is in the writeback stage
-    else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
+    else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
   
 
     // if the needed value is in the memory stage - input 2
     if ((Adr2E == RdM) & FRegWriteM)
       // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
       else FStallD = 1;                             // otherwise stall
     // if the needed value is in the writeback stage
-    else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
+    else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
 
 
     // if the needed value is in the memory stage - input 3
     if ((Adr3E == RdM) & FRegWriteM)
       // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
       else FStallD = 1;                             // otherwise stall
     // if the needed value is in the writeback stage
-    else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
+    else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
 
   end 
 
diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 3f4cc2ac..067147ee 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -34,7 +34,7 @@ module fma(
     input logic  [`NE-1:0]      Xe, Ye, Ze,    // input's biased exponents in B(NE.0) format
     input logic  [`NF:0]        Xm, Ym, Zm,    // input's significands in U(0.NF) format
     input logic                 XZero, YZero, ZZero, // is the input zero
-    input logic  [2:0]          FOpCtrl,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
+    input logic  [2:0]          OpCtrl,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
     input logic  [`FMTBITS-1:0] Fmt,       // format of the result single double half or quad
     output logic [`NE+1:0]      Pe,       // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
     output logic                ZmSticky,  // sticky bit that is calculated during alignment
@@ -46,7 +46,7 @@ module fma(
     output logic                Ps,          // the product's sign
     output logic                Ss,          // the sum's sign
     output logic [`NE+1:0]      Se,
-    output logic [$clog2(3*`NF+7)-1:0]          NCnt        // normalization shift count
+    output logic [$clog2(3*`NF+7)-1:0]          SCnt        // normalization shift count
     );
 
     logic [2*`NF+1:0]   Pm;           // the product's significand in U(2.2Nf) format
@@ -72,7 +72,7 @@ module fma(
     // Alignment shifter
     ///////////////////////////////////////////////////////////////////////////////
     // calculate the signs and take the opperation into account
-    sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
+    sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
 
     align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
                 .Am, .ZmSticky, .KillProd);
@@ -85,7 +85,7 @@ module fma(
         
     add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
     
-    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
+    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt);
 endmodule
 
 
@@ -120,7 +120,7 @@ endmodule
 
 
 module sign(    
-    input  logic [2:0]  FOpCtrl,               // opperation contol
+    input  logic [2:0]  OpCtrl,               // opperation contol
     input  logic        Xs, Ys, Zs,    // sign of the inputs
     output logic        Ps,     // the product's sign - takes opperation into account
     output logic        As   // aligned addend sign used in fma - takes opperation into account
@@ -130,9 +130,9 @@ module sign(
     //      Negate product's sign if FNMADD or FNMSUB
     
     // flip is negation opperation
-    assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
+    assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
     // flip if subtraction
-    assign As = Zs^FOpCtrl[0];
+    assign As = Zs^OpCtrl[0];
 
 endmodule
 
@@ -275,7 +275,7 @@ endmodule
 module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
     input logic  [3*`NF+6:0] A,     // addend
     input logic  [2*`NF+3:0] P,     // product
-    output logic [$clog2(3*`NF+7)-1:0]       NCnt   // normalization shift count for the positive result
+    output logic [$clog2(3*`NF+7)-1:0]       SCnt   // normalization shift count for the positive result
     ); 
     
     logic [3*`NF+6:0] T;
@@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
 
 
 
-    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
+    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt));
   
 endmodule
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index d598efb7..7464149f 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -32,7 +32,7 @@ module fmashiftcalc(
     input logic  [3*`NF+5:0]            FmaSm,       // the positive sum
     input logic  [`NE-1:0]              Ze,      // exponent of Z
     input logic  [`NE+1:0]              FmaPe,   // X exponent + Y exponent - bias
-    input logic  [$clog2(3*`NF+7)-1:0]  FmaNCnt,   // normalization shift count
+    input logic  [$clog2(3*`NF+7)-1:0]  FmaSCnt,   // normalization shift count
     input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
     input logic                         FmaKillProd,  // is the product set to zero
     input logic [`NE+1:0] FmaSe,
@@ -52,7 +52,7 @@ module fmashiftcalc(
     // Determine if the sum is zero
     assign FmaSZero = ~(|FmaSm);
     // calculate the sum's exponent
-    assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
+    assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
@@ -152,7 +152,7 @@ module fmashiftcalc(
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
     if (`FPSIZES == 1)
-        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1;
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
     else
-        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1;
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1;
 endmodule
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index a9c0ac24..6d9b9cf4 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -30,28 +30,28 @@
 `include "wally-config.vh"
 
 module fpu (
-  input logic 		   clk,
-  input logic 		   reset,
-  input logic [2:0] 	   FRM_REGW, // Rounding mode from CSR
-  input logic [31:0] 	   InstrD, // instruction from IFU
-  input logic [`FLEN-1:0]  ReadDataW,// Read data from memory
-  input logic [`XLEN-1:0]  ForwardedSrcAE, // Integer input being processed (from IEU)
-  input logic 		   StallE, StallM, StallW, // stall signals from HZU
-  input logic 		   FlushE, FlushM, FlushW, // flush signals from HZU
-  input logic [4:0] 	   RdM, RdW, // which FP register to write to (from IEU)
-  input logic [1:0]        STATUS_FS, // Is floating-point enabled?
-  output logic 		   FRegWriteM, // FP register write enable
-  output logic 		   FpLoadStoreM, // Fp load instruction?
-  output logic              FStore2,
-  output logic 		   FStallD, // Stall the decode stage
-  output logic 		   FWriteIntE, // integer register write enables
-  output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
-  output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
-  output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
-  output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
-  output logic [1:0]       FResSelW,
-  output logic 		   FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
-  output logic 		   IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
+  input logic 		         clk,
+  input logic 		         reset,
+  input logic  [2:0] 	   FRM_REGW,   // Rounding mode (from CSR)
+  input logic  [31:0] 	   InstrD,     // instruction (from IFU)
+  input logic  [`FLEN-1:0] ReadDataW,  // Read data (from LSU)
+  input logic  [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU)
+  input logic 		         StallE, StallM, StallW, // stall signals (from HZU)
+  input logic 		         FlushE, FlushM, FlushW, // flush signals (from HZU)
+  input logic  [4:0] 	   RdM, RdW,   // which FP register to write to (from IEU)
+  input logic  [1:0]       STATUS_FS,  // Is floating-point enabled? (From privileged unit)
+  output logic 		      FRegWriteM, // FP register write enable (to privileged unit)
+  output logic 		      FpLoadStoreM,  // Fp load instruction? (to LSU)
+  output logic             FStore2,       // store two words into memory (to LSU)
+  output logic 		      FStallD,       // Stall the decode stage (To HZU)
+  output logic 		      FWriteIntE,    // integer register write enable (to IEU)
+  output logic [`XLEN-1:0] FWriteDataE,   // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
+  output logic [`FLEN-1:0] FWriteDataM,   // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
+  output logic [`XLEN-1:0] FIntResM,      // data to be written to integer register (to IEU)
+  output logic [`XLEN-1:0] FCvtIntResW,   // convert result to be written to integer register (to IEU)
+  output logic [1:0]       FResSelW,      // final result selection (to IEU)
+  output logic 		      FDivBusyE,     // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
+  output logic 		      IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
   output logic [4:0] 	   SetFflagsM        // FPU flags (to privileged unit)
   );
 
@@ -62,99 +62,88 @@ module fpu (
    //    - sets the underflow after rounding
   
    // control signals
-   logic 		  FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
-   logic [2:0] 	  FrmD, FrmE, FrmM;                   // FP rounding mode
-   logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW;             // FP precision 0-single 1-double
-   logic 		  FDivStartD, FDivStartE;             // Start division or squareroot
-   logic 		  FWriteIntD;                         // Write to integer register
-   logic 		  FWriteIntM;                         // Write to integer register
-   logic [1:0] 	  FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
-   logic [2:0] 	  FOpCtrlD, FOpCtrlE, FOpCtrlM;       // Select which opperation to do in each component
-   logic [1:0] 	  FResSelD, FResSelE, FResSelM;       // Select one of the results that finish in the memory stage
-   logic [1:0] 	  PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
-   logic [4:0] 	  Adr1E, Adr2E, Adr3E;                // adresses of each input
+   logic 		         FRegWriteW; // FP register write enable
+   logic [2:0] 	      FrmM;                   // FP rounding mode
+   logic [`FMTBITS-1:0] FmtE, FmtM;             // FP precision 0-single 1-double
+   logic 		         DivStartE;             // Start division or squareroot
+   logic 		         FWriteIntM;                         // Write to integer register
+   logic [1:0] 	      ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
+   logic [2:0] 	      OpCtrlE, OpCtrlM;       // Select which operation to do in each component
+   logic [1:0] 	      FResSelE, FResSelM;       // Select one of the results that finish in the memory stage
+   logic [1:0] 	      PostProcSelE, PostProcSelM; // select result in the post processing unit
+   logic [4:0] 	      Adr1E, Adr2E, Adr3E;                // addresses of each input
 
    // regfile signals
-   logic [`FLEN-1:0] 	  FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
-   logic [`FLEN-1:0] 	  FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
-   logic [`FLEN-1:0] 	  FSrcXE;                             // Input 1 to the various units (after forwarding)
-   logic [`XLEN-1:0] 	  IntSrcXE;                             // Input 1 to the various units (after forwarding)
-   logic [`FLEN-1:0] 	  FPreSrcYE, FSrcYE;                  // Input 2 to the various units (after forwarding)
-   logic [`FLEN-1:0] 	  FPreSrcZE, FSrcZE;                  // Input 3 to the various units (after forwarding)
+   logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
+   logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
+   logic [`FLEN-1:0] XE;                             // Input 1 to the various units (after forwarding)
+   logic [`XLEN-1:0] IntSrcXE;                             // Input 1 to the various units (after forwarding)
+   logic [`FLEN-1:0] PreYE, YE;                  // Input 2 to the various units (after forwarding)
+   logic [`FLEN-1:0] PreZE, ZE;                  // Input 3 to the various units (after forwarding)
 
    // unpacking signals
-   logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
-   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
-   logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
-   logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
-   logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
-   logic [`NF:0] 	  XManM, YManM, ZManM;                // input's fraction - memory stage
-   logic 		  XNaNE, YNaNE, ZNaNE;                // is the input a NaN - execute stage
-   logic 		  XNaNM, YNaNM, ZNaNM;                // is the input a NaN - memory stage
-   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
-   logic 		  XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
-   logic 		  XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
-   logic 		  XDenormE, ZDenormE, ZDenormM;       // is the input denormalized
-   logic 		  XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
-   logic 		  XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
-   logic 		  XZeroQ, YZeroQ;                     // is the input zero - divide
-   logic 		  XInfE, YInfE, ZInfE;                // is the input infinity - execute stage
-   logic 		  XInfM, YInfM, ZInfM;                // is the input infinity - memory stage
-   logic 		  XInfQ, YInfQ;                       // is the input infinity - divide
-   logic 		  XExpMaxE;                           // is the exponent all ones (max value)
-   logic 		  FmtQ;
-   logic 		  FOpCtrlQ;   
+   logic 		      XsE, YsE, ZsE;                // input's sign - execute stage
+   logic 		      XsM, YsM;                       // input's sign - memory stage
+   logic [`NE-1:0] 	XeE, YeE, ZeE;                // input's exponent - execute stage
+   logic [`NE-1:0] 	ZeM;                              // input's exponent - memory stage
+   logic [`NF:0] 	   XmE, YmE, ZmE;                // input's fraction - execute stage
+   logic [`NF:0] 	   XmM, YmM, ZmM;                // input's fraction - memory stage
+   logic 		      XNaNE, YNaNE, ZNaNE;                // is the input a NaN - execute stage
+   logic 		      XNaNM, YNaNM, ZNaNM;                // is the input a NaN - memory stage
+   logic 		      XNaNQ, YNaNQ;                       // is the input a NaN - divide
+   logic 		      XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
+   logic 		      XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
+   logic 		      XDenormE, ZDenormE, ZDenormM;       // is the input denormalized
+   logic 		      XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
+   logic 		      XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
+   logic 		      XInfE, YInfE, ZInfE;                // is the input infinity - execute stage
+   logic 		      XInfM, YInfM, ZInfM;                // is the input infinity - memory stage
+   logic 		      XExpMaxE;                           // is the exponent all ones (max value)
 
    // Fma Signals
-   logic [3*`NF+5:0]	SumE, SumM;                       
-   logic [`NE+1:0]	    ProdExpE, ProdExpM;
-   logic 			    AddendStickyE, AddendStickyM;
-   logic [`NE+1:0]      SeE,SeM;
-   logic 			    KillProdE, KillProdM;
-   logic 			    InvAE, InvAM;
-   logic 			    NegSumE, NegSumM;
-   logic 			    ZSgnEffE, ZSgnEffM;
-   logic 			    PSgnE, PSgnM;
-   logic 			    SsE, SsM;
-   logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;
+   logic [3*`NF+5:0] SmE, SmM;                       
+   logic [`NE+1:0]	PeE, PeM;
+   logic 			   ZmStickyE, ZmStickyM;
+   logic [`NE+1:0]   SeE,SeM;
+   logic 			   KillProdE, KillProdM;
+   logic 			   InvAE, InvAM;
+   logic 			   NegSumE, NegSumM;
+   logic 			   AsE, AsM;
+   logic 			   PsE, PsM;
+   logic 			   SsE, SsM;
+   logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM;
 
    // Cvt Signals
-   logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
-   logic [`LOGCVTLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
+   logic [`NE:0]           CeE, CeM;    // the calculated exponent
+   logic [`LOGCVTLEN-1:0]  CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
    logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
-   logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
+   logic                   CsE, CsM;     // the result's sign
    logic                   IntZeroE, IntZeroM;      // is the integer zero?
-   logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
+   logic [`CVTLEN-1:0]     CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
    
    //divide signals
-   logic [`QLEN-1-(`RADIX/4):0] QuotM;
-   logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
-   logic DivStickyE, DivStickyM;
-   logic DivDoneM;
-   logic [`DURLEN-1:0] EarlyTermShiftM;
+   logic [`QLEN-1-(`RADIX/4):0] QmM;
+   logic [`NE+1:0]      QeE, QeM; 
+   logic                DivSE, DivSM;
+   logic                DivDoneM;
+   logic [`DURLEN-1:0]  EarlyTermShiftM;
 
    // result and flag signals
-   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
-   logic [4:0] 	  FDivFlgM;                 // divide/squareroot flags  
-   logic [`FLEN-1:0] 	  ReadResW;                           // read result (load instruction)
-   logic [`XLEN-1:0] 	  ClassResE;               // classify result
-   logic [`XLEN-1:0] 	  FIntResE;               // classify result
-   logic [`FLEN-1:0] 	  FpResM, FpResW;               // classify result
-   logic [`FLEN-1:0] 	  PostProcResM;               // classify result
-   logic [4:0] 	  PostProcFlgM;               // classify result
+   logic [`XLEN-1:0] ClassResE;               // classify result
+   logic [`XLEN-1:0] FIntResE;               // integer result selected in the execute stage
+   logic [`FLEN-1:0] FpResM, FpResW;               // floating-point result - memory and writeback stages
+   logic [`FLEN-1:0] PostProcResM;               // post-processing unit result
+   logic [4:0] 	   PostProcFlgM;               // post-processing unit flags
    logic [`XLEN-1:0] FCvtIntResM; 
-   logic [`FLEN-1:0] 	  CmpFpResE;                   // compare result
-   logic [`XLEN-1:0] 	  CmpIntResE;                   // compare result
-   logic 		           CmpNVE;                     // compare invalid flag (Not Valid)     
-   logic [`FLEN-1:0] 	  SgnResE;                   // sign injection result
-   logic [`FLEN-1:0] 	  PreFpResE, PreFpResM, PreFpResW;                // selected result that is ready in the memory stage
-   logic  	        PreNVE, PreNVM;                       // selected flag that is ready in the memory stage     
-   logic [`FLEN-1:0] 	  FPUResultW;                         // final FP result being written to the FP register     
+   logic [`FLEN-1:0] CmpFpResE;                   // compare result
+   logic [`XLEN-1:0] CmpIntResE;                   // compare result
+   logic 		      CmpNVE;                     // compare invalid flag (Not Valid)     
+   logic [`FLEN-1:0] SgnResE;                   // sign injection result
+   logic [`FLEN-1:0] PreFpResE, PreFpResM;                // selected result that is ready in the memory stage
+   logic  	         PreNVE, PreNVM;                       // selected flag that is ready in the memory stage     
+   logic [`FLEN-1:0] FPUResultW;                         // final FP result being written to the FP register     
    // other signals
-   logic 		  FDivSqrtDoneE;                      // is divide done
-   logic [63:0] 	  DivInput1E, DivInput2E;             // inputs to divide/squareroot unit
-   logic 		  load_preload;                       // enable for FF on fpdivsqrt     
-   logic [`FLEN-1:0] 	  AlignedSrcAE;                       // align SrcA to the floating point format
+   logic [`FLEN-1:0] 	 AlignedSrcAE;                       // align SrcA to the floating point format
    logic [`FLEN-1:0]     BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
    logic [`FLEN-1:0]     BoxedOneE;                         // Zero value for Z for multiplication, with NaN boxing if needed
    
@@ -171,9 +160,11 @@ module fpu (
    //////////////////////////////////////////////////////////////////////////////////////////
 
    // calculate FP control signals
-   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
-      .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, 
-      .FmtD, .FrmD, .FWriteIntD);
+   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
+               .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
+               .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
+               .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM,
+               .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
 
    // FP register file
    fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@@ -185,12 +176,6 @@ module fpu (
    flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
    flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
    flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
-   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
-                           {Adr1E, Adr2E, Adr3E});
-   flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
-               {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
-               {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
-   flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
 
    // EXECUTION STAGE
    
@@ -207,12 +192,12 @@ module fpu (
    // Hazard unit for FPU  
    //    - determines if any forwarding or stalls are needed
    fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, 
-                  .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
+                  .FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
 
    // forwarding muxs
-   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
-   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
-   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);
+   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
+   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE);
+   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE);
 
 
    generate
@@ -227,7 +212,7 @@ module fpu (
    endgenerate
 
 
-   mux2  #(`FLEN)  fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
+   mux2  #(`FLEN)  fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Replace Y with BoxedOneE (presumably for add/sub, computed as X*1 + Y)
    
    // Force Z to be 0 for multiply instructions 
    generate
@@ -241,55 +226,76 @@ module fpu (
                                  (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
    endgenerate
 
-   mux3  #(`FLEN)  fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
+   mux3  #(`FLEN)  fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE);
 
    // unpack unit
    //    - splits FP inputs into their various parts
    //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
-   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
-         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, 
-         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
+   unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), 
+                  .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
+                  .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), 
+                  .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), 
+                  .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), 
+                  .ZInf(ZInfE), .XExpMax(XExpMaxE));
    
-   // fma - does multiply, add, and multiply-add instructions 
-   fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), 
-            .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), 
-            .Xm(XManE), .Ym(YManE), .Zm(ZManE), 
+   // fused multiply add
+   //    - fadd/fsub
+   //    - fmul
+   //    - fmadd/fnmadd/fmsub/fnmsub
+   fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), 
+            .Xe(XeE), .Ye(YeE), .Ze(ZeE), 
+            .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
             .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), 
-            .FOpCtrl(FOpCtrlE), .Fmt(FmtE), 
-            .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE),
-            .Sm(SumE), .Pe(ProdExpE), 
-            .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), 
-            .ZmSticky(AddendStickyE), .KillProd(KillProdE)); 
+            .OpCtrl(OpCtrlE), .Fmt(FmtE), 
+            .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
+            .Sm(SmE), .Pe(PeE), 
+            .NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE), 
+            .ZmSticky(ZmStickyE), .KillProd(KillProdE)); 
 
-   divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
-                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
-                  .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
-                  .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
-   // other FP execution units
-   fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
-            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
-   fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
-   fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
+   // divide and squareroot
+   //    - fdiv
+   //    - fsqrt
+   // *** add other operations
+   divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, 
+                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), 
+                  .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
+                  .EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
+   // compare
+   //    - fmin/fmax
+   //    - flt/fle/feq
+   fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), 
+               .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), 
+               .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), 
+               .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
+   // sign injection
+   //    - fsgnj/fsgnjx/fsgnjn
+   fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
 
-   fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), 
-              .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), 
-              .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), 
+   // classify
+   //    - fclass
+   fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE), 
+                        .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
+
+   // convert
+   //    - fcvt.*.*
+   fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), 
+              .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE), 
+              .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE), 
               .LzcIn(CvtLzcInE));
 
    // data to be stored in memory - to IEU
    //    - FP uses NaN-blocking format
    //        - if there are any unsused bits the most significant bits are filled with 1s
    if (`LLEN==`XLEN) begin
-      assign FWriteDataE = FSrcYE[`XLEN-1:0]; 
+      assign FWriteDataE = YE[`XLEN-1:0]; 
    end else begin
       logic [`FLEN-1:0] FWriteDataE;
       if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
       else assign FStore2 = FmtM;
 
-      if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
-      else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
-      else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
+      if (`FPSIZES==1) assign FWriteDataE = YE;
+      else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
+      else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
 
       flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
    end
@@ -306,14 +312,14 @@ module fpu (
                              {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
    endgenerate
    // select a result that may be written to the FP register
-   mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
-   assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
+   mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
+   assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
 
    // select the result that may be written to the integer register - to IEU
    if (`FLEN>`XLEN)
-      assign IntSrcXE = FSrcXE[`XLEN-1:0];
+      assign IntSrcXE = XE[`XLEN-1:0];
    else 
-      assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
+      assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE};
 
    mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
    // *** DH 5/25/22: CvtRes will move to mem stage.  Premux in execute to save area, then make sure stalls are ok
@@ -321,27 +327,24 @@ module fpu (
 
    // E/M pipe registers
 
-   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
-   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
-   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
-   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
+   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM);
+   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
+   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM});
+   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
    flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
    flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
    flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, 
             {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
             {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});     
    flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
-   flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
-               {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
-               {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
-   flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
-   flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
+   flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); 
+   flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);  
    flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE},
-                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM});
+                           {ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE},
+                           {ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM});
    flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
-                           {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
-                           {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
+                           {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
+                           {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
 
    // BEGIN MEMORY STAGE
 
@@ -357,11 +360,11 @@ module fpu (
 
    assign FpLoadStoreM = FResSelM[1];
 
-   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
-                           .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
-                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
-                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM),
-                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
+   postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM),
+                           .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
+                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM),
+                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
+                           .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
                            .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
    // FPU flag selection - to privileged
@@ -371,9 +374,6 @@ module fpu (
    // M/W pipe registers
    flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); 
    flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
-   flopenrc #(4+int'(`FMTBITS-1))  MWCtrlReg(clk, reset, FlushW, ~StallW,
-            {FRegWriteM, FResSelM, FmtM},
-            {FRegWriteW, FResSelW, FmtW});
 
    // BEGIN WRITEBACK STAGE
 
diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv
index 17d15669..a5b7e774 100755
--- a/pipelined/src/fpu/fsgninj.sv
+++ b/pipelined/src/fpu/fsgninj.sv
@@ -26,60 +26,59 @@
 //   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 //   OR OTHER DEALINGS IN THE SOFTWARE.
 ////////////////////////////////////////////////////////////////////////////////////////////////
+
 `include "wally-config.vh"
 
 module fsgninj (  
-	input logic        	XSgnE, YSgnE,	// X and Y sign bits
-	input logic [`FLEN-1:0] 	FSrcXE,			// X
-	input logic [`FMTBITS-1:0]		FmtE,			// precision 1 = double 0 = single
-	input  logic [1:0]  SgnOpCodeE,		// operation control
-	output logic [`FLEN-1:0] SgnResE			// result
+	input logic        			Xs, Ys,	// X and Y sign bits
+	input logic [`FLEN-1:0] 	X,		// X
+	input logic [`FMTBITS-1:0]	Fmt,	// format
+	input  logic [1:0]  		OpCtrl,	// operation control
+	output logic [`FLEN-1:0] 	SgnRes	// result
 );
 
 	logic ResSgn;
 
-	//op code designation:
-	//
-	//00 - fsgnj - directly copy over sign value of FSrcYE
-	//01 - fsgnjn - negate sign value of FSrcYE
-	//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
-	//
+	// OpCtrl:
+	// 		00 - fsgnj  - directly copy over sign value of Y
+	// 		01 - fsgnjn - negate sign value of Y
+	// 		10 - fsgnjx - XOR sign values of X and Y
 	
 	// calculate the result's sign
-	assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
+	assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
 	
 	// format final result based on precision
 	//    - uses NaN-blocking format
 	//        - if there are any unsused bits the most significant bits are filled with 1s
 	
     if (`FPSIZES == 1)
-		assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
+		assign SgnRes = {ResSgn, X[`FLEN-2:0]};
 
     else if (`FPSIZES == 2)
-		assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]};
+		assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
 
     else if (`FPSIZES == 3) begin
 		logic [2:0] SgnBits;
         always_comb
-            case (FmtE)
-                `FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]};
-                `FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]};
+            case (Fmt)
+                `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
+                `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
                 `FMT2: SgnBits = {2'b11, ResSgn};
                 default: SgnBits = {3{1'bx}};
             endcase
-		assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]};
+		assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
         
 
 	end else if (`FPSIZES == 4) begin
 		logic [3:0] SgnBits;
         always_comb
-            case (FmtE)
-                `Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
-                `D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
-                `S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]};
+            case (Fmt)
+                `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
+                `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
+                `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
                 `H_FMT: SgnBits = {3'b111, ResSgn};
             endcase
-		assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]};
+		assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
 	end
 
 endmodule
diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv
index 8d11273a..66af5b3c 100644
--- a/pipelined/src/fpu/otfc.sv
+++ b/pipelined/src/fpu/otfc.sv
@@ -107,6 +107,6 @@ module otfc4 (
       QMNext = {QMR, 2'b11};
     end 
   end
-  // Final Quoteint is in the range [.5, 2)
+  // Final Quotient is in the range [.5, 2)
 
 endmodule
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index de3c4f30..d3169d47 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -36,7 +36,7 @@ module postprocess (
     input logic  [`NF:0]                    Xm, Ym, Zm, // input mantissas
     input logic  [2:0]                      Frm,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
     input logic  [`FMTBITS-1:0]             Fmt,       // precision 1 = double 0 = single
-    input logic  [2:0]                      FOpCtrl,       // choose which opperation (look below for values)
+    input logic  [2:0]                      OpCtrl,       // choose which operation (look below for values)
     input logic                             XZero, YZero, ZZero, // inputs are zero
     input logic                             XInf, YInf, ZInf,    // inputs are infinity
     input logic                             XNaN, YNaN, ZNaN,    // inputs are NaN
@@ -54,7 +54,7 @@ module postprocess (
     input logic                             FmaNegSum,    // was the sum negitive
     input logic                             FmaInvA,      // do you invert Z
     input logic                             FmaSs,
-    input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
+    input logic  [$clog2(3*`NF+7)-1:0]      FmaSCnt,   // the normalization shift count
     //divide signals
     input logic  [`DURLEN-1:0]              DivEarlyTermShift,
     input logic                             DivS,
@@ -125,14 +125,14 @@ module postprocess (
     logic Sqrt;
 
     // signals to help readability
-    assign Signed =  FOpCtrl[0];
-    assign Int64 =   FOpCtrl[1];
-    assign IntToFp = FOpCtrl[2];
-    assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0];
+    assign Signed =  OpCtrl[0];
+    assign Int64 =   OpCtrl[1];
+    assign IntToFp = OpCtrl[2];
+    assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
     assign CvtOp = (PostProcSel == 2'b00);
     assign FmaOp = (PostProcSel == 2'b10);
     assign DivOp = (PostProcSel == 2'b01)&DivDone;
-    assign Sqrt =  FOpCtrl[0];
+    assign Sqrt =  OpCtrl[0];
 
     // is there an input of infinity or NaN being used
     assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
@@ -142,9 +142,9 @@ module postprocess (
     //      - fp -> fp: OpCtrl contains the percision of the output
     //      - otherwise: Fmt contains the percision of the output
     if (`FPSIZES == 2) 
-        assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); 
+        assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); 
     else if (`FPSIZES == 3 | `FPSIZES == 4) 
-        assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; 
+        assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; 
 
     ///////////////////////////////////////////////////////////////////////////////
     // Normalization
@@ -152,7 +152,7 @@ module postprocess (
 
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
-    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
+    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
                           .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
     divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv
index 396ca776..202b3ee8 100644
--- a/pipelined/src/fpu/qsel.sv
+++ b/pipelined/src/fpu/qsel.sv
@@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits
   // for efficiency.  You can probably optimize your logic to
   // select the proper divisor with less delay.
 
-  // Quotient equations from EE371 lecture notes 13-20
+  // Quotient equations from EE371 lecture notes 13-20
   assign p = ps ^ pc;
   assign g = ps & pc;
 
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index 514edbee..6329ffe2 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -43,7 +43,7 @@ module shiftcorrection(
     output logic [`NE+1:0]          FmaMe         // exponent of the normalized sum
 );
     logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
-    logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
+    logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
     logic                  ResDenorm;    // is the result denormalized
     logic                  LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
 
@@ -53,11 +53,11 @@ module shiftcorrection(
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
     assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
-    assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
+    assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
     always_comb
         if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
-        else if (DivOp&~DivResDenorm)   Mf = CorrQuotShifted;
+        else if (DivOp&~DivResDenorm)   Mf = CorrQmShifted;
         else                            Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv
index ee5ae9a3..7e9f9922 100644
--- a/pipelined/src/fpu/srt.sv
+++ b/pipelined/src/fpu/srt.sv
@@ -37,15 +37,15 @@ module srt(
   input  logic [`FMTBITS-1:0] FmtE,
   input  logic [`NE-1:0] Xe, Ye,
   input  logic XZeroE, YZeroE, 
-  input  logic [`DIVLEN-1:0] X,
-  input  logic [`DIVLEN-1:0] Dpreproc,
-  input  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  input  logic NegSticky,
-  output logic [`QLEN-1-(`RADIX/4):0] Quot,
+  input logic [`DIVLEN-1:0] X,
+  input logic [`DIVLEN-1:0] Dpreproc,
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  input logic NegSticky,
+  output logic [`QLEN-1-(`RADIX/4):0] Qm,
   output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  StickyWSA,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
-  output logic [`NE+1:0] DivCalcExpM,
+  output logic  [`NE+1:0] QeM,
   output logic [`XLEN-1:0] Rem
 );
 
@@ -62,7 +62,7 @@ module srt(
  /* verilator lint_on UNOPTFLAT */
   logic [`DIVLEN+3:0]  WSN, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
-  logic [`NE+1:0] DivCalcExp;
+  logic [`NE+1:0] Qe;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
   logic [`QLEN-1:0] QMMux;
@@ -88,7 +88,7 @@ module srt(
   mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flopen   #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
   flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
-  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
+  flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
 
 
   // Divisor Selections
@@ -123,7 +123,7 @@ module srt(
   flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
   flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
 
-  assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
+  assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
   assign FirstWS = WS[0];
   assign FirstWC = WC[0];
   if(`RADIX==2)
@@ -132,7 +132,7 @@ module srt(
     else
       assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
 
-  expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
+  expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe);
 
 endmodule
 
@@ -155,7 +155,7 @@ module divinteration (
   logic [3:0]     q;
   logic qp, qz;//, qn;
 
-  // Quotient Selection logic
+  // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
   // q encoding:
 	// 1000 = +2
@@ -226,7 +226,7 @@ module expcalc(
   input  logic [`NE-1:0] Xe, Ye,
   input logic XZeroE, 
   input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic  [`NE+1:0] DivCalcExp
+  output logic  [`NE+1:0] Qe
   );
     logic [`NE-2:0] Bias;
     
@@ -255,5 +255,5 @@ module expcalc(
             endcase
     end
     // correct exponent for denormalized input's normalization shifts
-    assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
+    assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
     endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index 634ecc1d..597f96cd 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -43,7 +43,7 @@ module srtfsm(
   input  logic [`DIVLEN+3:0] StickyWSA,
   input  logic [`DURLEN-1:0] Dur,
   output logic [`DURLEN-1:0] EarlyTermShiftE,
-  output logic DivStickyE,
+  output logic DivSE,
   output logic DivDone,
   output logic NegSticky,
   output logic DivBusy
@@ -65,9 +65,9 @@ module srtfsm(
   //      this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
   //      radix-4 division can't create a QM that continually adds 0's
   if (`RADIX == 2)
-    assign DivStickyE = |W&~(StickyWSA == WS);
+    assign DivSE = |W&~(StickyWSA == WS);
   else
-    assign DivStickyE = |W;
+    assign DivSE = |W;
   assign DivDone = (state == DONE);
   assign W = WC+WS;
   assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv
index 71cad187..050839c2 100644
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@@ -30,35 +30,34 @@
 
 module unpack ( 
     input logic  [`FLEN-1:0]        X, Y, Z,    // inputs from register file
-    input logic  [`FMTBITS-1:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
-    output logic                    XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
-    output logic [`NE-1:0]          XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
-    output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
-    output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
-    output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-    output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
-    output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
-    output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-    output logic                    XExpMaxE                        // does X have the maximum exponent (NaN or Inf)
+    input logic  [`FMTBITS-1:0]     Fmt,       // format signal 00 - single 01 - double 11 - quad 10 - half
+    output logic                    Xs, Ys, Zs,    // sign bits of XYZ
+    output logic [`NE-1:0]          Xe, Ye, Ze,    // exponents of XYZ (converted to largest supported precision)
+    output logic [`NF:0]            Xm, Ym, Zm,    // mantissas of XYZ (converted to largest supported precision)
+    output logic                    XNaN, YNaN, ZNaN,    // is XYZ a NaN
+    output logic                    XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
+    output logic                    XDenorm, ZDenorm,   // is X or Z denormalized
+    output logic                    XZero, YZero, ZZero,         // is XYZ zero
+    output logic                    XInf, YInf, ZInf,            // is XYZ infinity
+    output logic                    XExpMax                        // does X have the maximum exponent (NaN or Inf)
 );
  
-    logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
     logic           XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
     logic           XFracZero, YFracZero, ZFracZero; // is the fraction zero
-    logic           YExpMaxE, ZExpMaxE;  // is the exponent all 1s
+    logic           YExpMax, ZExpMax;  // is the exponent all 1s
     
-    unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), 
-                            .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
-                            .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
+    unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), 
+                            .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
+                            .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
 
-    unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), 
-                            .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
-                            .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
+    unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), 
+                            .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
+                            .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
 
-    unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), 
-                            .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
-                            .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
+    unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), 
+                            .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
+                            .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
     // is the input denormalized
-    assign XDenormE = ~XExpNonZero & ~XFracZero;
-    assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
+    assign XDenorm = ~XExpNonZero & ~XFracZero;
+    assign ZDenorm = ~ZExpNonZero & ~ZFracZero;
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv
index 2b078cc6..7be92250 100644
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@@ -30,7 +30,7 @@
 
 module unpackinput ( 
     input logic  [`FLEN-1:0]        In,    // inputs from register file
-    input logic  [`FMTBITS-1:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
+    input logic  [`FMTBITS-1:0]     Fmt,       // format signal 00 - single 01 - double 11 - quad 10 - half
     output logic                    Sgn,    // sign bits of XYZ
     output logic [`NE-1:0]          Exp,    // exponents of XYZ (converted to largest supported precision)
     output logic [`NF:0]            Man,    // mantissas of XYZ (converted to largest supported precision)
@@ -74,16 +74,16 @@ module unpackinput (
         //      quad   and half
         //      double and half
 
-        assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
+        assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
 
         // choose sign bit depending on format - 1=larger precsion 0=smaller precision
-        assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];
+        assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
 
         // extract the fraction, add trailing zeroes to the mantissa if nessisary
-        assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
+        assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
 
         // is the exponent non-zero
-        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 
+        assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 
 
         // example double to single conversion:
         // 1023 = 0011 1111 1111
@@ -95,10 +95,10 @@ module unpackinput (
 
         // extract the exponent, converting the smaller exponent into the larger precision if nessisary
         //      - if the original precision had a denormal number convert the exponent value 1
-        assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
+        assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
  
         // is the exponent all 1's
-        assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
+        assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
     
 
     end else if (`FPSIZES == 3) begin       // three floating point precsions supported
@@ -122,7 +122,7 @@ module unpackinput (
 
         // Check NaN boxing
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  BadNaNBox = 0;
                 `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
                 `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
@@ -131,7 +131,7 @@ module unpackinput (
 
         // extract the sign bit
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  Sgn = In[`FLEN-1];
                 `FMT1: Sgn = In[`LEN1-1];
                 `FMT2: Sgn = In[`LEN2-1];
@@ -140,7 +140,7 @@ module unpackinput (
 
         // extract the fraction
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT: Frac = In[`NF-1:0];
                 `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
                 `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
@@ -149,7 +149,7 @@ module unpackinput (
 
         // is the exponent non-zero
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  ExpNonZero = |In[`FLEN-2:`NF];     // if input is largest precision (`FLEN - ie quad or double)
                 `FMT1: ExpNonZero = |In[`LEN1-2:`NF1];  // if input is larger precsion (`LEN1 - double or single)
                 `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
@@ -166,7 +166,7 @@ module unpackinput (
 
         // convert the larger precision's exponent to use the largest precision's bias
         always_comb 
-            case (FmtE)
+            case (Fmt)
                 `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
                 `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
                 `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; 
@@ -175,7 +175,7 @@ module unpackinput (
 
         // is the exponent all 1's
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  ExpMax = &In[`FLEN-2:`NF];
                 `FMT1: ExpMax = &In[`LEN1-2:`NF1];
                 `FMT2: ExpMax = &In[`LEN2-2:`NF2];
@@ -194,7 +194,7 @@ module unpackinput (
 
         // Check NaN boxing
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11:  BadNaNBox = 0;
                 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
                 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
@@ -203,7 +203,7 @@ module unpackinput (
 
         // extract sign bit
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: Sgn = In[`Q_LEN-1];
                 2'b01: Sgn = In[`D_LEN-1];
                 2'b00: Sgn = In[`S_LEN-1];
@@ -213,7 +213,7 @@ module unpackinput (
 
         // extract the fraction
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: Frac = In[`Q_NF-1:0];
                 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
                 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@@ -222,7 +222,7 @@ module unpackinput (
 
         // is the exponent non-zero
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
                 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
                 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 
@@ -240,7 +240,7 @@ module unpackinput (
         
         // convert the double precsion exponent into quad precsion
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
                 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
                 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
@@ -250,7 +250,7 @@ module unpackinput (
 
         // is the exponent all 1's
         always_comb 
-            case (FmtE)
+            case (Fmt)
                 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
                 2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
                 2'b00: ExpMax = &In[`S_LEN-2:`S_NF];