renamed top level FPU wires

2025-02-11 06:05:49 +00:00 · 2021-05-25 20:04:34 -04:00 · 2021-05-25 20:04:34 -04:00 · e7190b0690
commit e7190b0690
parent 33cd133a65
12 changed files with 707 additions and 811 deletions
--- a/wally-pipelined/src/fpu/fctrl.sv
+++ b/wally-pipelined/src/fpu/fctrl.sv
@ -6,16 +6,15 @@ module fctrl (
  input  logic [2:0] Funct3D,
  input  logic [2:0] FRM_REGW,
  output logic       IllegalFPUInstrD,
-  output logic       FRegWriteD,
-  output logic       DivSqrtStartD,
-  //output logic [2:0] regSelD,
+  output logic       FWriteEnD,
+  output logic       FDivStartD,
  output logic [2:0] FResultSelD,
-  output logic [3:0] OpCtrlD,
+  output logic [3:0] FOpCtrlD,
  output logic       FmtD,
  output logic [2:0] FrmD,
  output logic [1:0] FMemRWD,
-  output logic       OutputInput2D,
-  output logic       In2UsedD, In3UsedD,
+  output logic       FOutputInput2D,
+  output logic       FInput2UsedD, FInput3UsedD,
  output logic       FWriteIntD);


@ -102,9 +101,9 @@ module fctrl (
    end
  end

-  assign OutputInput2D = OpD == 7'b0100111;
+  assign FOutputInput2D = OpD == 7'b0100111;

-  assign FMemRWD[0] = OutputInput2D;
+  assign FMemRWD[0] = FOutputInput2D;
  assign FMemRWD[1] = OpD == 7'b0000111;


@ -131,7 +130,7 @@ module fctrl (
  //this value is used enough to be shorthand

  //if op is div/sqrt - start div/sqrt
-  assign DivSqrtStartD = ~|FResultSelD; // is FResultSelD == 000
+  assign FDivStartD = ~|FResultSelD; // is FResultSelD == 000

  //operation control for each fp operation
  //has to be expanded over standard to account for
@ -144,7 +143,7 @@ module fctrl (
  //version I used for this repo

  //let's do separate SOP for each type of operation
-//  assign OpCtrlD[3] = 1'b0;
+//  assign FOpCtrlD[3] = 1'b0;
 //
 //

@ -152,12 +151,12 @@ module fctrl (
 
  always_comb begin
    IllegalFPUInstr1D = 0;
-    In3UsedD = 0;
+    FInput3UsedD = 0;
    case (FResultSelD)
      // div/sqrt
      //  fdiv  = ???0
      //  fsqrt = ???1
-      3'b000 : begin OpCtrlD = {3'b0, Funct7D[5]}; In2UsedD = ~Funct7D[5]; end
+      3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end
      // cmp		
      //  fmin = ?100
      //  fmax = ?101
@ -165,7 +164,7 @@ module fctrl (
      //  flt  = ?001
      //  fle  = ?011
      //		   {?,    is min or max, is eq or le, is lt or le}
-      3'b001 : begin OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; In2UsedD = 1'b1; end
+      3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end
      //fma/mult	
      //  fmadd  = ?000
      //  fmsub  = ?001
@ -173,12 +172,12 @@ module fctrl (
      //  fnmsub = ?011
      //  fmul   = ?100
      //		  {?, is mul, is negative, is sub}
-      3'b010 : begin OpCtrlD = {1'b0, OpD[4:2]}; In2UsedD = 1'b1; In3UsedD = ~OpD[4]; end
+      3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end
      // sgn inj
      //  fsgnj  = ??00
      //  fsgnjn = ??01
      //  fsgnjx = ??10
-      3'b011 : begin OpCtrlD = {2'b0, Funct3D[1:0]}; In2UsedD = 1'b1; end
+      3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end
      // add/sub/cnvt
      //  fadd      = 0000
      //  fsub      = 0001
@ -193,13 +192,13 @@ module fctrl (
      //  fcvt.d.wu = 1111
      //  fcvt.d.s  = 1000
      //		   { is double and not add/sub, is to/from int, is to int or float to double,      is unsigned or sub
-      3'b100 : begin OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; In2UsedD = ~Funct7D[5]; end
+      3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end
      // classify	  {?, ?, ?, ?}
-      3'b101 : begin OpCtrlD = 4'b0; In2UsedD = 1'b0; end
+      3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end
      // output SrcAW
      //  fmv.w.x = ???0
      //  fmv.w.d = ???1
-      3'b110 : begin OpCtrlD = {3'b0, Funct7D[0]}; In2UsedD = 1'b0; end
+      3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end
      // output Input1
      //  flw       = ?000
      //  fld       = ?001 
@ -207,9 +206,9 @@ module fctrl (
      //  fsd       = ?011 // output Input2
      //  fmv.x.w  = ?100
      //  fmv.x.d  = ?101
-      //		   {?, is mv, is store, is double or fcvt.d.w}
-      3'b111 : begin OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; In2UsedD = OpD[5]; end
-      default : begin OpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; In2UsedD = 1'b0; end
+      //		   {?, is mv, is store, is double or fmv}
+      3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end
+      default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; FInput2UsedD = 1'b0; end
    endcase
  end

@ -219,5 +218,5 @@ module fctrl (
  //			is add/cvt       and  is to int  or is classify		 or     is cmp	       	and not max/min or is output ReadData1 and is mv
  assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]);
  // 		      if not writing to int reg and not a store function and not move
-  assign FRegWriteD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
+  assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
 endmodule
--- a/wally-pipelined/src/fpu/fma1.sv
+++ b/wally-pipelined/src/fpu/fma1.sv
@ -15,13 +15,13 @@
 //    normalize Normalization shifter
 //    round     Rounding of result
 //    exception Handles exceptional cases
-//    bypass    Handles bypass of result to Input1E or Input3E inputs
+//    bypass    Handles bypass of result to FInput1E or FInput3E inputs
 //    sign      One bit sign handling block 
 //    special   Catch special cases (inputs = 0  / infinity /  etc.) 
 //
-//   The FMAC computes FmaResultM=Input1E*Input2E+Input3E, rounded with the mode specified by
+//   The FMAC computes FmaResultM=FInput1E*FInput2E+FInput3E, rounded with the mode specified by
 //   RN, RZ, RM, or RP.  The result is optionally bypassed back to
-//   the Input1E or Input3E inputs for use on the next cycle.  In addition,  four signals
+//   the FInput1E or FInput3E inputs for use on the next cycle.  In addition,  four signals
 //   are produced: trap, overflow, underflow, and inexact.  Trap indicates
 //   an infinity, NaN, or denormalized number to be handled in software;
 //   the other three signals are IEEE flags.
@ -29,15 +29,15 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module fma1(Input1E, Input2E, Input3E, FrmE,  
+module fma1(FInput1E, FInput2E, FInput3E, FrmE,  
 			rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE,  aligncntE, aeE
 			, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
 			xinfE, yinfE, zinfE, nanE, prodinfE);
 /////////////////////////////////////////////////////////////////////////////
 
-	input logic 		[63:0]		Input1E;		// input 1
-	input logic		[63:0]		Input2E;     // input 2 
-	input logic 		[63:0]		Input3E;     // input 3
+	input logic 		[63:0]		FInput1E;		// input 1
+	input logic		[63:0]		FInput2E;     // input 2 
+	input logic 		[63:0]		FInput3E;     // input 3
 	input logic 		[2:0]	 	FrmE;          	// Rounding mode
 	output logic 		[12:0]		aligncntE;    	// shift count for alignment
 	output logic 		[105:0]		rE; 				// one result of partial product sum
@ -45,7 +45,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
 	output logic 		[163:0]		tE;				// output logic of alignment shifter	
 	output logic 		[12:0]		aeE; 		// multiplier exponent
 	output logic 					bsE;				// sticky bit of addend
-	output logic 					killprodE; 		// Input3E >> product
+	output logic 					killprodE; 		// FInput3E >> product
 	output logic					xzeroE;
 	output logic					yzeroE;
 	output logic					zzeroE;
@ -68,7 +68,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
 //	output logic 		[12:0]		aligncntE; 		// shift count for alignment


-	logic 					prodof; 		// Input1E*Input2E out of range
+	logic 					prodof; 		// FInput1E*FInput2E out of range



@ -84,12 +84,12 @@ module fma1(Input1E, Input2E, Input3E, FrmE,

 //   Instantiate fraction datapath

-	multiply		multiply(.xman(Input1E[51:0]), .yman(Input2E[51:0]), .*);
-	align			align(.zman(Input3E[51:0]),.*);
+	multiply		multiply(.xman(FInput1E[51:0]), .yman(FInput2E[51:0]), .*);
+	align			align(.zman(FInput3E[51:0]),.*);

 // Instantiate exponent datapath

-	expgen1			expgen1(.xexp(Input1E[62:52]),.yexp(Input2E[62:52]),.zexp(Input3E[62:52]),.*);
+	expgen1			expgen1(.xexp(FInput1E[62:52]),.yexp(FInput2E[62:52]),.zexp(FInput3E[62:52]),.*);
 // Instantiate special case detection across datapath & exponent path 

 	special			special(.*);
--- a/wally-pipelined/src/fpu/fma2.sv
+++ b/wally-pipelined/src/fpu/fma2.sv
@ -15,13 +15,13 @@
 //    normalize Normalization shifter
 //    round     Rounding of result
 //    exception Handles exceptional cases
-//    bypass    Handles bypass of result to Input1M or Input3M input logics
+//    bypass    Handles bypass of result to FInput1M or FInput3M inputs
 //    sign      One bit sign handling block 
 //    special   Catch special cases (inputs = 0  / infinity /  etc.) 
 //
-//   The FMAC computes FmaResultM=Input1M*Input2M+Input3M, rounded with the mode specified by
+//   The FMAC computes FmaResultM=FInput1M*FInput2M+FInput3M, rounded with the mode specified by
 //   RN, RZ, RM, or RP.  The result is optionally bypassed back to
-//   the Input1M or Input3M input logics for use on the next cycle.  In addition,  four signals
+//   the FInput1M or FInput3M inputs for use on the next cycle.  In addition,  four signals
 //   are produced: trap, overflow, underflow, and inexact.  Trap indicates
 //   an infinity, NaN, or denormalized number to be handled in software;
 //   the other three signals are IEEE flags.
@ -29,7 +29,7 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module fma2(Input1M, Input2M, Input3M, FrmM,
+module fma2(FInput1M, FInput2M, FInput3M, FrmM,
 			FmaResultM, FmaFlagsM, aligncntM, rM, sM,
 			tM,	normcntM, aeM, bsM,killprodM,
 			xzeroM,	yzeroM,zzeroM,xdenormM,ydenormM,
@ -39,9 +39,9 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
 );
 /////////////////////////////////////////////////////////////////////////////
 
-	input logic 		[63:0]		Input1M;		// input logic 1
-	input logic		[63:0]		Input2M;     // input logic 2 
-	input logic 		[63:0]		Input3M;     // input logic 3
+	input logic 		[63:0]		FInput1M;		// input logic 1
+	input logic		[63:0]		FInput2M;     // input logic 2 
+	input logic 		[63:0]		FInput3M;     // input logic 3
 	input logic 		[2:0]	 	FrmM;          	// Rounding mode
 	input logic 		[12:0]		aligncntM;    	// shift count for alignment
 	input logic 		[105:0]		rM; 				// one result of partial product sum
@ -50,7 +50,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
 	input logic 		[8:0]		normcntM; 		// shift count for normalizer
 	input logic 		[12:0]		aeM; 		// multiplier exponent
 	input logic 					bsM;				// sticky bit of addend
-	input logic 					killprodM; 		// Input3M >> product
+	input logic 					killprodM; 		// FInput3M >> product
 	input logic					prodinfM;
 	input logic					xzeroM;
 	input logic					yzeroM;
@ -69,7 +69,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
 	input logic					sumshiftzeroM;


-	output logic 		[63:0]		FmaResultM;     // output FmaResultM=Input1M*Input2M+Input3M
+	output logic 		[63:0]		FmaResultM;     // output FmaResultM=FInput1M*FInput2M+FInput3M
 	output logic 		[4:0]		FmaFlagsM;    	// status flags
 	

@ -120,18 +120,18 @@ module fma2(Input1M, Input2M, Input3M, FrmM,

 	add				add(.*);
 	lza				lza(.*);
-	normalize		normalize(.zexp(Input3M[62:52]),.*); 
-	round			round(.xman(Input1M[51:0]), .yman(Input2M[51:0]),.zman(Input3M[51:0]),.*);
+	normalize		normalize(.zexp(FInput3M[62:52]),.*); 
+	round			round(.xman(FInput1M[51:0]), .yman(FInput2M[51:0]),.zman(FInput3M[51:0]),.*);

 // Instantiate exponent datapath

-	expgen2			expgen2(.xexp(Input1M[62:52]),.yexp(Input2M[62:52]),.zexp(Input3M[62:52]),.*);
+	expgen2			expgen2(.xexp(FInput1M[62:52]),.yexp(FInput2M[62:52]),.zexp(FInput3M[62:52]),.*);


 // Instantiate control logic
 
-sign				sign(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.*); 
-flag2				flag2(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.vbits(v[1:0]),.*); 
+sign				sign(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.*); 
+flag2				flag2(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.vbits(v[1:0]),.*); 

 assign FmaResultM = {wsign,wexp,wman};

--- a/wally-pipelined/src/fpu/fpdiv.sv
+++ b/wally-pipelined/src/fpu/fpdiv.sv
@ -23,25 +23,25 @@
 //

 // `timescale 1ps/1ps
-module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, DivFrm, DivOpType, DivP, DivOvEn, DivUnEn,
-	      DivStart, reset, clk, DivBusyM);
+module fpdiv (FDivSqrtDoneM, FDivResultM, FDivFlagsM, DivDenormM, FInput1E, FInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
+	      FDivStartE, reset, clk, DivBusyM);

-   input [63:0] DivOp1;		// 1st input operand (A)
-   input [63:0] DivOp2;		// 2nd input operand (B)
-   input [2:0] 	DivFrm;		// Rounding mode - specify values 
+   input [63:0] FInput1E;		// 1st input operand (A)
+   input [63:0] FInput2E;		// 2nd input operand (B)
+   input [2:0] 	FrmE;		// Rounding mode - specify values 
   input 	DivOpType;	// Function opcode
-   input 	DivP;   		// Result Precision (0 for double, 1 for single)
+   input 	FmtE;   		// Result Precision (0 for double, 1 for single)
   input 	DivOvEn;		// Overflow trap enabled
   input 	DivUnEn;   	// Underflow trap enabled

-   input 	DivStart;
+   input 	FDivStartE;
   input 	reset;
   input 	clk;   

-   output [63:0] DivResultM;	// Result of operation
-   output [4:0]  DivFlagsM;   	// IEEE exception flags 
+   output [63:0] FDivResultM;	// Result of operation
+   output [4:0]  FDivFlagsM;   	// IEEE exception flags 
   output 	 DivDenormM;   	// DivDenormM on input or output
-   output 	 DivSqrtDone;
+   output 	 FDivSqrtDoneM;
   output    DivBusyM;

   supply1 	  vdd;
@ -94,16 +94,16 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
   
   logic exp_cout1, exp_cout2, exp_odd, open;
   // Convert the input operands to their appropriate forms based on 
-   // the orignal operands, the DivOpType , and their precision DivP. 
+   // the original operands, the DivOpType, and their precision FmtE. 
    // Single precision inputs are converted to double precision 
    // and the sign of the first operand is set appropriately based on
    // if the operation is absolute value or negation. 
-   convert_inputs_div divconv1 (Float1, Float2, DivOp1, DivOp2, DivOpType, DivP);
+   convert_inputs_div divconv1 (Float1, Float2, FInput1E, FInput2E, DivOpType, FmtE);

   // Test for exceptions and return the "Invalid Operation" and
-   // "Denormalized" Input DivFlagsM. The "sel_inv" is used in
+   // "Denormalized" Input FDivFlagsM. The "sel_inv" is used in
   // the third pipeline stage to select the result. Also, op1_Norm
-   // and op2_Norm are one if DivOp1 and DivOp2 are not zero or denormalized.
+   // and op2_Norm are one if FInput1E and FInput2E are not zero or denormalized.
    // sub is one if the effective operation is subtraction. 
   exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, 
 		   Float1, Float2, DivOpType);
@ -135,26 +135,26 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
 		  sel_muxa, sel_muxb, sel_muxr, 
 		  reset, clk,
 		  load_rega, load_regb, load_regc, load_regd,
-		  load_regr, load_regs, DivP, DivOpType, exp_odd);
+		  load_regr, load_regs, FmtE, DivOpType, exp_odd);

   // FSM : control divider
-   fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd, 
+   fsm control (FDivSqrtDoneM, load_rega, load_regb, load_regc, load_regd, 
 		load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, 
-		clk, reset, DivStart, DivOpType, DivBusyM);
+		clk, reset, FDivStartE, DivOpType, DivBusyM);
   
   // Round the mantissa to a 52-bit value, with the leading one
   // removed. The rounding units also handles special cases and 
   // set the exception flags.
    //***add max magnitude and swap negative and positive infinity
   rounder_div divround1 (Result, DenormIO, FlagsIn, 
-		   DivFrm, DivP, DivOvEn, DivUnEn, expF, 
+		   FrmE, FmtE, DivOvEn, DivUnEn, expF, 
   		   sel_inv, Invalid, DenormIn, signResult, 
 		   q1, qm1, qp1, q0, qm0, qp0, regr_out);

   // Store the final result and the exception flags in registers.
-   flopenr #(64) rega (clk, reset, DivSqrtDone, Result, DivResultM);
-   flopenr #(1) regb (clk, reset, DivSqrtDone, DenormIO, DivDenormM);   
-   flopenr #(5) regc (clk, reset, DivSqrtDone, FlagsIn, DivFlagsM);   
+   flopenr #(64) rega (clk, reset, FDivSqrtDoneM, Result, FDivResultM);
+   flopenr #(1) regb (clk, reset, FDivSqrtDoneM, DenormIO, DivDenormM);   
+   flopenr #(5) regc (clk, reset, FDivSqrtDoneM, FlagsIn, FDivFlagsM);   
   
 endmodule // fpdiv

@ -198,7 +198,7 @@ module brent_kung (c, p, g);
   logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
   // parallel-prefix, Brent-Kung

-   // Stage 1: Generates G/DivP pairs that span 1 bits
+   // Stage 1: Generates G/P pairs that span 1 bits
   grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
   black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
   black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
@ -207,20 +207,20 @@ module brent_kung (c, p, g);
   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});

-   // Stage 2: Generates G/DivP pairs that span 2 bits
+   // Stage 2: Generates G/P pairs that span 2 bits
   grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
   black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
   black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});

-   // Stage 3: Generates G/DivP pairs that span 4 bits
+   // Stage 3: Generates G/P pairs that span 4 bits
   grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);

-   // Stage 4: Generates G/DivP pairs that span 8 bits
+   // Stage 4: Generates G/P pairs that span 8 bits

-   // Stage 5: Generates G/DivP pairs that span 4 bits
+   // Stage 5: Generates G/P pairs that span 4 bits
   grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);

+   // Stage 6: Generates G/P pairs that span 2 bits
+   // Stage 6: Generates G/FmtE pairs that span 2 bits
   grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
   grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
   grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -23,10 +23,8 @@
 ///////////////////////////////////////////

 `include "wally-config.vh"
-//  `include "../../config/rv64icfd/wally-config.vh" //debug

 module fpu (
-  //input  logic [2:0]       FrmD,
  input  logic [2:0]       FRM_REGW,    // Rounding mode from CSR
  input  logic             reset,
  //input  logic             clear,     // *** not being used anywhere
@ -42,147 +40,79 @@ module fpu (
  output logic [31:0]      FSROutW,
  output logic [1:0]       FMemRWM,
 	output logic             FStallD,
-  output logic             FWriteIntW,
-  output logic             FWriteIntM,
-  output logic [`XLEN-1:0] FWriteDataM,       // Integer input being written into fpreg
-  output logic             DivSqrtDoneE,
+  output logic             FWriteIntM, FWriteIntW,
+  output logic [`XLEN-1:0] FWriteDataM,
+  output logic             FDivSqrtDoneM,
  output logic             IllegalFPUInstrD,
  output logic [`XLEN-1:0] FPUResultW);

-   //NOTE:
-   //For readability and ease of modification, logic signals will be
-   //instantiated as they occur within the pipeline. This will keep local
-   //signals, modules, and combinational logic closely defined.

-   //used for OSU DP-size hardware to wally XLEN interfacing

-   integer 		   XLENDIFF;
-   assign XLENDIFF = `XLEN - 64;
-   integer 		   XLENDIFFN;
-   assign XLENDIFFN = 63 - `XLEN;

-   // BEGIN PIPELINE CONTROL LOGIC
-   logic 		   PipeEnableDE;
-   logic 		   PipeEnableEM;
-   logic 		   PipeEnableMW;
-   logic 		   PipeClearDE;
-   logic 		   PipeClearEM;
-   logic 		   PipeClearMW;

-   //temporarily assign pipe clear and enable signals
-   //to never flush & always be running
-   localparam PipeClear = 1'b0;
-   localparam PipeEnable = 1'b1;
-   always_comb begin
-      PipeEnableDE = ~StallE;
-      PipeEnableEM = ~StallM;
-      PipeEnableMW = ~StallW;
-      PipeClearDE = FlushE;
-      PipeClearEM = FlushM;
-      PipeClearMW = FlushW;
-   end   
+  //control logic signal instantiation
+  logic             FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW;             // FP register write enable
+  logic [2:0]       FrmD, FrmE, FrmM, FrmW;                                 // FP rounding mode
+  logic             FmtD, FmtE, FmtM, FmtW;                                 // FP precision 0-single 1-double
+  logic             FDivStartD, FDivStartE;                                 // Start division
+  logic             FWriteIntD, FWriteIntE;                                 // Write to integer register
+  logic             FOutputInput2D, FOutputInput2E;                         // Put Input2 in Input1 if a store instruction
+  logic [1:0]       FMemRWD, FMemRWE;                                       // Read and write enable for memory
+  logic [1:0]       FForwardInput1D, FForwardInput1E;                       // Input1 forwarding mux control signal
+  logic [1:0]       FForwardInput2D, FForwardInput2E;                       // Input2 forwarding mux control signal
+  logic             FForwardInput3D, FForwardInput3E;                       // Input3 forwarding mux control signal
+  logic             FInput2UsedD;                                           // Is input 2 used
+  logic             FInput3UsedD;                                           // Is input 3 used
+  logic [2:0]       FResultSelD, FResultSelE, FResultSelM, FResultSelW;     // Select FP result
+  logic [3:0]       FOpCtrlD, FOpCtrlE, FOpCtrlM;                           // Select which operation to do in each component
  
-   // Wally-spec D stage control logic signal instantiation
-   logic                    FRegWriteD;
-   logic [2:0] 		    FResultSelD;
-   logic [2:0] 		    FrmD;
-   logic                    FmtD;
-   logic                    DivSqrtStartD;
-   logic [3:0] 		    OpCtrlD;
-   logic                    FWriteIntD;
-   logic                    OutputInput2D;
-   logic [1:0] 		    FMemRWD;
+  // regfile signals
+  logic [4:0]       RdE, RdM, RdW; // ***Can take from ieu
+  logic [`XLEN-1:0] FWDM;                                                   // Write data for FP register
+  logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D;                                    // Read Data from FP register
+  logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E;
+  logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE;
+  logic [`XLEN-1:0] FInput2E, FInput2M;
+  logic [`XLEN-1:0] FInput3E, FInput3M;
+  logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW;                   // Result for load, store, and move to int-reg instructions

-   logic 		    DivBusyM;
-   logic [1:0] 		    Input1MuxD, Input2MuxD;
-   logic 		    Input3MuxD;
-   logic                    In2UsedD, In3UsedD;
-   
-   //Hazard unit for FPU
-   fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
-   
-   //top-level controller for FPU
-   fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
-   
-   //instantiation of D stage regfile signals (includes some W stage signals
-   //for easy reference)
-   logic [2:0] 		    FrmW;
-   logic                    FmtW;
-   logic                    FRegWriteW;
-   logic [4:0] 		    RdW, Rs1D, Rs2D, Rs3D;
-   logic [`XLEN-1:0] 	    WriteDataW;
-   logic [63:0] 	    FPUResultDirW; 
-   logic [`XLEN-1:0] 	    ReadData1D, ReadData2D, ReadData3D; 
-   
-   //regfile instantiation
-   //freg3adr fpregfile (FmtW, reset, PipeClear, clk, RdW, 
-   //		       FRegWriteW, 
-   //		       InstrD[19:15], InstrD[24:20], InstrD[31:27], 
-   //		       FPUResultDirW, 
-   //		       ReadData1D, ReadData2D, ReadData3D);
-   FPregfile fpregfile (clk, reset, FRegWriteW,
-			InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
-			FPUResultDirW,
-			ReadData1D, ReadData2D, ReadData3D);		
-
-  // wally-spec E stage control logic signal instantiation
-   logic                    FRegWriteE;
-   logic [2:0] 		    FResultSelE;
-   logic [2:0] 		    FrmE;
-   logic                    FmtE;
-   logic                    DivSqrtStartE;
-   logic [3:0] 		    OpCtrlE;
-   logic [1:0] 		    Input1MuxE, Input2MuxE;
-   logic                    Input3MuxE;
-   logic [63:0] 	    FPUResultDirE;
-   logic                    FWriteIntE;
-   logic                    OutputInput2E;
-   logic [1:0] 		    FMemRWE;
-   
-   //instantiation of E stage regfile signals
-   logic [4:0] 		    RdE;
-   logic [`XLEN-1:0] 	    ReadData1E, ReadData2E, ReadData3E;
-   logic [`XLEN-1:0] 	    Input1E, Input2E, Input3E, Input1tmpE;
-   
-   //instantiation of E/M stage div/sqrt signals
-   logic                    DivSqrtDone, DivDenormM;
-   logic [63:0] 	    DivResultM;
-   logic [4:0] 		    DivFlagsM;
-   logic [63:0] 	    DivOp1, DivOp2;
-   logic [2:0] 		    DivFrm;
-   logic                    DivOpType;
-   logic                    DivP;
+  // div/sqrt signals
+  logic             DivDenormM, DivDenormW;
  logic             DivOvEn, DivUnEn;
-   logic                    DivStart;
+  logic             DivBusyM;
+  logic [63:0]      FDivResultM, FDivResultW;
+  logic [4:0]       FDivFlagsM, FDivFlagsW;

-   //instantiate E stage FMA signals here
-   logic [12:0] 	    aligncntE; 
-   logic [105:0] 	    rE; 
-   logic [105:0] 	    sE; 
-   logic [163:0] 	    tE;	
-   logic [8:0] 		    normcntE; 
-   logic [12:0] 	    aeE; 
-   logic 		    bsE;
-   logic 		    killprodE; 
-   logic 		    prodofE; 
-   logic 		    xzeroE;
-   logic 		    yzeroE;
-   logic 		    zzeroE;
-   logic 		    xdenormE;
-   logic 		    ydenormE;
-   logic 		    zdenormE;
-   logic 		    xinfE;
-   logic 		    yinfE;
-   logic 		    zinfE;
-   logic 		    xnanE;
-   logic 		    ynanE;
-   logic 		    znanE;
-   logic 		    nanE;
-   logic [8:0] 		    sumshiftE;
-   logic 		    sumshiftzeroE;
-   logic 		    prodinfE;
+  // FMA signals
+  logic [12:0]		  aligncntE, aligncntM; 
+  logic [105:0]		  rE, rM; 
+  logic [105:0]		  sE, sM; 
+  logic [163:0]		  tE, tM;	
+  logic [8:0]		    normcntE, normcntM; 
+  logic [12:0]		  aeE, aeM; 
+  logic 		        bsE, bsM;
+  logic 		        killprodE, killprodM; 
+  logic 		        prodofE, prodofM; 
+  logic			        xzeroE, xzeroM;
+  logic			        yzeroE, yzeroM;
+  logic			        zzeroE, zzeroM;
+  logic			        xdenormE, xdenormM;
+  logic			        ydenormE, ydenormM;
+  logic			        zdenormE, zdenormM;
+  logic			        xinfE, xinfM;
+  logic			        yinfE, yinfM;
+  logic			        zinfE, zinfM;
+  logic			        xnanE, xnanM;
+  logic			        ynanE, ynanM;
+  logic			        znanE, znanM;
+  logic			        nanE, nanM;
+  logic	[8:0]		    sumshiftE, sumshiftM;
+  logic			        sumshiftzeroE, sumshiftzeroM;
+  logic             prodinfE, prodinfM;
+  logic [63:0]      FmaResultM, FmaResultW;
+  logic [4:0]       FmaFlagsM, FmaFlagsW;
  
-   //instantiation of E stage add/cvt signals
+  // add/cvt signals
  logic [63:0]      AddSumE, AddSumTcE;
  logic [3:0]       AddSelInvE;
  logic [10:0]      AddExpPostSumE;
@ -192,158 +122,9 @@ module fpu (
  logic [63:0]      AddFloat1E, AddFloat2E;
  logic [11:0]      AddExp1DenormE, AddExp2DenormE;
  logic [10:0]      AddExponentE;
-   logic [63:0] 	    AddOp1E, AddOp2E;
  logic [2:0]       AddRmE;
  logic [3:0]       AddOpTypeE;
  logic             AddPE, AddOvEnE, AddUnEnE;    
-   
-   //instantiation of E stage cmp signals 
-   logic [7:0] 		    WE, XE;
-   logic                    ANaNE, BNaNE, AzeroE, BzeroE;
-   logic [63:0] 	    CmpOp1E, CmpOp2E;
-   logic [1:0] 		    CmpSelE;
-   
-   //instantiation of E/M stage fsgn signals (due to bypass logic)
-   logic [63:0] 	    SgnOp1E, SgnOp2E;
-   logic [1:0] 		    SgnOpCodeE, SgnOpCodeM;
-   logic [63:0] 	    SgnResultE, SgnResultM;
-   logic [4:0] 		    SgnFlagsE, SgnFlagsM;
-   
-   //*****************
-   //fpregfile D/E pipe registers
-   //*****************
-   flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, ReadData1D, ReadData1E);
-   flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, ReadData2D, ReadData2E);
-   flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, ReadData3D, ReadData3E);
-   
-   //*****************
-   //other  D/E pipe registers
-   //*****************
-   flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE);
-   flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
-   flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
-   flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
-   flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
-   flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
-   flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
-   flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, Input1MuxD, Input1MuxE);
-   flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, Input2MuxD, Input2MuxE);
-   flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, Input3MuxD, Input3MuxE);
-   flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResultDirW, FPUResultDirE);
-   flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
-   flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, OutputInput2D, OutputInput2E);
-   flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
-   
-  // input muxs for forwarding
-   mux4  #(64)  Input1Emux(ReadData1E, FPUResultDirW, FPUResultDirE, SrcAM, Input1MuxE, Input1tmpE);
-   mux3  #(64)  Input2Emux(ReadData2E, FPUResultDirW, FPUResultDirE, Input2MuxE, Input2E);
-   mux2  #(64)  Input3Emux(ReadData3E, FPUResultDirE, Input3MuxE, Input3E);
-   mux2  #(64)  OutputInput2mux(Input1tmpE, Input2E, OutputInput2E, Input1E);
-
-   fma1 fma1 (.*);
-
-   //first and only instance of floating-point divider
-   fpdiv fpdivsqrt (.*);
-   
-   //first of two-stage instance of floating-point add/cvt unit
-   fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, 
-		      AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, 
-		      AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, 
-		      AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, 
-		      AddExp1DenormE, AddExp2DenormE, AddExponentE, 
-		      Input1E, Input2E, FrmE, OpCtrlE, FmtE);
-   
-   //first of two-stage instance of floating-point comparator
-   fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, Input1E, Input2E, OpCtrlE[1:0]);
-   
-   //first and only instance of floating-point sign converter
-   fpusgn fpsgn (.*);
-   
-   //interface between XLEN size datapath and double-precision sized
-   //floating-point results
-   //
-   //define offsets for LSB zero extension or truncation
-   always_comb begin
-      
-      //truncate to 64 bits
-      //(causes warning during compilation - case never reached) 
-      //   if(`XLEN > 64) begin // ***KEP this isn't usedand it causes a lint error
-      //         DivOp1 = Input1E[`XLEN-1:`XLEN-64];
-      // 	DivOp2 = Input2E[`XLEN-1:`XLEN-64];
-      //         AddOp1E = Input1E[`XLEN-1:`XLEN-64];
-      // 	AddOp2E = Input2E[`XLEN-1:`XLEN-64];
-      //         CmpOp1E = Input1E[`XLEN-1:`XLEN-64];
-      // 	CmpOp2E = Input2E[`XLEN-1:`XLEN-64];
-      //         SgnOp1E = Input1E[`XLEN-1:`XLEN-64];
-      // 	SgnOp2E = Input2E[`XLEN-1:`XLEN-64];
-      //   end
-      //   //zero extend to 64 bits
-      //   else begin
-      //         DivOp1 = {Input1E,{64-`XLEN{1'b0}}};
-      // 	DivOp2 = {Input2E,{64-`XLEN{1'b0}}};
-      //         AddOp1E = {Input1E,{64-`XLEN{1'b0}}};
-      // 	AddOp2E = {Input2E,{64-`XLEN{1'b0}}};
-      //         CmpOp1E = {Input1E,{64-`XLEN{1'b0}}};
-      // 	CmpOp2E = {Input2E,{64-`XLEN{1'b0}}};
-      //         SgnOp1E = {Input1E,{64-`XLEN{1'b0}}};
-      // 	SgnOp2E = {Input2E,{64-`XLEN{1'b0}}};
-      //   end
-      
-      //assign op codes
-      AddOpTypeE[3:0] = OpCtrlE[3:0];
-      CmpSelE[1:0] = OpCtrlE[1:0];
-      DivOpType = OpCtrlE[0];
-      SgnOpCodeE[1:0] = OpCtrlE[1:0];
-      
-   end 
-   
-   //E stage control signal interfacing between wally spec and OSU fp hardware
-   //op codes
-   
-   //wally-spec M stage control logic signal instantiation
-   logic                    FRegWriteM;
-   logic [2:0] 		    FResultSelM;
-   logic [2:0] 		    FrmM;
-   logic                    FmtM;
-   logic [3:0] 		    OpCtrlM;
-   
-   //instantiate M stage FMA signals here ***rename fma signals and resize for XLEN
-   logic [63:0] 	    FmaResultM;
-   logic [4:0] 		    FmaFlagsM;
-   logic [12:0] 	    aligncntM; 
-   logic [105:0] 	    rM; 
-   logic [105:0] 	    sM; 
-   logic [163:0] 	    tM;	
-   logic [8:0] 		    normcntM; 
-   logic [12:0] 	    aeM; 
-   logic 		    bsM;
-   logic 		    killprodM; 
-   logic 		    prodofM; 
-   logic 		    xzeroM;
-   logic 		    yzeroM;
-   logic 		    zzeroM;
-   logic 		    xdenormM;
-   logic 		    ydenormM;
-   logic 		    zdenormM;
-   logic 		    xinfM;
-   logic 		    yinfM;
-   logic 		    zinfM;
-   logic 		    xnanM;
-   logic 		    ynanM;
-   logic 		    znanM;
-   logic 		    nanM;
-   logic [8:0] 		    sumshiftM;
-   logic 		    sumshiftzeroM;
-   logic 		    prodinfM;
-   
-   //instantiation of M stage regfile signals
-   logic [4:0] 		    RdM;
-   logic [`XLEN-1:0] 	    Input1M, Input2M, Input3M;
-   logic [`XLEN-1:0] 	    LoadStoreResultM;
-   
-   //instantiation of M stage add/cvt signals
-   logic [63:0] 	    AddResultM;
-   logic [4:0] 		    AddFlagsM;
  logic             AddDenormM;
  logic [63:0]      AddSumM, AddSumTcM;
  logic [3:0]       AddSelInvM;
@ -358,22 +139,173 @@ module fpu (
  logic [2:0]       AddRmM;
  logic [3:0]       AddOpTypeM;
  logic             AddPM, AddOvEnM, AddUnEnM;  
+  logic [63:0]      FAddResultM, FAddResultW;
+  logic [4:0]       FAddFlagsM, FAddFlagsW;
+
+  //cmp signals 
+  logic [7:0]       WE, WM;
+  logic [7:0]       XE, XM;
+  logic             ANaNE, ANaNM;
+  logic             BNaNE, BNaNM;
+  logic             AzeroE, AzeroM;
+  logic             BzeroE, BzeroM;
+  logic             CmpInvalidM, CmpInvalidW;
+  logic [1:0]       CmpFCCM, CmpFCCW; 
+  logic [63:0]      FCmpResultW;
+
+  // fsgn signals
+  logic [63:0]      SgnResultE, SgnResultM, SgnResultW;
+  logic [4:0]       SgnFlagsE, SgnFlagsM, SgnFlagsW;
+
+  //instantiation of W stage regfile signals
+  logic [`XLEN-1:0] SrcAW;
+
+  // classify signals
+  logic [63:0]      ClassResultE, ClassResultM, ClassResultW;
+  logic [4:0]       ClassFlagsE, ClassFlagsM, ClassFlagsW;
+
+  // other
+  logic [63:0]      FPUResult64W, FPUResult64E;                                           // 64-bit FPU result
+  logic [4:0]       FPUFlagsW;
+
+  // pipeline control logic
+  logic	                   PipeEnableDE;
+  logic	                   PipeEnableEM;
+  logic	                   PipeEnableMW;
+  logic                    PipeClearDE;
+  logic                    PipeClearEM;
+  logic                    PipeClearMW;
+
+  //temporarily assign pipe clear and enable signals
+  //to never flush & always be running
+  localparam PipeClear = 1'b0;
+  localparam PipeEnable = 1'b1;
+  always_comb begin
+
+	  PipeEnableDE = ~StallE;
+	  PipeEnableEM = ~StallM;
+	  PipeEnableMW = ~StallW;
+	  PipeClearDE = FlushE;
+	  PipeClearEM = FlushM;
+	  PipeClearMW = FlushW;
+
+  end
+
+ 
+
+
+
+
+
+
+
+
+
+
+
+  //DECODE STAGE
+
+  //Hazard unit for FPU
+  fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
+
+  //top-level controller for FPU
+  fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
+
+
+  //regfile instantiation
+   FPregfile fpregfile (clk, reset, FWriteEnW,
+			InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
+			FPUResult64W,
+			FRD1D, FRD2D, FRD3D);	
+
+
+
+
+
+

-   //instantiation of M stage cmp signals
-   logic                    CmpInvalidM;
-   logic [1:0] 		    CmpFCCM; 
-   logic [7:0] 		    WM, XM;
-   logic                    ANaNM, BNaNM, AzeroM, BzeroM;
-   logic [63:0] 	    CmpOp1M, CmpOp2M;
-   logic [1:0] 		    CmpSelM;


  //*****************
  //fpregfile D/E pipe registers
  //*****************
-   flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, Input1E, Input1M);
-   flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, Input2E, Input2M);
-   flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, Input3E, Input3M);
+  flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
+  flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
+  flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
+
+  //*****************
+  //other  D/E pipe registers
+  //*****************
+  flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
+  flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
+  flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
+  flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
+  flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
+  flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
+  flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
+  flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
+  flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
+  flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
+  flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
+  flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
+  flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
+  flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
+
+
+
+
+
+
+
+
+
+
+
+
+
+  //EXECUTION STAGE
+
+
+
+  // input muxes for forwarding
+  mux4  #(64)  FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
+  mux3  #(64)  FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
+  mux2  #(64)  FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
+  mux2  #(64)  FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
+
+  fma1 fma1 (.*);
+
+  //first and only instance of floating-point divider
+  fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .*);
+
+  //first of two-stage instance of floating-point add/cvt unit
+  fpuaddcvt1 fpadd1 (.*);
+
+  //first of two-stage instance of floating-point comparator
+  fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
+
+  //first and only instance of floating-point sign converter
+  fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  //*****************
+  //fpregfile E/M pipe registers
+  //*****************
+  flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
+  flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
+  flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);

  //*****************
  //fma E/M pipe registers
@ -427,8 +359,6 @@ module fpu (
  flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); 
  flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); 
  flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); 
-   flopenrc #(64) EMRegAdd21(clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M); 
-   flopenrc #(64) EMRegAdd22(clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M); 
  flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); 
  flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); 
  flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); 
@ -444,73 +374,58 @@ module fpu (
  flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); 
  flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); 
  flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); 
-   flopenrc #(64) EMRegCmp7(clk, reset, PipeClearEM, PipeEnableEM, CmpOp1E, CmpOp1M); 
-   flopenrc #(64) EMRegCmp8(clk, reset, PipeClearEM, PipeEnableEM, CmpOp2E, CmpOp2M); 
-   flopenrc #(2) EMRegCmp9(clk, reset, PipeClearEM, PipeEnableEM, CmpSelE, CmpSelM);

  //put this in for the event we want to delay fsgn - will otherwise bypass
  //*****************
  //fpsgn E/M pipe registers
  //***************** 
-   flopenrc #(2) EMRegSgn1(clk, reset, PipeClearEM, PipeEnableEM, SgnOpCodeE, SgnOpCodeM);
  flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
  flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);

  //*****************
  //other E/M pipe registers
  //*****************
-   flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM);
+  flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
  flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
  flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
  flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
  flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
-   flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
+  flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
  flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
  flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);

-  assign FWriteDataM = Input1M;
-  mux2  #(64)  LoadStoreResultMux(HRDATA, Input1M, |OpCtrlM[2:1], LoadStoreResultM);
+
+
+
+
+
+
+
+
+
+  //BEGIN MEMORY STAGE
+
+  assign FWriteDataM = FInput1M;
+
+  mux2  #(64)  FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
+
  fma2 fma2(.*);

  //second instance of two-stage floating-point add/cvt unit
  fpuaddcvt2 fpadd2 (.*);

  //second instance of two-stage floating-point comparator
-   fpucmp2 fpcmp2 (CmpInvalidM, CmpFCCM, ANaNM, BNaNM, AzeroM, BzeroM, WM, XM, CmpSelM, CmpOp1M, CmpOp2M);
+  fpucmp2 fpcmp2 (CmpInvalidM, CmpFCCM, ANaNM, BNaNM, AzeroM, BzeroM, WM, XM, {1'b0, FmtM}, FInput1M, FInput2M);
+
+

-   //wally-spec W stage control logic signal instantiation
-   logic [2:0] 		    FResultSelW;

-   //instantiate W stage fma signals here
-   logic [63:0] 	    FmaResultW;
-   logic [4:0] 		    FmaFlagsW;

-   //instantiation of W stage div/sqrt signals
-   logic                    DivDenormW;
-   logic [63:0] 	    DivResultW;
-   logic [4:0] 		    DivFlagsW;

-   //instantiation of W stage fsgn signals
-   logic [63:0] 	    SgnResultW;
-   logic [4:0] 		    SgnFlagsW;

-   //instantiation of W stage regfile signals
-   logic [`XLEN-1:0] 	    LoadStoreResultW;
-   logic [`XLEN-1:0] 	    SrcAW;

-   //instantiation of W stage add/cvt signals
-   logic [63:0] 	    AddResultW;
-   logic [4:0] 		    AddFlagsW;
-   logic                    AddDenormW;

-   //instantiation of W stage cmp signals
-   logic [63:0] 	    CmpResultW;
-   logic                    CmpInvalidW;
-   logic [1:0] 		    CmpFCCW; 

-   //instantiation of W stage classify signals
-   logic [63:0] 	    ClassResultW;
-   logic [4:0] 		    ClassFlagsW;
  
  //*****************
  //fma M/W pipe registers
@ -521,16 +436,15 @@ module fpu (
  //*****************
  //fpdiv M/W pipe registers
  //*****************
-   flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, DivResultM, DivResultW); 
-   flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, DivFlagsM, DivFlagsW);
+  flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); 
+  flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
  flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); 

  //*****************
  //fpadd M/W pipe registers
  //*****************
-   flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, AddResultM, AddResultW); 
-   flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, AddFlagsM, AddFlagsW); 
-   flopenrc #(1) MWRegAdd3(clk, reset, PipeClearMW, PipeEnableMW, AddDenormM, AddDenormW); 
+  flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); 
+  flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); 

  //*****************
  //fpcmp M/W pipe registers
@ -547,30 +461,31 @@ module fpu (
  //*****************
  //other M/W pipe registers
  //*****************
-   flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW);
+  flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
  flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
  flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
  flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
  flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
-   flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, LoadStoreResultM, LoadStoreResultW);
+  flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
  flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);

-   //flag signal mux via in-line ternaries
-   logic [4:0] 		    FPUFlagsW;
-   //if bit 2 is active set to sign flags - otherwise:
-   //iff bit one is high - if bit zero is active set to fma flags - otherwise
-   //set to cmp flags
-   //iff bit one is low - if bit zero is active set to add/cvt flags - otherwise
-   //set to div/sqrt flags
-   //assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
-   //	             (FResultSelW[1]) ? 
-   //		     ( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) ) 
-   //		     : ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) ) 
-   //                     );
+
+
+
+
+
+
+
+
+
+  //#########################################
+  //BEGIN WRITEBACK STAGE
+  //#########################################
+
  always_comb begin
 	case (FResultSelW)
 		// div/sqrt
-	3'b000 : FPUFlagsW = DivFlagsW;
+		3'b000 : FPUFlagsW = FDivFlagsW;
 		// cmp		
 		3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
 		//fma/mult
@ -578,45 +493,37 @@ module fpu (
 		// sgn inj
 		3'b011 : FPUFlagsW = SgnFlagsW;
 		// add/sub/cnvt
-	3'b100 : FPUFlagsW = AddFlagsW;
+		3'b100 : FPUFlagsW = FAddFlagsW;
 		// classify
 		3'b101 : FPUFlagsW = ClassFlagsW;
 		// output SrcAW
 		3'b110 : FPUFlagsW = 5'b0;
-	// output ReadData1
+		// output FRD1
 		3'b111 : FPUFlagsW = 5'b0;
 		default : FPUFlagsW = 5'bxxxxx;
 	endcase
  end

-   //result mux via in-line ternaries
-   //the uses the same logic as for flag signals
-   //assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
-   //	             (FResultSelW[1]) ? 
-   //		     ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) ) 
-   //		     : ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) ) 
-   //                   );
-   

  always_comb begin
 	case (FResultSelW)
 		// div/sqrt
-	3'b000 : FPUResultDirW = DivResultW;
+		3'b000 : FPUResult64W = FDivResultW;
 		// cmp		
-	3'b001 : FPUResultDirW = CmpResultW;
+		3'b001 : FPUResult64W = FCmpResultW;
 		//fma/mult
-	3'b010 : FPUResultDirW = FmaResultW;
+		3'b010 : FPUResult64W = FmaResultW;
 		// sgn inj
-	3'b011 : FPUResultDirW = SgnResultW;
+		3'b011 : FPUResult64W = SgnResultW;
 		// add/sub/cnvt
-	3'b100 : FPUResultDirW = AddResultW;
+		3'b100 : FPUResult64W = FAddResultW;
 		// classify
-	3'b101 : FPUResultDirW = ClassResultW;
+		3'b101 : FPUResult64W = ClassResultW;
 		// output SrcAW
-	3'b110 : FPUResultDirW = SrcAW;
+		3'b110 : FPUResult64W = SrcAW;
 		// Load/Store/Move to FP-register
-	3'b111 : FPUResultDirW = LoadStoreResultW;
-	default : FPUResultDirW = {64{1'bx}};
+		3'b111 : FPUResult64W = FLoadStoreResultW;
+		default : FPUResult64W = {64{1'bx}};
 	endcase
  end
  //interface between XLEN size datapath and double-precision sized
@ -626,21 +533,8 @@ module fpu (
  always_comb begin
           
  //zero extension 
-      
-      // Teo 04/13/2021
-      // Commented out XLENDIFF{1'b0} due to error:
-      // Repetition multiplier must be constant.
-      
-      //if(`XLEN > 64) begin
-      //    FPUResultW = {FPUResultDirW,{XLENDIFF{1'b0}}};
-      //end
-      //truncate
-      //else begin
-      FPUResultW = FPUResultDirW[63:64-`XLEN];
+      FPUResultW = FPUResult64W[63:64-`XLEN];
      SetFflagsM = FPUFlagsW;
-      //end

  end  
-   
-endmodule // fpu
-
+endmodule
--- a/wally-pipelined/src/fpu/fpuaddcvt1.sv
+++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv
@ -27,16 +27,15 @@
 //


-module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin);
+module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE);

-   input logic [63:0] op1;		// 1st input operand (A)
-   input logic [63:0] op2;		// 2nd input operand (B)
-   input logic [2:0] 	rm;		// Rounding mode - specify values 
-   input logic [3:0]	op_type;	// Function opcode
-   input logic 	Pin;   		// Result Precision (1 for double, 0 for single)
+   input logic [63:0] FInput1E;		// 1st input operand (A)
+   input logic [63:0] FInput2E;		// 2nd input operand (B)
+   input logic [3:0]	FOpCtrlE;	// Function opcode
+   input logic 	FmtE;   		// Result Precision (1 for double, 0 for single)

   wire          P;
-   assign P = ~Pin | op_type[2];
+   assign P = ~FmtE | FOpCtrlE[2];

   wire [63:0] 	 IntValue;
   wire [11:0] 	 exp1, exp2;
@ -54,44 +53,44 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   wire 	 zeroB;
   wire [5:0]	 align_shift; 

-   output logic [63:0] 	 Float1; 
-   output logic [63:0] 	 Float2;
-   output logic [10:0] 	 exponent;
-   output logic [10:0]	 exponent_postsum;
-   output logic [11:0]	 exp1_denorm, exp2_denorm;//KEP used to be [10:0]
-   output logic [63:0] sum, sum_tc;
-   output logic [3:0]  sel_inv;
-   output logic        corr_sign;
-   output logic 	 signA;
-   output logic	 op1_Norm, op2_Norm;
-   output logic	 opA_Norm, opB_Norm;
-   output logic	 Invalid;
-   output logic 	 DenormIn;
+   output logic [63:0] 	 AddFloat1E; 
+   output logic [63:0] 	 AddFloat2E;
+   output logic [10:0] 	 AddExponentE;
+   output logic [10:0]	 AddExpPostSumE;
+   output logic [11:0]	 AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
+   output logic [63:0] AddSumE, AddSumTcE;
+   output logic [3:0]  AddSelInvE;
+   output logic        AddCorrSignE;
+   output logic 	 AddSignAE;
+   output logic	 AddOp1NormE, AddOp2NormE;
+   output logic	 AddOpANormE, AddOpBNormE;
+   output logic	 AddInvalidE;
+   output logic 	 AddDenormInE;
 //   output logic 	 exp_valid;
-   output logic 	 convert;
-   output logic        swap;
-   output logic 	 normal_overflow;
+   output logic 	 AddConvertE;
+   output logic        AddSwapE;
+   output logic 	 AddNormOvflowE;
   wire [5:0]	 ZP_mantissaA;
   wire [5:0]	 ZP_mantissaB;
   wire		 ZV_mantissaA;
   wire		 ZV_mantissaB;

   // Convert the input operands to their appropriate forms based on 
-   // the orignal operands, the op_type , and their precision P. 
+   // the original operands, FOpCtrlE, and their precision P. 
   // Single precision inputs are converted to double precision 
   // and the sign of the first operand is set appropriately based on
   // if the operation is absolute value or negation. 

-   convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);
+   convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P);

   // Test for exceptions and return the "Invalid Operation" and
-   // "Denormalized" Input Flags. The "sel_inv" is used in
-   // the third pipeline stage to select the result. Also, op1_Norm
-   // and op2_Norm are one if op1 and op2 are not zero or denormalized.
+   // "Denormalized" Input Flags. The "AddSelInvE" is used in
+   // the third pipeline stage to select the result. Also, AddOp1NormE
+   // and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized.
   // sub is one if the effective operation is subtraction. 

-   exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub, 
-		   Float1, Float2, op_type);
+   exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, 
+		   AddFloat1E, AddFloat2E, FOpCtrlE);

   // Perform Exponent Subtraction (used for alignment). For performance
   // both exponent subtractions are performed in parallel. This was 
@ -99,25 +98,25 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   // the two parallel additions. The input values are zero-extended to 12 
   // bits prior to performing the addition. 

-   assign exp1 = {1'b0, Float1[62:52]};
-   assign exp2 = {1'b0, Float2[62:52]};
+   assign exp1 = {1'b0, AddFloat1E[62:52]};
+   assign exp2 = {1'b0, AddFloat2E[62:52]};
   assign exp_diff1 = exp1 - exp2;
-   assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}): exp2 - exp1;
+   assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;

-   // The second operand (B) should be set to zero, if op_type does not
+   // The second operand (B) should be set to zero, if FOpCtrlE does not
   // specify addition or subtraction
-   assign zeroB = op_type[2] | op_type[1];
+   assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];

   // Swapped operands if zeroB is not one and exp1 < exp2. 
-   // Swapping causes exp2 to be used for the result exponent. 
+   // Swapping causes exp2 to be used for the result exponent. 
   // Only the exponent of the larger operand is used to determine
   // the final result. 
-   assign swap = exp_diff1[11] & ~zeroB;
-   assign exponent = swap ? exp2[10:0] : exp1[10:0];
-   assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0];
-   assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
-   assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
-   assign signA     = swap ? Float2[63] : Float1[63];   
+   assign AddSwapE = exp_diff1[11] & ~zeroB;
+   assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
+   assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
+   assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
+   assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
+   assign AddSignAE     = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];   

   // Leading-Zero Detector. Determine the size of the shift needed for
   // normalization. If sum_corrected is all zeros, the exp_valid is 
@ -127,12 +126,12 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);

   // Denormalized exponents created by subtracting the leading zeroes from the original exponents
-   assign exp1_denorm = swap ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa 
-   assign exp2_denorm = swap ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
+   assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa 
+   assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});

   // Determine the alignment shift and limit it to 63. If any bit from 
   // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones. 
-   assign exp_shift = swap ? exp_diff2 : exp_diff1;
+   assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
   assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9] 
     | exp_shift[8] | exp_shift[7] | exp_shift[6];
   assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
@ -147,10 +146,10 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   // and loss of sign information. The two bits to the right of the 
   // original mantissa form the "guard" and "round" bits that are used
   // to round the result. 
-   assign opA_Norm = swap ? op2_Norm : op1_Norm;
-   assign opB_Norm = swap ? op1_Norm : op2_Norm;
-   assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
-   assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};
+   assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
+   assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
+   assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
+   assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};

   // Perform mantissa alignment using a 57-bit barrel shifter 
   // If any of the bits shifted out are one, Sticky_out is set. 
@ -160,8 +159,8 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,

   // Place either the sign-extended 32-bit value or the original 64-bit value 
   // into IntValue (to be used for integer to floating point conversion)
-   assign IntValue [31:0] = op1[31:0];
-   assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];
+   assign IntValue [31:0] = FInput1E[31:0];
+   assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32];

   // If doing an integer to floating point conversion, mantissaA3 is set to 
   // IntVal and the prenomalized exponent is set to 1084. Otherwise, 
@ -169,30 +168,30 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   // and the exponent value is left unchanged. 
   // Under denormalized cases, the exponent before the rounder is set to 1
   // if the normal shift value is 11.
-   assign convert       = ~op_type[2] & op_type[1];
-   assign mantissaA3    = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0}));
+   assign AddConvertE       = ~FOpCtrlE[2] & FOpCtrlE[1];
+   assign mantissaA3    = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));

   // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to 
   // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six  
   // zeros. 
-   assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
-   assign mantissaB3[6]    = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB);
-   assign mantissaB3[5:0]  = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0);
+   assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
+   assign mantissaB3[6]    = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
+   assign mantissaB3[5:0]  = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);

   // The sign of the result needs to be corrected if the true
   // operation is subtraction and the input operands were swapped. 
-   assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
+   assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;

   // 64-bit Mantissa Adder/Subtractor
-   cla64 add1 (sum, mantissaA3, mantissaB3, sub);
+   cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub);

   // 64-bit Mantissa Subtractor - to get the two's complement of the 
   // result when the sign from the adder/subtractor is negative. 
-   cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
+   cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3);
 
   // Finds normal underflow result to determine whether to round final exponent down
-   //***KEP used to be (sum == 16'h0) I am unsure what it's supposed to be
-   assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
+   //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
+   assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);

 endmodule // fpadd

--- a/wally-pipelined/src/fpu/fpuaddcvt2.sv
+++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv
@ -27,15 +27,13 @@
 //


-module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
+module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);

-   input [63:0] AddOp1M;		// 1st input operand (A)
-   input [63:0] AddOp2M;		// 2nd input operand (B)
-   input [2:0] 	AddRmM;		// Rounding mode - specify values 
-   input [3:0]	AddOpTypeM;	// Function opcode
-   input 	AddPM;   		// Result Precision (0 for double, 1 for single)
-   input 	AddOvEnM;		// Overflow trap enabled
-   input 	AddUnEnM;   	// Underflow trap enabled
+   input [2:0] 	FrmM;		// Rounding mode - specify values 
+   input [3:0]	FOpCtrlM;	// Function opcode
+   input 	FmtM;   		// Result Precision (0 for double, 1 for single)
+   // input 	AddOvEnM;		// Overflow trap enabled
+   // input 	AddUnEnM;   	// Underflow trap enabled
   input [63:0] AddSumM, AddSumTcM;
   input [63:0] 	 AddFloat1M; 
   input [63:0] 	 AddFloat2M;
@ -53,12 +51,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   input          AddSwapM;
   // input 	 AddNormOvflowM;

-   output [63:0] AddResultM;	// Result of operation
-   output [4:0]  AddFlagsM;   	// IEEE exception flags 
+   output [63:0] FAddResultM;	// Result of operation
+   output [4:0]  FAddFlagsM;   	// IEEE exception flags 
   output 	 AddDenormM;   	// AddDenormM on input or output   

   wire          P;
-   assign P = AddPM | AddOpTypeM[2];
+   assign P = FmtM | FOpCtrlM[2];

   wire [10:0]   exp_pre;
   wire [63:0] 	 Result;   
@ -82,6 +80,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   wire [63:0]   sum_corr;
   logic AddNormOvflowM;
 
+ 
+   logic 	AddOvEnM;		// Overflow trap enabled
+   logic 	AddUnEnM;   	// Underflow trap enabled
+
+   assign AddOvEnM = 1'b1;
+   assign AddUnEnM = 1'b1;
   //AddExponentM value pre-rounding with considerations for denormalized
   //cases/conversion cases
   assign exp_pre       = AddDenormInM ?
@ -101,7 +105,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;

   // Determines the correct comparison result based on operation and sign of resulting AddSumM
-   assign mantissa_comp = (AddOpTypeM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
+   assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;

   // If the signs are different and both operands aren't denormalized
   // the normal underflow bit is needed and therefore updated.
@ -113,12 +117,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   // If AddSumM is negative, use its two's complement instead. 
   // This value has to be 64-bits to correctly handle the 
   // case 10...00
-   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & AddOpTypeM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~AddOpTypeM[0]) ))
-			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
+   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
+			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));

   // Finds normal underflow result to determine whether to round final AddExponentM down
   //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
-   assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
+   assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);

   // Leading-Zero Detector. Determine the size of the shift needed for
   // normalization. If sum_corrected is all zeros, the exp_valid is 
@ -132,7 +136,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   // be right shifted. It outputs the normalized AddSumM. 
   barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
  
-   assign sum_norm_w_bypass = (AddOpTypeM[3]) ? (AddOpTypeM[0] ? ~sum_corr : sum_corr) : (sum_norm);
+   assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);

   // Round the mantissa to a 52-bit value, with the leading one
   // removed. If the result is a single precision number, the actual 
@ -141,18 +145,18 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   // exactly where the rounding point is. The rounding unit also
   // handles special cases and sets the exception flags.

-   // Changed DenormIO -> AddDenormM and FlagsIn -> AddFlagsM in order to
+   // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to
   // help in processor reservation station detection of load/stores. In
   // other words, the processor would like to know ahead of time that
   // if the result is an exception then don't load or store.
-   rounder round1 (Result, DenormIO, FlagsIn, AddRmM, P, AddOvEnM, AddUnEnM, exp_valid, 
+   rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid, 
 		   AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
 		   AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
-		   AddNormOvflowM, normal_underflow, AddSwapM, AddOpTypeM, AddSumM);
+		   AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);

   // Store the final result and the exception flags in registers.
-   assign AddResultM = Result;
-   assign {AddDenormM, AddFlagsM} = {DenormIO, FlagsIn};
+   assign FAddResultM = Result;
+   assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn};
   
 endmodule // fpadd

--- a/wally-pipelined/src/fpu/fpucmp1.sv
+++ b/wally-pipelined/src/fpu/fpucmp1.sv
@ -37,7 +37,7 @@
 // It also produces an invalid operation flag, which is one
 // if either of the input operands is a signaling NaN per 754

-module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);
+module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec
   
   input logic [63:0] op1; 
   input logic [63:0] op2;
--- a/wally-pipelined/src/fpu/fpuhazard.sv
+++ b/wally-pipelined/src/fpu/fpuhazard.sv
@ -27,45 +27,45 @@

 module fpuhazard(
    input logic [4:0] Adr1, Adr2, Adr3,
-    input logic  FRegWriteE, FRegWriteM, FRegWriteW, 
+    input logic FWriteEnE, FWriteEnM, FWriteEnW, 
 	  input logic [4:0] RdE, RdM, RdW,
 	  input logic DivBusyM,
 	  input logic	RegWriteD,
    input logic [2:0] FResultSelD, FResultSelE,
    input logic IllegalFPUInstrD,
-    input logic In2UsedD, In3UsedD,
+    input logic FInput2UsedD, FInput3UsedD,
  // Stall outputs
 	  output logic FStallD,
-    output logic [1:0] Input1MuxD, Input2MuxD, 
-    output logic Input3MuxD
+    output logic [1:0] FForwardInput1D, FForwardInput2D, 
+    output logic FForwardInput3D
 );


  always_comb begin
    // set ReadData as default
-    Input1MuxD = 2'b00; 
-    Input2MuxD = 2'b00;
-    Input3MuxD = 1'b0;
+    FForwardInput1D = 2'b00; 
+    FForwardInput2D = 2'b00;
+    FForwardInput3D = 1'b0;
    FStallD = DivBusyM;
    if (~IllegalFPUInstrD) begin
 //					if taking a value from int register
-      if ((Adr1 == RdE) & (FRegWriteE | ((FResultSelE == 3'b110) & RegWriteD))) 
-        if (FResultSelE == 3'b110) Input1MuxD = 2'b11; // choose SrcAM
+      if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD))) 
+        if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM
        else FStallD = 1'b1;                           // otherwise stall
-      else if ((Adr1 == RdM) & FRegWriteM) Input1MuxD = 2'b01; // choose FPUResultDirW
-      else if ((Adr1 == RdW) & FRegWriteW) Input1MuxD = 2'b11; // choose FPUResultDirE
+      else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW
+      else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE
    

-      if(In2UsedD)
-        if      ((Adr2 == RdE) & FRegWriteE) FStallD = 1'b1;
-        else if ((Adr2 == RdM) & FRegWriteM) Input2MuxD = 2'b01; // choose FPUResultDirW
-        else if ((Adr2 == RdW) & FRegWriteW) Input2MuxD = 2'b10; // choose FPUResultDirE
+      if(FInput2UsedD)
+        if      ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1;
+        else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW
+        else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE


-      if(In3UsedD)
-        if      ((Adr3 == RdE) & FRegWriteE) FStallD = 1'b1;
-        else if ((Adr3 == RdM) & FRegWriteM) FStallD = 1'b1;
-        else if ((Adr3 == RdW) & FRegWriteW) Input3MuxD = 1'b1; // choose FPUResultDirE
+      if(FInput3UsedD)
+        if      ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1;
+        else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1;
+        else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE
    end

  end 
--- a/wally-pipelined/src/fpu/fsgn.sv
+++ b/wally-pipelined/src/fpu/fsgn.sv
@ -1,8 +1,8 @@
 //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions

-module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E);
+module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);

-	input  [63:0]  SgnOp1E, SgnOp2E;
+	input  [63:0]  FInput1E, FInput2E;
 	input  [1:0]   SgnOpCodeE;
 	output [63:0]  SgnResultE;
 	output [4:0]   SgnFlagsE;
@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E);

 	//op code designation:
 	//
-	//00 - fsgnj - directly copy over sign value of SgnOp2E
-	//01 - fsgnjn - negate sign value of SgnOp2E
-	//10 - fsgnjx - XOR sign values of SgnOp1E & SgnOp2E
+	//00 - fsgnj - directly copy over sign value of FInput2E
+	//01 - fsgnjn - negate sign value of FInput2E
+	//10 - fsgnjx - XOR sign values of FInput1E & FInput2E
 	//
 	
-	assign SgnResultE[63] = SgnOpCodeE[1] ? (SgnOp1E[63] ^ SgnOp2E[63]) : (SgnOp2E[63] ^ SgnOpCodeE[0]);
-	assign SgnResultE[62:0] = SgnOp1E[62:0];
+	assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]);
+	assign SgnResultE[62:0] = FInput1E[62:0];

 	//If the exponent is all ones, then the value is either Inf or NaN,
 	//both of which will produce a QNaN/SNaN value of some sort. This will 
 	//set the invalid flag high.
-	assign AonesExp = SgnOp1E[62]&SgnOp1E[61]&SgnOp1E[60]&SgnOp1E[59]&SgnOp1E[58]&SgnOp1E[57]&SgnOp1E[56]&SgnOp1E[55]&SgnOp1E[54]&SgnOp1E[53]&SgnOp1E[52];
+	assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52];

 	//the only flag that can occur during this operation is invalid
 	//due to changing sign on already existing NaN
--- a/wally-pipelined/src/fpu/special.sv
+++ b/wally-pipelined/src/fpu/special.sv
@ -10,46 +10,46 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE,
+module special(FInput1E, FInput2E, FInput3E, xzeroE, yzeroE, zzeroE,
 				xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
 /////////////////////////////////////////////////////////////////////////////

-	input logic   	[63:0]     	Input1E;              // Input Input1E
-	input logic     	[63:0]     	Input2E;           	// Input Input2E
-	input logic      	[63:0]    	Input3E;            	// Input Input3E 
-	output logic				xzeroE;		// Input Input1E = 0
-	output logic				yzeroE;		// Input Input2E = 0
-	output logic				zzeroE;		// Input Input3E = 0
-	output logic				xnanE;		// Input1E is NaN
-	output logic				ynanE;		// Input2E is NaN
-	output logic				znanE;		// Input3E is NaN
-	output logic				xdenormE;	// Input1E is denormalized
-	output logic				ydenormE;	// Input2E is denormalized
-	output logic				zdenormE;	// Input3E is denormalized
-	output logic				xinfE;		// Input1E is infinity
-	output logic				yinfE;		// Input2E is infinity
-	output logic				zinfE;		// Input3E is infinity
+	input logic   	[63:0]     	FInput1E;              // Input FInput1E
+	input logic     	[63:0]     	FInput2E;           	// Input FInput2E
+	input logic      	[63:0]    	FInput3E;            	// Input FInput3E 
+	output logic				xzeroE;		// Input FInput1E = 0
+	output logic				yzeroE;		// Input FInput2E = 0
+	output logic				zzeroE;		// Input FInput3E = 0
+	output logic				xnanE;		// FInput1E is NaN
+	output logic				ynanE;		// FInput2E is NaN
+	output logic				znanE;		// FInput3E is NaN
+	output logic				xdenormE;	// FInput1E is denormalized
+	output logic				ydenormE;	// FInput2E is denormalized
+	output logic				zdenormE;	// FInput3E is denormalized
+	output logic				xinfE;		// FInput1E is infinity
+	output logic				yinfE;		// FInput2E is infinity
+	output logic				zinfE;		// FInput3E is infinity

 	// In the actual circuit design, the gates looking at bits
 	// 51:0 and at bits 62:52 should be shared among the various detectors.

 	// Check if input is NaN

-	assign xnanE = &Input1E[62:52] && |Input1E[51:0]; 
-	assign ynanE = &Input2E[62:52] && |Input2E[51:0]; 
-	assign znanE = &Input3E[62:52] && |Input3E[51:0];
+	assign xnanE = &FInput1E[62:52] && |FInput1E[51:0]; 
+	assign ynanE = &FInput2E[62:52] && |FInput2E[51:0]; 
+	assign znanE = &FInput3E[62:52] && |FInput3E[51:0];

 	// Check if input is denormalized

-	assign xdenormE = ~(|Input1E[62:52]) && |Input1E[51:0]; 
-	assign ydenormE = ~(|Input2E[62:52]) && |Input2E[51:0]; 
-	assign zdenormE = ~(|Input3E[62:52]) && |Input3E[51:0];
+	assign xdenormE = ~(|FInput1E[62:52]) && |FInput1E[51:0]; 
+	assign ydenormE = ~(|FInput2E[62:52]) && |FInput2E[51:0]; 
+	assign zdenormE = ~(|FInput3E[62:52]) && |FInput3E[51:0];

 	// Check if input is infinity

-	assign xinfE = &Input1E[62:52] && ~(|Input1E[51:0]); 
-	assign yinfE = &Input2E[62:52] && ~(|Input2E[51:0]); 
-	assign zinfE = &Input3E[62:52] && ~(|Input3E[51:0]);
+	assign xinfE = &FInput1E[62:52] && ~(|FInput1E[51:0]); 
+	assign yinfE = &FInput2E[62:52] && ~(|FInput2E[51:0]); 
+	assign zinfE = &FInput3E[62:52] && ~(|FInput3E[51:0]);

 	// Check if inputs are all zero
 	// Also forces denormalized inputs to zero.
@ -57,11 +57,11 @@ module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE,
 	// to just check if the exponent is zero.
 	
 	// KATHERINE - commented following (21/01/11)
-	// assign xzeroE = ~(|Input1E[62:0]) || xdenormE;
-	// assign yzeroE = ~(|Input2E[62:0]) || ydenormE;
-	// assign zzeroE = ~(|Input3E[62:0]) || zdenormE;
+	// assign xzeroE = ~(|FInput1E[62:0]) || xdenormE;
+	// assign yzeroE = ~(|FInput2E[62:0]) || ydenormE;
+	// assign zzeroE = ~(|FInput3E[62:0]) || zdenormE;
 	// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
-	assign xzeroE = ~(|Input1E[62:0]);
-	assign yzeroE = ~(|Input2E[62:0]);
-	assign zzeroE = ~(|Input3E[62:0]);
+	assign xzeroE = ~(|FInput1E[62:0]);
+	assign yzeroE = ~(|FInput2E[62:0]);
+	assign zzeroE = ~(|FInput3E[62:0]);
 endmodule
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@ -100,7 +100,7 @@ module wallypipelinedhart (
  logic       FStallD;
  logic       FWriteIntW, FWriteIntM;
  logic [31:0]      FSROutW;
-  logic             DivSqrtDoneE;
+  logic             FDivSqrtDoneM;
  logic             IllegalFPUInstrD, IllegalFPUInstrE;
  logic [`XLEN-1:0] FPUResultW;