From 12e09a7ace68880dab8feb333b034724400f0a7a Mon Sep 17 00:00:00 2001
From: "James E. Stine" <james.stine@okstate.edu>
Date: Tue, 20 Jul 2021 01:47:46 -0400
Subject: [PATCH] slight mod to fpdiv - still bug in batch vs. non-batch

---
 wally-pipelined/src/fpu/fpu.sv | 524 +++++++++++++++++----------------
 wally-pipelined/src/fpu/fsm.sv | 106 +++----
 2 files changed, 316 insertions(+), 314 deletions(-)

diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index 2d1351ec5..f283f5e4f 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -25,23 +25,23 @@
 `include "wally-config.vh"
 
 module fpu (
-  input logic 		         clk,
-  input logic 		         reset,
-  input logic [2:0]        FRM_REGW,   // Rounding mode from CSR
-  input logic [31:0]       InstrD,
-  input logic [`XLEN-1:0]  ReadDataW,     // Read data from memory
-  input logic [`XLEN-1:0]  SrcAE,      // Integer input being processed
-  input logic [`XLEN-1:0]  SrcAM,      // Integer input being written into fpreg
-  input logic 		         StallE, StallM, StallW,
-  input logic 		         FlushE, FlushM, FlushW,
-  input logic [4:0]        RdE, RdM, RdW, 
-  output logic          FRegWriteM,
-  output logic 		      FStallD,    // Stall the decode stage
-  output logic 		      FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
-  output logic [`XLEN-1:0] FWriteDataE,      // Data to be written to memory
-  output logic [`XLEN-1:0] FIntResM,     
-  output logic 		      FDivBusyE,        // Is the divison/sqrt unit busy
-  output logic 		      IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
+  input logic 		   clk,
+  input logic 		   reset,
+  input logic [2:0] 	   FRM_REGW, // Rounding mode from CSR
+  input logic [31:0] 	   InstrD, // Decode-stage instruction; its fields drive fctrl and the fregfile read addresses
+  input logic [`XLEN-1:0]  ReadDataW, // Read data from memory
+  input logic [`XLEN-1:0]  SrcAE, // Integer input being processed
+  input logic [`XLEN-1:0]  SrcAM, // Integer input being written into fpreg
+  input logic 		   StallE, StallM, StallW, // Inverted and passed to the D/E, E/M and M/W pipeline registers
+  input logic 		   FlushE, FlushM, FlushW, // Passed to the D/E, E/M and M/W pipeline registers
+  input logic [4:0] 	   RdE, RdM, RdW, // RdM/RdW go to fhazard; RdW is also passed to fregfile
+  output logic 		   FRegWriteM, // FP register write enable, M stage (also an fhazard input)
+  output logic 		   FStallD, // Stall the decode stage
+  output logic 		   FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
+  output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
+  output logic [`XLEN-1:0] FIntResM, // Output of IntResMux, selected by FIntResSelM
+  output logic 		   FDivBusyE, // Is the division/sqrt unit busy
+  output logic 		   IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
   output logic [4:0] 	   SetFflagsM);      // FPU result
 // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS 
 // *** folder at same level of src for tests fpu tests
@@ -50,254 +50,256 @@ module fpu (
   generate
      if (`F_SUPPORTED | `D_SUPPORTED) begin 
       // control logic signal instantiation
-      logic 		   FRegWriteD, FRegWriteE, FRegWriteW;              // FP register write enable
-      logic [2:0] 	FrmD, FrmE, FrmM;                                  // FP rounding mode
-      logic 		   FmtD, FmtE, FmtM, FmtW;                                  // FP precision 0-single 1-double
-      logic 		   FDivStartD, FDivStartE;                                  // Start division
-      logic 		   FWriteIntD;                                              // Write to integer register
-      logic [1:0]    FForwardXE, FForwardYE, FForwardZE;                        // Input3 forwarding mux control signal
-      logic [2:0] 	FResultSelD, FResultSelE, FResultSelM, FResultSelW;      // Select FP result
-      logic [3:0] 	FOpCtrlD, FOpCtrlE, FOpCtrlM;                  // Select which opperation to do in each component
-      logic [1:0]    FResSelD, FResSelE, FResSelM;  
-      logic [1:0]    FIntResSelD, FIntResSelE, FIntResSelM;                                   
-      logic [4:0] 	Adr1E, Adr2E, Adr3E;
-      
-      // regfile signals
-      logic [63:0] 	FRD1D, FRD2D, FRD3D;                                     // Read Data from FP register - decode stage
-      logic [63:0] 	FRD1E, FRD2E, FRD3E;                                     // Read Data from FP register - execute stage
-      logic [`XLEN-1:0]   FSrcXMAligned;
-      logic [63:0] 	FSrcXE, FSrcXM;                         // Input 1 to the various units (after forwarding)
-      logic [63:0] 	FSrcYE;                                      // Input 2 to the various units (after forwarding)
-      logic [63:0] 	FSrcZE;                                      // Input 3 to the various units (after forwarding)
-      
-      // unpacking signals
-      logic XSgnE, YSgnE, ZSgnE;
-      logic [10:0] XExpE, YExpE, ZExpE;
-      logic [51:0] XFracE, YFracE, ZFracE;
-      logic        XAssumed1E, YAssumed1E, ZAssumed1E;
-      logic XNaNE, YNaNE, ZNaNE;
-      logic XSNaNE, YSNaNE, ZSNaNE;
-      logic XDenormE, YDenormE, ZDenormE;
-      logic XZeroE, YZeroE, ZZeroE;
-      logic [10:0] BiasE;
-      logic XInfE, YInfE, ZInfE;
-      logic XExpMaxE;
-      logic XNormE;
-
-      logic XSgnM, YSgnM, ZSgnM;
-      logic [10:0] XExpM, YExpM, ZExpM;
-      logic [51:0] XFracM, YFracM, ZFracM;
-      logic XNaNM, YNaNM, ZNaNM;
-      logic XSNaNM, YSNaNM, ZSNaNM;
-      logic XZeroM, YZeroM, ZZeroM;
-      logic XInfM, YInfM, ZInfM;
-      
-      // div/sqrt signals
-      logic [63:0] 	FDivResultM, FDivResultW;
-      logic [4:0]    FDivSqrtFlgM, FDivSqrtFlgW;
-      logic          FDivSqrtDoneE;
-      logic [63:0] 	DivInput1E, DivInput2E;
-      logic          HoldInputs;                                              // keep forwarded inputs arround durring division
-      
-      //fpu signals
-      logic [63:0]   FMAResM, FMAResW;
-      logic [4:0]    FMAFlgM, FMAFlgW;
-
-
-      logic [63:0]   ReadResW;
-
-      // add/cvt signals
-      logic [63:0] 	FAddResM, FAddResW;
-      logic [4:0] 	FAddFlgM, FAddFlgW;  
-      logic [63:0] 	CvtResE, CvtResM;
-      logic [4:0] 	CvtFlgE, CvtFlgM;  
-      
-      // cmp signals 
-      logic 		   CmpNVE, CmpNVM, CmpNVW;
-      logic [63:0] 	CmpResE, CmpResM, CmpResW;
-      
-      // fsgn signals
-      logic [63:0] 	SgnResE, SgnResM;
-      logic        	SgnNVE, SgnNVM, SgnNVW;
-      logic [63:0]   FResM, FResW;
-      logic [4:0]         FFlgM, FFlgW;
-      
-      // instantiation of W stage regfile signals
-      logic [63:0] 	AlignedSrcAM;
-      
-      // classify signals
-      logic [63:0] 	ClassResE, ClassResM;
-      
-      // 64-bit FPU result   
-      logic [63:0] 	FPUResultW;                                           
-      logic [4:0] 	FPUFlagsW;
-      
-
-      //DECODE STAGE
-      
-      // top-level controller for FPU
-      fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), 
-                  .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, 
-                  .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
-      
-      // regfile instantiation
-      fregfile fregfile (clk, reset, FRegWriteW,
-            InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
-            FPUResultW,
-            FRD1D, FRD2D, FRD3D);	
-
-      //*****************
-      // D/E pipe registers
-      //*****************
-      flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
-      flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
-      flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
-      flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
-      flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
-                                                            {Adr1E,         Adr2E,         Adr3E});
-      flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, 
-                           {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
-                           {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
-
-
-      //EXECUTION STAGE
-      
-      // Hazard unit for FPU
-      fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, 
+	logic 		   FRegWriteD, FRegWriteE, FRegWriteW;                 // FP register write enable
+	logic [2:0] 	   FrmD, FrmE, FrmM;                                   // FP rounding mode
+	logic 		   FmtD, FmtE, FmtM, FmtW;                             // FP precision 0-single 1-double
+	logic 		   FDivStartD, FDivStartE;                             // Start division
+	logic 		   FWriteIntD;                                         // Write to integer register
+	logic [1:0] 	   FForwardXE, FForwardYE, FForwardZE;                 // Forwarding mux selects for inputs X, Y and Z
+	logic [2:0] 	   FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
+	logic [3:0] 	   FOpCtrlD, FOpCtrlE, FOpCtrlM;                       // Select which operation to do in each component
+	logic [1:0] 	   FResSelD, FResSelE, FResSelM;                       // Select for FResMux and FFlgMux (M-stage copy is used)
+	logic [1:0] 	   FIntResSelD, FIntResSelE, FIntResSelM;              // Select for IntResMux (M-stage copy is used)
+	logic [4:0] 	   Adr1E, Adr2E, Adr3E;                                // InstrD[19:15], [24:20], [31:27] registered into E; fhazard inputs
+	
+	// regfile signals
+	logic [63:0] 	   FRD1D, FRD2D, FRD3D;                                // Read Data from FP register - decode stage
+	logic [63:0] 	   FRD1E, FRD2E, FRD3E;                                // Read Data from FP register - execute stage
+	logic [`XLEN-1:0]  FSrcXMAligned;
+	logic [63:0] 	   FSrcXE, FSrcXM;                                     // Input 1 to the various units (after forwarding)
+	logic [63:0] 	   FSrcYE;                                             // Input 2 to the various units (after forwarding)
+	logic [63:0] 	   FSrcZE;                                             // Input 3 to the various units (after forwarding)
+	
+	// unpacking signals
+	logic 		   XSgnE, YSgnE, ZSgnE;
+	logic [10:0] 	   XExpE, YExpE, ZExpE;
+	logic [51:0] 	   XFracE, YFracE, ZFracE;
+	logic 		   XAssumed1E, YAssumed1E, ZAssumed1E;
+	logic 		   XNaNE, YNaNE, ZNaNE;
+	logic 		   XSNaNE, YSNaNE, ZSNaNE;
+	logic 		   XDenormE, YDenormE, ZDenormE;
+	logic 		   XZeroE, YZeroE, ZZeroE;
+	logic [10:0] 	   BiasE;
+	logic 		   XInfE, YInfE, ZInfE;
+	logic 		   XExpMaxE;
+	logic 		   XNormE;
+	
+	logic 		   XSgnM, YSgnM, ZSgnM;
+	logic [10:0] 	   XExpM, YExpM, ZExpM;
+	logic [51:0] 	   XFracM, YFracM, ZFracM;
+	logic 		   XNaNM, YNaNM, ZNaNM;
+	logic 		   XSNaNM, YSNaNM, ZSNaNM;
+	logic 		   XZeroM, YZeroM, ZZeroM;
+	logic 		   XInfM, YInfM, ZInfM;
+	
+	// div/sqrt signals
+	logic [63:0] 	   FDivResultM, FDivResultW;                           // fpdiv AS_Result and its M/W-registered copy
+	logic [4:0] 	   FDivSqrtFlgM, FDivSqrtFlgW;                         // FDivSqrtFlgM is driven by fpdiv Flags
+	logic 		   FDivSqrtDoneE;                                      // fpdiv done; also the clear input of the capture registers
+	logic [63:0] 	   DivInput1E, DivInput2E;                             // Captured FSrcXE / FSrcYE, fed to fpdiv op1 / op2
+	logic 		   HoldInputs;                                              // keep forwarded inputs around during division
+	
+	//fpu signals
+	logic [63:0] 	   FMAResM, FMAResW;
+	logic [4:0] 	   FMAFlgM, FMAFlgW;
+	
+	logic [63:0] 	   ReadResW;
+	
+	// add/cvt signals
+	logic [63:0] 	   FAddResM, FAddResW;
+	logic [4:0] 	   FAddFlgM, FAddFlgW;  
+	logic [63:0] 	   CvtResE, CvtResM;
+	logic [4:0] 	   CvtFlgE, CvtFlgM;  
+	
+	// cmp signals 
+	logic 		   CmpNVE, CmpNVM, CmpNVW;
+	logic [63:0] 	   CmpResE, CmpResM, CmpResW;
+	
+	// fsgn signals
+	logic [63:0] 	   SgnResE, SgnResM;
+	logic 		   SgnNVE, SgnNVM, SgnNVW;
+	logic [63:0] 	   FResM, FResW;
+	logic [4:0] 	   FFlgM, FFlgW;
+	
+	// instantiation of W stage regfile signals
+	logic [63:0] 	   AlignedSrcAM;
+	
+	// classify signals
+	logic [63:0] 	   ClassResE, ClassResM;
+	
+	// 64-bit FPU result   
+	logic [63:0] 	   FPUResultW;                                           
+	logic [4:0] 	   FPUFlagsW;
+	
+	//DECODE STAGE
+	
+	// top-level controller for FPU
+	fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), 
+                     .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, 
+                     .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
+	
+	// regfile instantiation
+	fregfile fregfile (clk, reset, FRegWriteW,
+			   InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
+			   FPUResultW,
+			   FRD1D, FRD2D, FRD3D);	
+	
+	//*****************
+	// D/E pipe registers
+	//*****************
+	flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
+	flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
+	flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
+	flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
+	flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
+                                   {Adr1E,         Adr2E,         Adr3E});
+	flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+				  {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
+				  {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
+	
+	//EXECUTION STAGE
+	
+	// Hazard unit for FPU
+	fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, 
                         .FForwardXE, .FForwardYE, .FForwardZE);
-
-      // forwarding muxs
-      mux3  #(64)  fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
-      mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
-      mux3  #(64)  fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE);
-
-      unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
+	
+	// forwarding muxes
+	mux3  #(64)  fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
+	mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
+	mux3  #(64)  fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE);
+	
+	unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), 
+			    .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, 
+			    .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, 
+			    .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, 
+			    .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
+			    .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
       // first of two-stage instance of floating-point fused multiply-add unit
-      fma fma (.clk, .reset, .FlushM, .StallM, 
-               .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, 
-               .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-              //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
-               .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), 
-               .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
-      
-      // first and only instance of floating-point divider
-      logic fpdivClk;
-      
-      clockgater fpdivclkg(.E(FDivStartE),
-            .SE(1'b0),
-            .CLK(clk),
-            .ECLK(fpdivClk));
-      
-      // capture the inputs for div/sqrt	 
-      flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
-                  .en(1'b1), .clear(FDivSqrtDoneE),
-                  .reset(reset),  .clk(HoldInputs));
-      flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
-                  .en(1'b1), .clear(FDivSqrtDoneE),
-                  .reset(reset),  .clk(HoldInputs));
-      //*** add round to nearest ties to max magnitude
-      fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]),	.op_type(FOpCtrlE[0]), .P(~FmtE), .FDivBusyE, .HoldInputs, 
-                      .OvEn(1'b1), .UnEn(1'b1),	.start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM));
+	fma fma (.clk, .reset, .FlushM, .StallM, 
+		 .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, 
+		 .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, 
+		 .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, // E-stage operands produced by unpacking
+		 .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, // M-stage copies from the E/M pipe registers
+		 .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
+		 //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
+		 .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), // only the low 3 bits of the 4-bit FOpCtrl are passed
+		 .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
+	
+	// first and only instance of floating-point divider
+	logic 		   fpdivClk; // gated clock from fpdivclkg; NOTE(review): not connected to fdivsqrt below, which uses ~clk - confirm
+	
+	clockgater fpdivclkg(.E(FDivStartE),
+			     .SE(1'b0),
+			     .CLK(clk),
+			     .ECLK(fpdivClk));
+	
+	// capture the inputs for div/sqrt; both registers are clocked by HoldInputs and cleared by FDivSqrtDoneE
+	flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
+				   .en(1'b1), .clear(FDivSqrtDoneE),
+				   .reset(reset),  .clk(HoldInputs)); // NOTE(review): control signal used as a clock - confirm intended
+	flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
+				   .en(1'b1), .clear(FDivSqrtDoneE),
+				   .reset(reset),  .clk(HoldInputs)); // NOTE(review): control signal used as a clock - confirm intended
+	//*** add round to nearest ties to max magnitude (only FrmE[1:0] is passed to fpdiv as rm)
+	fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), 
+			.P(~FmtE), .FDivBusyE, .HoldInputs, // P is the inverse of FmtE (FmtE: 0-single 1-double)
+			.OvEn(1'b1), .UnEn(1'b1),
+			.start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); // fpdiv is clocked on ~clk
+	
         // .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, 
         //                 .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, 
         //                 .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
-      // assign FDivBusyE = 0;
-      // first of two-stage instance of floating-point add/cvt unit
-      faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
-                        .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
-      
-      // first and only instance of floating-point comparator
-      fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
-      
-      // first and only instance of floating-point sign converter
-      fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
-      
-      // first and only instance of floating-point classify unit
-      fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
-
-
-      fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
-
-      // output for store instructions
-      assign FWriteDataE = FSrcYE[`XLEN-1:0];
-
-      //*****************
-      // E/M pipe registers
-      //*****************
-      flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
-      // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
-      // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
-      flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM});
-      flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM});
-      flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM});
-      flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, 
-                          {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
-                          {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
-
-      
-     
-      flopenrc #(1)  EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); 
-      flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); 
-      
-      flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
-      flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
-      
-      flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
-      flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
-      
-      flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
-                           {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
-                           {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
-
-      flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
-
-      //BEGIN MEMORY STAGE
-      mux4  #(64)  FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
-      mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
-
-      // mux2  #(`XLEN)  FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
-      mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
-      
-      // Align SrcA to MSB when single precicion
-      mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
-      mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM);
-
-      //*****************
-      // M/W pipe registers
-      //*****************
-      flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
-      
-      flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); 
-      
-      flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); 
-      
-      flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
-
-      flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
-      
-      flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
-                           {FRegWriteM, FResultSelM, FmtM, FWriteIntM},
-                           {FRegWriteW, FResultSelW, FmtW, FWriteIntW});
-      
-   //#########################################
-   // BEGIN WRITEBACK STAGE
-   //#########################################
-
-      mux2  #(64)  ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
-      mux5  #(64)  FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
-      
-
-   end else begin // no F_SUPPORTED; tie outputs low
-     assign FStallD = 0;
-     assign FWriteIntE = 0; 
-     assign FWriteIntM = 0;
-     assign FWriteIntW = 0;
-     assign FWriteDataE = 0;
-     assign FIntResM = 0;
-     assign FDivBusyE = 0;
-     assign IllegalFPUInstrD = 1;
-     assign SetFflagsM = 0;
-   end
+	// assign FDivBusyE = 0;
+	
+	// first of two-stage instance of floating-point add/cvt unit
+	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
+                         .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
+	
+	// first and only instance of floating-point comparator
+	fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, 
+		   .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, 
+		   .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
+	
+	// first and only instance of floating-point sign converter
+	fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
+	
+	// first and only instance of floating-point classify unit
+	fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
+	
+	fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
+	
+	// output for store instructions
+	assign FWriteDataE = FSrcYE[`XLEN-1:0];
+	
+	//*****************
+	// E/M pipe registers
+	//*****************
+	flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
+	// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
+	// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
+	flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM});
+	flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM});
+	flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM});
+	flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, 
+				{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
+				{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
+	
+	flopenrc #(1)  EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); 
+	flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); 
+	
+	flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
+	flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
+	
+	flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
+	flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
+	
+	flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
+				 {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
+				 {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
+	
+	flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
+	
+	//BEGIN MEMORY STAGE
+	mux4  #(64)  FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
+	mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
+	
+	// mux2  #(`XLEN)  FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
+	mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
+	
+	// Widen SrcAM to 64 bits with the upper bits filled with 1s: either its low 32 bits or all XLEN bits, chosen by FmtM
+	mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
+	mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); // NOTE(review): M-stage flags selected by W-stage FResultSelW - confirm the stage
+	
+	//*****************
+	// M/W pipe registers
+	//*****************
+	flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
+	flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); 
+	flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); 
+	flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
+	flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
+	flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
+				{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
+				{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
+	
+	//#########################################
+	// BEGIN WRITEBACK STAGE
+	//#########################################
+	mux2  #(64)  ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
+	mux5  #(64)  FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
+	
+	
+     end else begin // no F_SUPPORTED; tie outputs low
+	assign FStallD = 0;
+	assign FWriteIntE = 0; 
+	assign FWriteIntM = 0;
+	assign FWriteIntW = 0;
+	assign FWriteDataE = 0;
+	assign FIntResM = 0;
+	assign FDivBusyE = 0;
+	assign IllegalFPUInstrD = 1;
+	assign SetFflagsM = 0;
+     end
   endgenerate 
-  
+   
 endmodule // fpu
diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv
index 0e15f5534..8991fb71d 100755
--- a/wally-pipelined/src/fpu/fsm.sv
+++ b/wally-pipelined/src/fpu/fsm.sv
@@ -6,7 +6,7 @@ module fsm (done, load_rega, load_regb, load_regc,
    input 	clk;
    input 	reset;
    input 	start;
-//    input 	error;
+   //    input 	error;
    input  	op_type;
    //***can use divbusy insted of holdinputs
    output       done;      
@@ -113,8 +113,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S1:
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -129,8 +129,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S2: // iteration 1
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b1;
 	       load_regc = 1'b0;
@@ -145,8 +145,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S3:
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -161,8 +161,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S4: // iteration 2
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b1;
 	       load_regc = 1'b0;
@@ -177,8 +177,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S5:
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -193,8 +193,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S6: // iteration 3
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b1;
 	       load_regc = 1'b0;
@@ -209,8 +209,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S7:
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -225,8 +225,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S8: // q,qm,qp
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -241,8 +241,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S9:  // rem
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -257,8 +257,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S10:  // done
 	    begin
 	       done = 1'b1;
-		   divBusy = 1'b0;
-		   holdInputs = 1'b0;
+	       divBusy = 1'b0;
+	       holdInputs = 1'b0;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -273,8 +273,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S13:  // start of sqrt path
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -289,8 +289,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S14:  
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -305,8 +305,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S15:  // iteration 1
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b1;
 	       load_regc = 1'b0;
@@ -321,8 +321,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S16:  
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -337,8 +337,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S17:  
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -353,8 +353,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S18:  // iteration 2
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b1;
 	       load_regc = 1'b0;
@@ -369,8 +369,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S19:  
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -385,8 +385,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S20:  
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -401,8 +401,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S21:  // iteration 3
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b1;
 	       load_regc = 1'b0;
@@ -417,8 +417,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S22:  
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -433,8 +433,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S23:  
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b1;
 	       load_regb = 1'b0;
 	       load_regc = 1'b1;
@@ -449,8 +449,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S24: // q,qm,qp
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -465,8 +465,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  S25:  // rem
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b1;
-		   holdInputs = 1'b1;
+	       divBusy = 1'b1;
+	       holdInputs = 1'b1;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -476,13 +476,13 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b110;
 	       sel_muxr = 1'b1;
-	       NEXT_STATE = S26;
-	    end 	  
+	       NEXT_STATE = S27; // NOTE(review): previously S26 (done); confirm S27 is a defined state that asserts done
+	    end 
 	  S26:  // done
 	    begin
 	       done = 1'b1;
-		   divBusy = 1'b0;
-		   holdInputs = 1'b0;
+	       divBusy = 1'b0;
+	       holdInputs = 1'b0;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
@@ -497,8 +497,8 @@ module fsm (done, load_rega, load_regb, load_regc,
 	  default: 
 	    begin
 	       done = 1'b0;
-		   divBusy = 1'b0;
-		   holdInputs = 1'b0;
+	       divBusy = 1'b0;
+	       holdInputs = 1'b0;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;