diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv
new file mode 100755
index 000000000..4051f6de9
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpdiv.sv
@@ -0,0 +1,151 @@
+//
+// File name : fpdiv
+// Title     : Floating-Point Divider/Square-Root
+// project   : FPU
+// Library   : fpdiv
+// Author(s) : James E. Stine, Jr.
+// Purpose   : definition of main unit to floating-point div/sqrt
+// notes :   
+//
+// Copyright Oklahoma State University
+//
+// Basic Operations
+//
+// Step 1: Load operands, set flags, and convert SP to DP
+// Step 2: Check for special inputs ( +/- Infinity,  NaN)
+// Step 3: Exponent Logic
+// Step 4: Divide/Sqrt using Goldschmidt
+// Step 5: Normalize the result.
+//   Shift left until normalized.  Normalized when the value to the 
+//   left of the binary point is 1.
+// Step 6: Round the result.
+// Step 7: Put quotient/remainder onto output.
+//
+
+`timescale 1ps/1ps
+module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
+	      start, reset, clk);
+   // Top level of the FP divide/sqrt unit: input conversion, exception checks, exponent logic, divconv datapath, fsm_div control, rounding.
+   input [63:0] op1;		// 1st input operand (A)
+   input [63:0] op2;		// 2nd input operand (B)
+   input [1:0] 	rm;		// Rounding mode, forwarded to rounder_div
+   input 	op_type;	// Function opcode: 1 selects the sqrt exponent/sign, 0 the divide ones
+   input 	P;   		// Result Precision (0 for double, 1 for single)
+   input 	OvEn;		// Overflow trap enabled
+   input 	UnEn;   	// Underflow trap enabled
+   input 	start;		// Forwarded to fsm_div
+   input 	reset;
+   input 	clk;   
+
+   output [63:0] AS_Result;	// Result of operation
+   output [4:0]  Flags;   	// IEEE exception flags 
+   output 	 Denorm;   	// Denorm on input or output
+   output 	 done;		// Produced by fsm_div; also wired to the output registers
+
+   supply1 	  vdd;
+   supply0 	  vss;   
+
+   wire [63:0] 	 Float1; 
+   wire [63:0] 	 Float2;
+   wire [63:0] 	 IntValue;
+   
+   wire [12:0] 	 exp1, exp2, expF;
+   wire [12:0] 	 exp_diff, bias;
+   wire [13:0] 	 exp_sqrt;
+   wire [12:0] 	 exp_s;
+   wire [12:0] 	 exp_c;
+   wire 	 exp_odd, exp_cout1, exp_cout2, open;	// 1-bit nets, declared explicitly instead of implicitly
+   wire [10:0] 	 exponent, exp_pre;
+   wire [63:0] 	 Result;   
+   wire [52:0] 	 mantissaA;
+   wire [52:0] 	 mantissaB; 
+   wire [63:0] 	 sum, sum_tc, sum_corr, sum_norm;
+   
+   wire [5:0] 	 align_shift;
+   wire [5:0] 	 norm_shift;
+   wire [2:0] 	 sel_inv;
+   wire		 op1_Norm, op2_Norm;
+   wire		 opA_Norm, opB_Norm;
+   wire		 Invalid;
+   wire 	 DenormIn, DenormIO;
+   wire [4:0] 	 FlagsIn;   	
+   wire 	 exp_gt63;
+   wire 	 Sticky_out;
+   wire 	 signResult, sign_corr;
+   wire          corr_sign;
+   wire 	 zeroB;         
+   wire 	 convert;
+   wire          swap;
+   wire          sub;
+   
+   wire [63:0] 	 q1, qm1, qp1, q0, qm0, qp0;
+   wire [63:0] 	 rega_out, regb_out, regc_out, regd_out;
+   wire [127:0]  regr_out;
+   wire [2:0] 	 sel_muxa, sel_muxb;
+   wire 	 sel_muxr;   
+   wire 	 load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
+   wire 	 error;		// NOTE(review): only connected to fsm_div and never driven here -- confirm fsm_div's port direction
+   wire 	 donev, sel_muxrv, sel_muxsv;
+   wire [1:0] 	 sel_muxav, sel_muxbv;   
+   wire 	 load_regav, load_regbv, load_regcv;
+   wire 	 load_regrv, load_regsv;
+   // NOTE(review): the *v nets declared just above are not referenced anywhere else in this module.
+   // Convert the input operands to their appropriate forms based on 
+   // the original operands, the op_type, and their precision P. 
+   // Single precision inputs are converted to double precision.
+   // NOTE(review): any sign adjustment of the first operand (abs/negate)
+   // happens inside convert_inputs_div, if at all -- confirm there.
+   convert_inputs_div conv1 (Float1, Float2, op1, op2, op_type, P);
+
+   // Test for exceptions and return the "Invalid Operation" and
+   // "Denormalized" Input Flags. The "sel_inv" is handed to rounder_div
+   // to select the result. Also, op1_Norm and op2_Norm are one if op1
+   // and op2 are not zero or denormalized (neither is consumed elsewhere
+   // in this module).
+   exception_div exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, 
+		       Float1, Float2, op_type);
+
+   // Determine Sign/Mantissa: sign is the XOR of both signs when op_type=0, Float1's sign when op_type=1
+   assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type;
+   assign mantissaA = {vdd, Float1[51:0]};	// constant 1 placed above the 52 fraction bits
+   assign mantissaB = {vdd, Float2[51:0]};	// constant 1 placed above the 52 fraction bits
+   // Perform Exponent Subtraction - expA - expB + Bias   
+   assign exp1 = {2'b0, Float1[62:52]};
+   assign exp2 = {2'b0, Float2[62:52]};
+   // bias : DP = 2^{11-1}-1 = 1023
+   assign bias = {3'h0, 10'h3FF};
+   // Divide exponent: exp1 + ~exp2 + bias in carry-save form, then a carry-in of 1 completes the subtraction (presumes usual csa/adder port order)
+   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
+   adder #(14) explogic1 ({vss, exp_s}, {vss, exp_c}, 1'b1, {open, exp_diff}, exp_cout1);
+   
+   // Sqrt exponent: exp_odd is 1 when the biased exponent LSB is 0, i.e. the unbiased exponent is odd (bias 1023 is odd)
+   assign exp_odd = Float1[52] ? vss : vdd;
+   adder #(14) explogic2 ({vss, exp1}, {4'h0, 10'h3ff}, exp_odd, exp_sqrt, exp_cout2);
+   // Choose correct exponent (the sqrt path drops the adder's LSB, halving the sum)
+   assign expF = op_type ? exp_sqrt[13:1] : exp_diff;   
+
+   // Main Goldschmidt/Division Routine (NOTE(review): mantissaB is passed before mantissaA -- confirm against divconv's ports)
+   divconv goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
+		  regr_out, mantissaB, mantissaA, sel_muxa, sel_muxb, sel_muxr, 
+		  reset, clk,  load_rega, load_regb, load_regc, load_regd,
+		  load_regr, load_regs, P, op_type, exp_odd);
+
+   // FSM : control divider   
+   fsm_div control (done, load_rega, load_regb, load_regc, load_regd, 
+		    load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, 
+		    clk, reset, start, error, op_type);
+   
+   // Round the mantissa to a 52-bit value, with the leading one
+   // removed. The rounding unit also handles special cases and 
+   // sets the exception flags.   
+   rounder_div round1 (Result, DenormIO, FlagsIn, 
+		   rm, P, OvEn, UnEn, expF, 
+   		   sel_inv, Invalid, DenormIn, signResult, 
+		   q1, qm1, qp1, q0, qm0, qp0, regr_out);
+
+   // Store the final result and the exception flags in registers.
+   flopenr #(64) rega (clk, reset, done, Result, AS_Result);
+   flopenr #(1) regb (clk, reset, done, DenormIO, Denorm);   
+   flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);   
+   
+endmodule // fpdiv
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index 1f878cb1f..77d685918 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -108,17 +108,9 @@ module fpu (
       logic [63:0] 	FPUResultW;                                           
       logic [4:0] 	FPUFlagsW;
       
-      
-
-
-
-
-
-
 
       //DECODE STAGE
       
-      
       // top-level controller for FPU
       fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), 
                   .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, 
@@ -129,14 +121,6 @@ module fpu (
             InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
             FPUResultW,
             FRD1D, FRD2D, FRD3D);	
-      
-
-
-
-
-
-
-
 
       //*****************
       // D/E pipe registers
@@ -152,18 +136,6 @@ module fpu (
                            {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE,          FOpCtrlE, FWriteIntE});
 
 
-
-
-
-
-
-
-
-
-
-
-
-
       //EXECUTION STAGE
       
       // Hazard unit for FPU
@@ -198,12 +170,10 @@ module fpu (
                   .en(~HoldInputs), .clear(FDivSqrtDoneE),
                   .reset(reset),  .clk(clk));
 
-      fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, 
+      fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, 
                         .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, 
                         .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
       
-
-
       // first of two-stage instance of floating-point add/cvt unit
       faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
                         .SrcXE, .SrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
@@ -224,15 +194,6 @@ module fpu (
       // mux2  #(`XLEN)  FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE);
       assign FWriteDataE = SrcYE[`XLEN-1:0];
 
-
-
-
-
-
-
-
-
-
       //*****************
       // E/M pipe registers
       //*****************
@@ -255,36 +216,18 @@ module fpu (
                            {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
 
       flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
-      
-
-
-
-
-
-
 
       //BEGIN MEMORY STAGE
-      
       mux4  #(64)  FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
       mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
 
       // mux2  #(`XLEN)  SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned);
       mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], SrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
-
       
       // Align SrcA to MSB when single precicion
       mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
-         
-         
       mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM);
 
-
-
-
-
-
-
-            
       //*****************
       // M/W pipe registers
       //*****************
@@ -302,16 +245,10 @@ module fpu (
                            {FRegWriteM, FResultSelM, RdM, FmtM, FWriteIntM},
                            {FRegWriteW, FResultSelW, RdW, FmtW, FWriteIntW});
       
-      
-
-
-
-
    //#########################################
    // BEGIN WRITEBACK STAGE
    //#########################################
 
-
       mux2  #(64)  ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
       mux5  #(64)  FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
       
@@ -330,4 +267,3 @@ module fpu (
   endgenerate 
   
 endmodule // fpu
-