diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv
index e893fc81d..bf652a7fd 100644
--- a/wally-pipelined/src/fpu/fcvt.sv
+++ b/wally-pipelined/src/fpu/fcvt.sv
@@ -1,95 +1,122 @@
 
-// `include "wally-config.vh"
+`include "wally-config.vh"
 module fcvt (
-    input logic [63:0] X,
-    input logic [64-1:0] SrcAE,
-    input logic [3:0] FOpCtrlE,
-    input logic [2:0] FrmE,
-    input logic FmtE,
-    output logic [63:0] CvtResE,
-    output logic [4:0] CvtFlgE);
+    input logic [63:0] X,           // floating point input
+    input logic [`XLEN-1:0] SrcAE,  // integer input
+    input logic [3:0] FOpCtrlE,     // chooses which instruction is done (full list below)
+    input logic [2:0] FrmE,         // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic FmtE,               // precision 1 = double 0 = single
+    output logic [63:0] CvtResE,    // convert final result
+    output logic [4:0] CvtFlgE);     // convert flags {invalid, divide by zero, overflow, underflow, inexact}
 
-    logic [10:0] XExp;
-    logic [51:0] XFrac;
-    logic XSgn;
-    logic [10:0] ResExp,TmpExp;
-    logic [51:0] ResFrac;
-    logic ResSgn;
-    logic [10:0] NormCnt;
-    logic [11:0]    Bias;   // 1023 for double, 127 for single
-    logic [7:0]    Bits, SubBits;
-    logic [64+51:0]    ShiftedManTmp;
-    logic [64+51:0]    ShiftVal;
-    logic [64+1:0]    ShiftedMan;
-    logic [64:0]	RoundedTmp;
-    logic [63:0]	Rounded;
-    logic [12:0]    ExpVal, ShiftCnt;
-    logic [64-1:0] PosInt;
-    
-    logic [64-1:0] CvtIntRes;
-    logic [63:0] CvtRes;
-    logic XFracZero, Of,Uf;
-    logic XExpMax;
-    logic XNaN, XDenorm, XInf, XZero;
-    logic Plus1,CalcPlus1, Guard, Round, LSB, Sticky;
-    logic SgnRes, In64;
-    logic Res64;
-    logic RoundMSB;
-    logic RoundSgn;
-    logic XExpZero;
+    logic               XSgn;   // FP input's sign
+    logic [10:0]        XExp;   // FP input's exponent
+    logic [51:0]        XFrac;  // FP input's fraction
+    logic               ResSgn; // FP result's sign
+    logic [10:0]        ResExp,TmpExp; // FP result's exponent
+    logic [51:0]        ResFrac;    // FP result's fraction
+    logic [6:0]         LZResP;     // leading-zero count + 1 of PosInt; reaches 64, so 7 bits are needed (6 bits wraps 64 to 0)
+    // logic              LZResV;
+    logic [11:0]        Bias;       // 1023 for double, 127 for single
+    logic [7:0]         Bits;       // how many bits are in the integer result
+    logic [7:0]         SubBits;    // width of the integer input (64 or 32), added into the FP result's exponent
+    logic [64+51:0]     ShiftedManTmp;  // shifted mantissa - kept 64-bit based because it is sliced with fixed 64-based indices
+    logic [64+51:0]     ShiftVal;       // value being shifted (to int - XMan, to FP - |integer input|)
+    logic [64+1:0]      ShiftedMan;     // shifted mantissa truncated (ShiftedManTmp[64+51:50])
+    logic [64:0]        RoundedTmp;     // full size rounded result - in case of overflow
+    logic [63:0]        Rounded;        // rounded result
+    logic [12:0]        ExpVal;         // unbiased X exponent
+    logic [12:0]        ShiftCnt;       // how much is the mantissa shifted
+    logic [63:0]        IntIn;          // integer input aligned to the most significant bits (always 64 bits: bit 63 is read as the sign)
+    logic [63:0]        PosInt;         // absolute value of the integer input (searched from bit 63 by the leading one detector)
+    logic [63:0]        CvtIntRes;      // integer result from the fp -> int instructions
+    logic [63:0]        CvtFPRes;       // floating point result from the int -> fp instructions
+    logic               XFracZero;      // is the fraction of X zero?
+    logic               Of, Uf;         // did the integer result overflow or underflow
+    logic               XExpZero;       // is X's exponent zero
+    logic               XExpMax;        // is the exponent all ones
+    logic               XNaN, XDenorm, XInf, XZero; // is X a special value
+    logic               Guard, Round, LSB, Sticky;  // bits used to determine rounding
+    logic               Plus1,CalcPlus1;    // do you add one for rounding
+    logic               SgnRes;             // set for fp -> int with a 32-bit result (to int & not long); picks 32-bit saturation values
+    logic               Res64, In64;        // is the result or input 64 bits
+    logic               RoundMSB;           // bit just above the result width in RoundedTmp (bit 64 or 32)
+    logic               RoundSgn;           // sign of the rounded result
 
-      //  fcvt.w.s  = 0010 -
-      //  fcvt.wu.s = 0110 -
-      //  fcvt.s.w  = 0001 
-      //  fcvt.s.wu = 0101 
-      //  fcvt.l.s  = 1010 -
-      //  fcvt.lu.s = 1110 -
-      //  fcvt.s.l  = 1001 
-      //  fcvt.s.lu = 1101 
-      //  fcvt.w.d  = 0010 - 
-      //  fcvt.wu.d = 0110 -
-      //  fcvt.d.w  = 0001 
-      //  fcvt.d.wu = 0101 
-      //  fcvt.l.d  = 1010 -
-      //  fcvt.lu.d = 1110 -
-      //  fcvt.d.l  = 1001 --
-      //  fcvt.d.lu = 1101 --
-      //  {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
+    // FOpCtrlE:
+      //  fcvt.w.s  = 0010
+      //  fcvt.wu.s = 0110
+      //  fcvt.s.w  = 0001
+      //  fcvt.s.wu = 0101
+      //  fcvt.l.s  = 1010
+      //  fcvt.lu.s = 1110
+      //  fcvt.s.l  = 1001
+      //  fcvt.s.lu = 1101
+      //  fcvt.w.d  = 0010 
+      //  fcvt.wu.d = 0110
+      //  fcvt.d.w  = 0001
+      //  fcvt.d.wu = 0101
+      //  fcvt.l.d  = 1010
+      //  fcvt.lu.d = 1110
+      //  fcvt.d.l  = 1001
+      //  fcvt.d.lu = 1101
+      //  {long, unsigned, to int, from int}
+   
+    // split the input into its various parts
     assign XSgn = X[63];
     assign XExp = FmtE ? X[62:52] : {3'b0, X[62:55]};
     assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
+
+    // determine if the exponent and fraction are all zero or ones
     assign XExpZero = ~|XExp;
-   
     assign XFracZero = ~|XFrac;
     assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
+
+    // determine if X is a special value
     assign XNaN = XExpMax & ~XFracZero;
     assign XDenorm = XExpZero & ~XFracZero;
     assign XInf = XExpMax & XFracZero;
     assign XZero = XExpZero & XFracZero;
 
-
+    // calculate signals based off the input and output's size
     assign Bias = FmtE ? 12'h3ff : 12'h7f;
     assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
     assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
     assign SubBits = In64 ? 8'd64 : 8'd32;
     assign Bits = Res64 ? 8'd64 : 8'd32;
+
+    // calculate the unbiased exponent
     assign ExpVal = XExp - Bias + XDenorm;
 
 ////////////////////////////////////////////////////////
 
-	logic [64-1:0] IntIn;
+    // position the input in the most significant bits
     assign IntIn = FOpCtrlE[3] ? SrcAE : {SrcAE[31:0], 32'b0};
+    // make the integer positive
     assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
+    // determine the integer's sign
     assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
     
+    // This did not work \/
+    // generate
+    //     if(`XLEN == 64) 
+    //         lz64 lz(LZResP, LZResV, PosInt);
+    //     else if(`XLEN == 32) begin
+    //         assign LZResP[5] = 1'b0;
+    //         lz32 lz(LZResP[4:0], LZResV, PosInt);
+    //     end 
+    // endgenerate
+
 	// Leading one detector
 	logic [8:0]	i;
 	always_comb begin
 			i = 0;
 			while (~PosInt[64-1-i] && i <= 64) i = i+1;  // search for leading one 
-			NormCnt = i+1;    // compute shift count
+			LZResP = i+1;    // compute shift count
 	end
-    assign TmpExp = i==64 ? 0 : Bias + SubBits - NormCnt;
+
+    // if no leading one was found, set the exponent to zero; otherwise calculate the exponent
+    assign TmpExp = i==64 ? 0 : Bias + SubBits - LZResP;
 
 
 
@@ -97,15 +124,21 @@ module fcvt (
 ////////////////////////////////////////////
 
 
+    // select the shift value and amount based on operation (to fp or int)
+    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
+    assign ShiftVal = FOpCtrlE[1] ? {{`XLEN-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
 
-    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : NormCnt;
-    assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
-	//if shift = -1 then shift one bit right for round to nearest (shift over 2 never rounds)
-	// if the shift is negitive add bit for sticky bit
+	// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
+	// if the shift is negative add a bit for sticky bit calculation
 	// otherwise shift left
-    assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {115'b0, ~XZero} : ShiftVal << ShiftCnt;
+    assign ShiftedManTmp = &ShiftCnt ? {{`XLEN-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{`XLEN+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt;
 
+    // truncate the shifted mantissa
     assign ShiftedMan = ShiftedManTmp[64+51:50];
+
+    // calculate sticky bit 
+    //  - take into account the possible right shift from before
+    //  - the sticky bit calculation covers three different sizes depending on the operation
     assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
 
     
@@ -126,33 +159,45 @@ module fcvt (
         endcase
     end
 
+    // don't round if the result is exact
     assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);
 
+    // round the shifted mantissa
     assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
     assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 :  {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
 
+    // fit the rounded result into the appropriate size and take the 2's complement if needed
      assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : 
 			      XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
+
+    // extract the MSB and Sign for later use (will be used to determine underflow and overflow)
      assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
      assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];
 
 
-
-   // Choose result
-   //    double to unsigned long
-   //         >2^64-1 or +inf or NaN - all 1's
-   //         <0 or -inf - zero
-   //         otherwise rounded result
-    //assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
+    // check if the result overflows
     assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
+
+    // check if the result underflows (this calculation changes if the result is signed or unsigned)
     assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]);    // assign CvtIntRes =  (XSgn | ShiftCnt[12]) ? {64{1'b0}}  : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
+    
+    // flag fp -> int conversions with a 32-bit result (to int and not long); selects the 32-bit saturation values
     assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
+
+    // select the integer result
     assign CvtIntRes = Of ? FOpCtrlE[2] ? SgnRes ? {32'b0, {32{1'b1}}}: {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
                     Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
 		            Rounded[64-1:0];
-                    
-    assign CvtRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
-    assign CvtResE = FOpCtrlE[0] ? CvtRes : CvtIntRes;
+
+    // select the floating point result            
+    assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
+
+    // select the result
+    assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
+
+    // calculate the flags
+    //      - to int only sets the invalid flag
+    //      - from int only sets the inexact flag
     assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};
 
 
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index ab351bd78..0ff199129 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -40,8 +40,7 @@ module fpu (
   output logic [`XLEN-1:0] FIntResM,     
   output logic 		      FDivBusyE,        // Is the divison/sqrt unit busy
   output logic 		      IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
-  output logic [4:0] 	   SetFflagsM,       // FPU flags
-  output logic [`XLEN-1:0] FPUResultW);      // FPU result
+  output logic [4:0] 	   SetFflagsM);      // FPU flags
 // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS 
 
   generate
@@ -79,6 +78,9 @@ module fpu (
       logic [63:0]   FMAResM, FMAResW;
       logic [4:0]    FMAFlgM, FMAFlgW;
 
+
+      logic [63:0]   ReadResW;
+
       // add/cvt signals
       logic [63:0] 	FAddResM, FAddResW;
       logic [4:0] 	FAddFlgM, FAddFlgW;  
@@ -102,7 +104,7 @@ module fpu (
       logic [63:0] 	ClassResE, ClassResM;
       
       // 64-bit FPU result   
-      logic [63:0] 	FPUResult64W;                                           
+      logic [63:0] 	FPUResultW;                                           
       logic [4:0] 	FPUFlagsW;
       
       
@@ -124,7 +126,7 @@ module fpu (
       // regfile instantiation
       fregfile fregfile (clk, reset, FWriteEnW,
             InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
-            FPUResult64W,
+            FPUResultW,
             FRD1D, FRD2D, FRD3D);	
       
 
@@ -168,9 +170,9 @@ module fpu (
                         .ForwardXE, .ForwardYE, .ForwardZE);
 
       // forwarding muxs
-      mux3  #(64)  fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
-      mux3  #(64)  fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE);
-      mux3  #(64)  fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE);
+      mux3  #(64)  fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE);
+      mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE);
+      mux3  #(64)  fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE);
 
       
       // first of two-stage instance of floating-point fused multiply-add unit
@@ -218,8 +220,7 @@ module fpu (
       fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
 
       // output for store instructions
-      assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
-      //***swap to mux
+      mux2  #(`XLEN)  FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE);
 
 
 
@@ -265,8 +266,7 @@ module fpu (
       mux4  #(64)  FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
       mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
 
-      //***change to mux
-      assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
+      mux2  #(`XLEN)  SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned);
       mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
 
       
@@ -318,28 +318,10 @@ module fpu (
    //#########################################
 
 
+      mux2  #(64)  ReadResMux({ReadDataW[31:0], 32'b0}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, ReadResW);
+      mux5  #(64)  FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
+      
 
-      always_comb begin
-         case (FResultSelW)
-      3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
-      3'b001 : FPUResult64W = FMAResW;
-      3'b010 : FPUResult64W = FAddResW;
-      3'b011 : FPUResult64W = FDivResultW;
-      3'b100 : FPUResult64W = FResW;
-      default : FPUResult64W = 64'bxxxxx;
-         endcase
-      end
-      
-      
-      // interface between XLEN size datapath and double-precision sized
-      // floating-point results
-      //
-      // define offsets for LSB zero extension or truncation
-   always_comb begin      
-      // zero extension 
-//***turn into mux
-      FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};    
-   end
    end else begin // no F_SUPPORTED; tie outputs low
      assign FStallD = 0;
      assign FWriteIntE = 0; 
@@ -350,7 +332,6 @@ module fpu (
      assign FDivBusyE = 0;
      assign IllegalFPUInstrD = 1;
      assign SetFflagsM = 0;
-     assign FPUResultW = 0;
    end
   endgenerate