fcvt.sv cleanup

2025-02-11 06:05:49 +00:00 · 2021-07-11 21:30:01 -04:00 · 2021-07-11 21:30:01 -04:00 · a4bd128978
commit a4bd128978
parent 0cc07fda1b
2 changed files with 132 additions and 106 deletions
--- a/wally-pipelined/src/fpu/fcvt.sv
+++ b/wally-pipelined/src/fpu/fcvt.sv
@ -1,95 +1,122 @@

-// `include "wally-config.vh"
+`include "wally-config.vh"
 module fcvt (
-    input logic [63:0] X,
-    input logic [64-1:0] SrcAE,
-    input logic [3:0] FOpCtrlE,
-    input logic [2:0] FrmE,
-    input logic FmtE,
-    output logic [63:0] CvtResE,
-    output logic [4:0] CvtFlgE);
+    input logic [63:0] X,           // floating point input
+    input logic [`XLEN-1:0] SrcAE,  // integer input
+    input logic [3:0] FOpCtrlE,     // chooses which instruction is done (full list below)
+    input logic [2:0] FrmE,         // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic FmtE,               // precision 1 = double 0 = single
+    output logic [63:0] CvtResE,    // convert final result
+    output logic [4:0] CvtFlgE);     // convert flags {invalid, divide by zero, overflow, underflow, inexact}

-    logic [10:0] XExp;
-    logic [51:0] XFrac;
-    logic XSgn;
-    logic [10:0] ResExp,TmpExp;
-    logic [51:0] ResFrac;
-    logic ResSgn;
-    logic [10:0] NormCnt;
+    logic               XSgn;   // FP input's sign
+    logic [10:0]        XExp;   // FP input's exponent
+    logic [51:0]        XFrac;  // FP input's fraction
+    logic               ResSgn; // FP result's sign
+    logic [10:0]        ResExp,TmpExp; // FP result's exponent
+    logic [51:0]        ResFrac;    // FP result's fraction
+    logic [5:0]         LZResP;     // lz output
+    // logic              LZResV;
    logic [11:0]        Bias;       // 1023 for double, 127 for single
-    logic [7:0]    Bits, SubBits;
-    logic [64+51:0]    ShiftedManTmp;
-    logic [64+51:0]    ShiftVal;
-    logic [64+1:0]    ShiftedMan;
-    logic [64:0]	RoundedTmp;
-    logic [63:0]	Rounded;
-    logic [12:0]    ExpVal, ShiftCnt;
-    logic [64-1:0] PosInt;
+    logic [7:0]         Bits;       // how many bits are in the integer result
+    logic [7:0]         SubBits;    // subtract these bits from the exponent (FP result)
+    logic [`XLEN+51:0]  ShiftedManTmp; // Shifted mantissa
+    logic [`XLEN+51:0]  ShiftVal;       // value being shifted (to int - XMan, to FP - |integer input|)
+    logic [`XLEN+1:0]   ShiftedMan;     // shifted mantissa truncated
+    logic [64:0]	    RoundedTmp;     // full size rounded result - in case of overflow
+    logic [63:0]	    Rounded;        // rounded result
+    logic [12:0]        ExpVal;         // unbiased X exponent
+    logic [12:0]        ShiftCnt;       // how much is the mantissa shifted
+	logic [`XLEN-1:0]   IntIn;          // trimmed integer input
+    logic [`XLEN-1:0]   PosInt;         // absolute value of the integer input
+    logic [63:0]        CvtIntRes;      // integer result from the fp -> int instructions
+    logic [63:0]        CvtFPRes;       // floating point result from the int -> fp instructions
+    logic               XFracZero;      // is the fraction of X zero?
+    logic               Of, Uf;         // did the integer result underflow or overflow
+    logic               XExpZero;       // is X's exponent zero
+    logic               XExpMax;        // is the exponent all ones
+    logic               XNaN, XDenorm, XInf, XZero; // is X a special value
+    logic               Guard, Round, LSB, Sticky;  // bits used to determine rounding
+    logic               Plus1,CalcPlus1;    // do you add one for rounding
+    logic               SgnRes;             // sign of the floating point result
+    logic               Res64, In64;        // is the result or input 64 bits
+    logic               RoundMSB;           // most significant bit of the fraction
+    logic               RoundSgn;           // sign of the rounded result

-    logic [64-1:0] CvtIntRes;
-    logic [63:0] CvtRes;
-    logic XFracZero, Of,Uf;
-    logic XExpMax;
-    logic XNaN, XDenorm, XInf, XZero;
-    logic Plus1,CalcPlus1, Guard, Round, LSB, Sticky;
-    logic SgnRes, In64;
-    logic Res64;
-    logic RoundMSB;
-    logic RoundSgn;
-    logic XExpZero;
-
-      //  fcvt.w.s  = 0010 -
-      //  fcvt.wu.s = 0110 -
+    // FOpCtrlE:
+      //  fcvt.w.s  = 0010
+      //  fcvt.wu.s = 0110
      //  fcvt.s.w  = 0001
      //  fcvt.s.wu = 0101
-      //  fcvt.l.s  = 1010 -
-      //  fcvt.lu.s = 1110 -
+      //  fcvt.l.s  = 1010
+      //  fcvt.lu.s = 1110
      //  fcvt.s.l  = 1001
      //  fcvt.s.lu = 1101
-      //  fcvt.w.d  = 0010 - 
-      //  fcvt.wu.d = 0110 -
+      //  fcvt.w.d  = 0010 
+      //  fcvt.wu.d = 0110
      //  fcvt.d.w  = 0001
      //  fcvt.d.wu = 0101
-      //  fcvt.l.d  = 1010 -
-      //  fcvt.lu.d = 1110 -
-      //  fcvt.d.l  = 1001 --
-      //  fcvt.d.lu = 1101 --
-      //  {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
+      //  fcvt.l.d  = 1010
+      //  fcvt.lu.d = 1110
+      //  fcvt.d.l  = 1001
+      //  fcvt.d.lu = 1101
+      //  {long, unsigned, to int, from int}
+   
+    // split the input into its various parts
    assign XSgn = X[63];
    assign XExp = FmtE ? X[62:52] : {3'b0, X[62:55]};
    assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
-    assign XExpZero = ~|XExp;

+    // determine if the exponent and fraction are all zero or ones
+    assign XExpZero = ~|XExp;
    assign XFracZero = ~|XFrac;
    assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
+
+    // determine if X is a special value
    assign XNaN = XExpMax & ~XFracZero;
    assign XDenorm = XExpZero & ~XFracZero;
    assign XInf = XExpMax & XFracZero;
    assign XZero = XExpZero & XFracZero;

-
+    // calculate signals based off the input and output's size
    assign Bias = FmtE ? 12'h3ff : 12'h7f;
    assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
    assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
    assign SubBits = In64 ? 8'd64 : 8'd32;
    assign Bits = Res64 ? 8'd64 : 8'd32;
+
+    // calculate the unbiased exponent
    assign ExpVal = XExp - Bias + XDenorm;

 ////////////////////////////////////////////////////////

-	logic [64-1:0] IntIn;
+    // position the input in the most significant bits
    assign IntIn = FOpCtrlE[3] ? SrcAE : {SrcAE[31:0], 32'b0};
+    // make the integer positive
    assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
+    // determine the integer's sign
    assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
    
+    // This did not work \/
+    // generate
+    //     if(`XLEN == 64) 
+    //         lz64 lz(LZResP, LZResV, PosInt);
+    //     else if(`XLEN == 32) begin
+    //         assign LZResP[5] = 1'b0;
+    //         lz32 lz(LZResP[4:0], LZResV, PosInt);
+    //     end 
+    // endgenerate
+
 	// Leading one detector
 	logic [8:0]	i;
 	always_comb begin
 			i = 0;
 			while (~PosInt[64-1-i] && i <= 64) i = i+1;  // search for leading one 
-			NormCnt = i+1;    // compute shift count
+			LZResP = i+1;    // compute shift count
 	end
-    assign TmpExp = i==64 ? 0 : Bias + SubBits - NormCnt;
+
+    // if no one was found set to zero otherwise calculate the exponent
+    assign TmpExp = i==64 ? 0 : Bias + SubBits - LZResP;



@ -97,15 +124,21 @@ module fcvt (
 ////////////////////////////////////////////


+    // select the shift value and amount based on operation (to fp or int)
+    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
+    assign ShiftVal = FOpCtrlE[1] ? {{`XLEN-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};

-    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : NormCnt;
-    assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
-	//if shift = -1 then shift one bit right for round to nearest (shift over 2 never rounds)
-	// if the shift is negitive add bit for sticky bit
+	// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
+	// if the shift is negative add a bit for sticky bit calculation
 	// otherwise shift left
-    assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {115'b0, ~XZero} : ShiftVal << ShiftCnt;
+    assign ShiftedManTmp = &ShiftCnt ? {{`XLEN-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{`XLEN+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt;

+    // truncate the shifted mantissa
    assign ShiftedMan = ShiftedManTmp[64+51:50];
+
+    // calculate sticky bit 
+    //  - take into account the possible right shift from before
+    //  - the sticky bit calculation covers three different sizes depending on the operation
    assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);

    
@ -126,33 +159,45 @@ module fcvt (
        endcase
    end

+    // don't round if the result is exact
    assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);

+    // round the shifted mantissa
    assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
    assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 :  {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;

+    // fit the rounded result into the appropriate size and take the 2's complement if needed
     assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : 
 			      XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
+
+    // extract the MSB and Sign for later use (will be used to determine underflow and overflow)
     assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
     assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];


-
-   // Choose result
-   //    double to unsigned long
-   //         >2^64-1 or +inf or NaN - all 1's
-   //         <0 or -inf - zero
-   //         otherwise rounded result
-    //assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
+    // check if the result overflows
    assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
+
+    // check if the result underflows (this calculation changes if the result is signed or unsigned)
    assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]);    // assign CvtIntRes =  (XSgn | ShiftCnt[12]) ? {64{1'b0}}  : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
+    
+    // calculate the result's sign
    assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
+
+    // select the integer result
    assign CvtIntRes = Of ? FOpCtrlE[2] ? SgnRes ? {32'b0, {32{1'b1}}}: {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
                    Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
 		            Rounded[64-1:0];

-    assign CvtRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
-    assign CvtResE = FOpCtrlE[0] ? CvtRes : CvtIntRes;
+    // select the floating point result            
+    assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
+
+    // select the result
+    assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
+
+    // calculate the flags
+    //      - to int only sets the invalid flag
+    //      - from int only sets the inexact flag
    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};


--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -40,8 +40,7 @@ module fpu (
  output logic [`XLEN-1:0] FIntResM,     
  output logic 		      FDivBusyE,        // Is the divison/sqrt unit busy
  output logic 		      IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
-  output logic [4:0] 	   SetFflagsM,       // FPU flags
-  output logic [`XLEN-1:0] FPUResultW);      // FPU result
+  output logic [4:0] 	   SetFflagsM);      // FPU flags
 // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS 

  generate
@ -79,6 +78,9 @@ module fpu (
      logic [63:0]   FMAResM, FMAResW;
      logic [4:0]    FMAFlgM, FMAFlgW;

+
+      logic [63:0]   ReadResW;
+
      // add/cvt signals
      logic [63:0] 	FAddResM, FAddResW;
      logic [4:0] 	FAddFlgM, FAddFlgW;  
@ -102,7 +104,7 @@ module fpu (
      logic [63:0] 	ClassResE, ClassResM;
      
      // 64-bit FPU result   
-      logic [63:0] 	FPUResult64W;                                           
+      logic [63:0] 	FPUResultW;                                           
      logic [4:0] 	FPUFlagsW;
      
      
@ -124,7 +126,7 @@ module fpu (
      // regfile instantiation
      fregfile fregfile (clk, reset, FWriteEnW,
            InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
-            FPUResult64W,
+            FPUResultW,
            FRD1D, FRD2D, FRD3D);	
      

@ -168,9 +170,9 @@ module fpu (
                        .ForwardXE, .ForwardYE, .ForwardZE);

      // forwarding muxs
-      mux3  #(64)  fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
-      mux3  #(64)  fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE);
-      mux3  #(64)  fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE);
+      mux3  #(64)  fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE);
+      mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE);
+      mux3  #(64)  fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE);

      
      // first of two-stage instance of floating-point fused multiply-add unit
@ -218,8 +220,7 @@ module fpu (
      fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);

      // output for store instructions
-      assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
-      //***swap to mux
+      mux2  #(`XLEN)  FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE);



@ -265,8 +266,7 @@ module fpu (
      mux4  #(64)  FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
      mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);

-      //***change to mux
-      assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
+      mux2  #(`XLEN)  SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned);
      mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);

      
@ -318,28 +318,10 @@ module fpu (
   //#########################################


-
-      always_comb begin
-         case (FResultSelW)
-      3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
-      3'b001 : FPUResult64W = FMAResW;
-      3'b010 : FPUResult64W = FAddResW;
-      3'b011 : FPUResult64W = FDivResultW;
-      3'b100 : FPUResult64W = FResW;
-      default : FPUResult64W = 64'bxxxxx;
-         endcase
-      end
+      mux2  #(64)  ReadResMux({ReadDataW[31:0], 32'b0}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, ReadResW);
+      mux5  #(64)  FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
      

-      // interface between XLEN size datapath and double-precision sized
-      // floating-point results
-      //
-      // define offsets for LSB zero extension or truncation
-   always_comb begin      
-      // zero extension 
-//***turn into mux
-      FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};    
-   end
   end else begin // no F_SUPPORTED; tie outputs low
     assign FStallD = 0;
     assign FWriteIntE = 0; 
@ -350,7 +332,6 @@ module fpu (
     assign FDivBusyE = 0;
     assign IllegalFPUInstrD = 1;
     assign SetFflagsM = 0;
-     assign FPUResultW = 0;
   end
  endgenerate