Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2025-02-03 18:25:27 +00:00 · 2021-08-10 13:36:29 -05:00 · 2021-08-10 13:36:29 -05:00 · 74e5b60819
commit 74e5b60819
parent 05a32508eb 21555c392f
7 changed files with 846 additions and 196 deletions
--- a/wally-pipelined/config/rv64icfd/wally-config.vh
+++ b/wally-pipelined/config/rv64icfd/wally-config.vh
@ -26,7 +26,7 @@
 // include shared configuration
 `include "wally-shared.vh"
-// `include "../../../config/shared/wally-shared.vh"
+  // `include "../shared/wally-shared.vh"
 `define QEMU 0
 `define BUILDROOT 0
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
@ -1,3 +1,3 @@
-testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
+testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace
--- a/wally-pipelined/src/fpu/cvtfp.sv
+++ b/wally-pipelined/src/fpu/cvtfp.sv
@ -0,0 +1,120 @@
 // `include "wally-config.vh"
 // cvtfp: purely combinational floating-point to floating-point format converter.
 //
 //   FmtE = 1 : the operand is re-biased from 1023 to 127 and rounded to a 23-bit
 //              fraction; the 32-bit result is returned in CvtFpResE[31:0] with
 //              CvtFpResE[63:32] forced to all ones.
 //   FmtE = 0 : the operand fraction is normalized (leading zeros shifted out) and
 //              the exponent adjusted; no rounding is performed on this path and
 //              only the invalid flag can be raised.
 //
 //   NOTE(review): XManE[52] is presumably the implicit leading one supplied by the
 //   unpacker, and the flag order in CvtFpFlgE presumably follows RISC-V fflags
 //   {NV, DZ, OF, UF, NX} -- confirm against the unpacking unit and the flag mux.
 module cvtfp (
    input logic [10:0] XExpE,       // exponent of the operand
    input logic [52:0] XManE,       // mantissa of the operand (bit 52 is the leading bit)
    input logic XSgnE,              // sign of the operand
    input logic XZeroE,             // operand is zero
    input logic XDenormE,           // operand is denormalized
    input logic XInfE,              // operand is infinity
    input logic XNaNE,              // operand is a NaN
    input logic XSNaNE,             // operand is a signaling NaN
    input logic [2:0] FrmE,         // rounding mode
    input logic FmtE,               // selects which conversion result/flags are output
    output logic [63:0] CvtFpResE,  // conversion result
    output logic [4:0] CvtFpFlgE);  // conversion flags

    ///////////////////////////////////////////////////////////////////////////
    // Leading-one search on the mantissa
    ///////////////////////////////////////////////////////////////////////////
    logic [8:0] i, NormCnt;         // NormCnt = number of leading zeros in XManE (0..53)
    always_comb begin
        i = 0;
        // The bound is tested first so XManE is never indexed out of range
        // when the mantissa is all zeros (i reaches 53).
        while ((i <= 52) && ~XManE[52-i]) i = i+1;  // search for leading one
        NormCnt = i;
    end

    ///////////////////////////////////////////////////////////////////////////
    // Narrowing path (selected when FmtE = 1)
    ///////////////////////////////////////////////////////////////////////////
    logic [12:0] DExpCalc;          // re-biased exponent, kept wide enough to go negative
    logic Denorm;                   // re-biased exponent lies in (-23, 0]
    logic Shift;                    // fraction must be shifted right before rounding
    logic [77:0] DFracTmp;          // mantissa after the right shift, with 25 extra low bits
    logic [25:0] DFrac;             // 23 result fraction bits followed by 3 rounding bits

    // Re-bias the exponent (remove 1023, add 127); forced to zero for a zero operand.
    assign DExpCalc = (XExpE-1023+127)&{13{~XZeroE}};
    assign Denorm = $signed(DExpCalc) <= 0 & $signed(DExpCalc) > $signed(-23);

    // Shift when the result is denormal, or when the exponent is negative but
    // still greater than -25.
    assign Shift = Denorm | (($signed(DExpCalc) > $signed(-25)) & DExpCalc[12]);
    assign DFracTmp = {XManE, 25'b0} >> ((-DExpCalc+1)&{13{Shift}});
    assign DFrac = DFracTmp[76:51];

    // Rounding bits. The "Uf" set sits one position lower than the normal set
    // and is only used when deciding the underflow flag.
    logic Sticky, UfSticky, Guard, Round, LSBFrac, UfGuard, UfRound, UfLSBFrac;
    logic CalcPlus1, UfCalcPlus1;
    logic Plus1, UfPlus1;

    // used to determine underflow flag
    assign UfSticky = |DFracTmp[50:0];
    assign UfGuard = DFrac[1];
    assign UfRound = DFrac[0];
    assign UfLSBFrac = DFrac[2];

    assign Sticky = UfSticky | UfRound;
    assign Guard = DFrac[2];
    assign Round = DFrac[1];
    assign LSBFrac = DFrac[3];

    always_comb begin
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac));//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
            3'b010: CalcPlus1 = XSgnE;//round down
            3'b011: CalcPlus1 = ~XSgnE;//round up
            3'b100: CalcPlus1 = (Guard & (Round | (Sticky) | (~Round&~Sticky)));//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
        // Determine if you add 1 (for underflow flag)
        case (FrmE)
            3'b000: UfCalcPlus1 = UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky&UfLSBFrac));//round to nearest even
            3'b001: UfCalcPlus1 = 0;//round to zero
            3'b010: UfCalcPlus1 = XSgnE;//round down
            3'b011: UfCalcPlus1 = ~XSgnE;//round up
            3'b100: UfCalcPlus1 = (UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky)));//round to nearest max magnitude
            default: UfCalcPlus1 = 1'bx;
        endcase
    end

    // If an answer is exact don't round
    assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
    assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);

    // Round: the exponent and fraction are incremented as one vector so that a
    // fraction carry-out bumps the exponent. The exponent is zeroed for denormals.
    logic [12:0] DExpFull;
    logic [22:0] DResFrac;
    logic [7:0] DResExp;
    assign {DExpFull, DResFrac} = {DExpCalc&{13{~Denorm}}, DFrac[25:3]} + Plus1;
    assign DResExp = DExpFull[7:0];

    ///////////////////////////////////////////////////////////////////////////
    // Widening path (selected when FmtE = 0)
    ///////////////////////////////////////////////////////////////////////////
    logic [51:0] SFrac;             // fraction with leading zeros shifted out
    logic [10:0] SExp;              // exponent reduced by the normalization shift
    assign SFrac = XManE[51:0] << NormCnt;
    // The mask is as wide as NormCnt; a zero operand gets no exponent adjustment.
    // A denormal operand additionally gets 1024-127 added.
    assign SExp = XExpE-(NormCnt&{9{~XZeroE}})+({11{XDenormE}}&(1024-127));

    ///////////////////////////////////////////////////////////////////////////
    // Flags (narrowing path)
    ///////////////////////////////////////////////////////////////////////////
    logic Overflow, Underflow, Inexact;
    // Overflow: rounded exponent reaches 255 or more and the operand is finite.
    assign Overflow = $signed(DExpFull) >= $signed({1'b0, {8{1'b1}}}) & ~(XNaNE|XInfE);
    // Underflow: rounded exponent is <= 0 with bits lost, or the result rounded
    // up into exponent 1 from the denormal range; never for NaN/infinity.
    // NOTE(review): the individual terms were presumably tuned against TestFloat
    // vectors -- do not simplify without re-running them.
    assign Underflow = (($signed(DExpFull) <= 0) & ((Sticky|Guard|Round) | (XManE[52]&~|DFrac) | (|DFrac&~Denorm)) | ((DExpFull == 1) & Denorm & ~(UfPlus1&UfLSBFrac))) & ~(XNaNE|XInfE);
    assign Inexact = (Sticky|Guard|Round|Underflow|Overflow) &~(XNaNE);

    ///////////////////////////////////////////////////////////////////////////
    // Result selection
    ///////////////////////////////////////////////////////////////////////////
    logic [31:0] DRes;
    // Priority: NaN, total underflow, overflow/infinity, normal rounded result.
    // NaN: the exponent field is written as eight ones. Using the 11-bit XExpE
    // here would make the concatenation 35 bits wide, and the assignment to the
    // 32-bit DRes would silently truncate away the sign bit.
    // Overflow: the rounding mode and sign choose between the largest finite
    // value (8'hfe, all-ones fraction) and infinity; a true infinity operand
    // always yields infinity.
    assign DRes = XNaNE ? {XSgnE, {8{1'b1}}, 1'b1, XManE[50:29]} : 
                  Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} : 
                  Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} :
                                                                                                                          {XSgnE, 8'hff, 23'b0} : 
                  {XSgnE, DResExp, DResFrac};

    // FmtE = 1: 32-bit result with the upper word set to all ones.
    // FmtE = 0: 64-bit result; the top fraction bit is forced to 1 for a NaN operand.
    assign CvtFpResE = FmtE ? {{32{1'b1}},DRes} : {XSgnE, SExp, SFrac[51]|XNaNE, SFrac[50:0]};
    // The widening path can only raise the flag driven by XSNaNE.
    assign CvtFpFlgE = FmtE ? {XSNaNE, 1'b0, Overflow, Underflow, Inexact} : {XSNaNE, 4'b0};

 endmodule // cvtfp
--- a/wally-pipelined/src/fpu/faddcvt.sv
+++ b/wally-pipelined/src/fpu/faddcvt.sv
@ -117,8 +117,8 @@ module fpuaddcvt1 (
   output logic         AddSwapE
   );
-   wire [5:0]	 ZP_mantissaA;
+   logic [5:0]	 ZP_mantissaA;
-   wire [5:0]	 ZP_mantissaB;
+   logic [5:0]	 ZP_mantissaB;
   wire		    ZV_mantissaA;
   wire		    ZV_mantissaB;
@ -181,8 +181,20 @@ module fpuaddcvt1 (
   // normalization. If sum_corrected is all zeros, the exp_valid is 
   // zero; otherwise, it is one. 
   // modified to 52 bits to detect leading zeroes on denormalized mantissas
-   lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
+   // lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
-   lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
+   // lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);    
   logic [8:0] i;
   logic [8:0] j;
    always_comb begin
            i = 0;
            while (~mantissaA[52-i] && $unsigned(i) <= $unsigned(52)) i = i+1;  // search for leading one
            ZP_mantissaA = i;
    end
    always_comb begin
            j = 0;
            while (~mantissaB[52-j] && $unsigned(j) <= $unsigned(52)) j = j+1;  // search for leading one
            ZP_mantissaB = j;
    end
   // Denormalized exponents created by subtracting the leading zeroes from the original exponents
   assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa 
--- a/wally-pipelined/src/fpu/fctrl.sv
+++ b/wally-pipelined/src/fpu/fctrl.sv
@ -38,8 +38,8 @@ module fctrl (
      7'b1001011:   ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub
      7'b1001111:   ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd
      7'b1010011: casez(Funct7D)
-                    7'b00000??: ControlsD = `FCTRLW'b1_0_010_000_00_00_0_0; // fadd
+                    7'b00000??: ControlsD = `FCTRLW'b1_0_001_110_00_00_0_0; // fadd
-                    7'b00001??: ControlsD = `FCTRLW'b1_0_010_001_00_00_0_0; // fsub
+                    7'b00001??: ControlsD = `FCTRLW'b1_0_001_111_00_00_0_0; // fsub
                    7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul
                    7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv
                    7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -76,7 +76,7 @@ module fpu (
 	logic [63:0] 	    FRD1D, FRD2D, FRD3D;  // Read Data from FP register - decode stage
 	logic [63:0] 	    FRD1E, FRD2E, FRD3E;  // Read Data from FP register - execute stage
 	logic [63:0] 	    FSrcXE, FSrcXM;       // Input 1 to the various units (after forwarding)
-	logic [63:0] 	    FSrcYE;               // Input 2 to the various units (after forwarding)
+	logic [63:0] 	    FPreSrcYE, FSrcYE;               // Input 2 to the various units (after forwarding)
 	logic [63:0] 	    FPreSrcZE, FSrcZE;     // Input 3 to the various units (after forwarding)
 	// unpacking signals
@ -110,8 +110,8 @@ module fpu (
 	logic [63:0] 	ReadResW;           // read result (load instruction)
-	logic [63:0] 	FAddResM, FAddResW; // add/FP -> FP convert result
+	logic [63:0] 	CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
-	logic [4:0] 	FAddFlgM, FAddFlgW; // add/FP -> FP convert flags
+	logic [4:0] 	CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
 	logic [63:0] 	CvtResE, CvtResM;   // FP <-> int convert result
 	logic [4:0] 	CvtFlgE, CvtFlgM;   // FP <-> int convert flags //*** trim this
@ -196,9 +196,10 @@ module fpu (
 	// forwarding muxs
 	mux3  #(64)  fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
-	mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
+	mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
 	mux3  #(64)  fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
-	mux2  #(64)  fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions
+	mux3  #(64)  fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001)}, FSrcYE); // Force Z to be 0 for multiply instructions
 	mux3  #(64)  fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions
  // unpacking unit
@ -261,11 +262,14 @@ module fpu (
  //    - contains some E/M pipleine registers
  //*** remove uneeded logic
  //*** change to use the unpacking unit if possible
-	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
+// 	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
-   .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
+//    .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
-   .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
+//    .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
-                  // outputs:
+//                   // outputs:
-                  .FAddResM, .FAddFlgM);
+//                   .CvtFpResM, .CvtFpFlgM);
 	cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
 	// compare unit
  //    - computation is done in one stage
@ -322,6 +326,9 @@ module fpu (
 	flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
 	flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
 	flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
 	flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
 	flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
 	flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
@ -352,7 +359,7 @@ module fpu (
 	mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
  // FPU flag selection - to privileged
-	mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
+	mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, CvtFpFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
@ -363,7 +370,7 @@ module fpu (
 	////////////////////////////////////////////////////////////////////////////////////////
 	flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
 	flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); 
-	flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); 
+	flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); 
 	flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
 	flopenrc #(6)  MWCtrlReg(clk, reset, FlushW, ~StallW,
 				{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
@ -382,7 +389,7 @@ module fpu (
 	mux2  #(64)  ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
  // select the result to be written to the FP register
-	mux5  #(64)  FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResW, FResW, FResultSelW, FPUResultW);
+	mux5  #(64)  FPUResultMux(ReadResW, FMAResW, CvtFpResW, FDivResW, FResW, FResultSelW, FPUResultW);
  end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
`@ -1,3 +1,3 @@`
	`testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat`	`testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat`
	`tr -d ' ' < testFloat > testFloatNoSpace`	`tr -d ' ' < testFloat > testFloatNoSpace`