forked from Github_Repos/cvw
		
	LZA added to FMA and attempting a merged FMA and adder in synthesis
This commit is contained in:
		
							parent
							
								
									cce0571925
								
							
						
					
					
						commit
						e00f181bcf
					
				@ -26,7 +26,7 @@
 | 
			
		||||
 | 
			
		||||
// include shared configuration
 | 
			
		||||
`include "wally-shared.vh"
 | 
			
		||||
// `include "../../../config/shared/wally-shared.vh"
 | 
			
		||||
  // `include "../shared/wally-shared.vh"
 | 
			
		||||
 | 
			
		||||
`define QEMU 0
 | 
			
		||||
`define BUILDROOT 0
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,3 @@
 | 
			
		||||
testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
 | 
			
		||||
testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
 | 
			
		||||
tr -d ' ' < testFloat > testFloatNoSpace
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										120
									
								
								wally-pipelined/src/fpu/cvtfp.sv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								wally-pipelined/src/fpu/cvtfp.sv
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,120 @@
 | 
			
		||||
 | 
			
		||||
// `include "wally-config.vh"
 | 
			
		||||
///////////////////////////////////////////
// cvtfp
//
// Purely combinational conversion between two floating-point formats.
//
//   FmtE = 1 : the unpacked input (11-bit exponent, 53-bit mantissa) is
//              re-biased by -1023+127, shifted/rounded to a 23-bit fraction
//              and returned as a 32-bit value in CvtFpResE[31:0] with
//              CvtFpResE[63:32] forced to all ones.
//   FmtE = 0 : the mantissa is normalized (leading one shifted out) and the
//              exponent adjusted to build a 64-bit result.
//
// CvtFpFlgE is {XSNaNE, 1'b0, Overflow, Underflow, Inexact} when FmtE = 1
// and {XSNaNE, 4'b0} when FmtE = 0.
//
// NOTE(review): the unpacked input signals are produced outside this module;
// their exact encoding for the FmtE = 0 case (e.g. how the exponent is
// biased) is not visible here - confirm against the unpacking unit.
///////////////////////////////////////////
module cvtfp (
    input  logic [10:0] XExpE,      // input exponent
    input  logic [52:0] XManE,      // input mantissa; bit 52 is the most significant bit
    input  logic        XSgnE,      // input sign
    input  logic        XZeroE,     // input is zero (forces the re-biased exponent and normalize count to 0)
    input  logic        XDenormE,   // input is denormalized (adds an exponent correction when widening)
    input  logic        XInfE,      // input is infinity
    input  logic        XNaNE,      // input is NaN
    input  logic        XSNaNE,     // input is a signaling NaN (drives the top flag bit)
    input  logic [2:0]  FrmE,       // rounding mode (see the case statements below)
    input  logic        FmtE,       // selects which conversion result/flags are output
    output logic [63:0] CvtFpResE,  // conversion result
    output logic [4:0]  CvtFpFlgE); // conversion flags

    ///////////////////////////////////////////////////////////////////////
    // Leading-one search on the mantissa (used by the FmtE = 0 path)
    ///////////////////////////////////////////////////////////////////////
    logic [8:0] i, NormCnt;

    always_comb begin
        i = 0;
        // The bound is tested first so XManE is never indexed out of range
        // (52-i would wrap when i reaches 53). NormCnt is 53 when no bit is set.
        while (i <= 52 && ~XManE[52-i]) i = i+1;  // search for leading one
        NormCnt = i;
    end

    ///////////////////////////////////////////////////////////////////////
    // FmtE = 1 path: re-bias the exponent and shift the mantissa
    ///////////////////////////////////////////////////////////////////////
    logic [12:0] DExpCalc;   // re-biased exponent, interpreted as signed below
    logic        Denorm;     // re-biased exponent lies in (-23, 0]
    logic        Shift;      // mantissa must be shifted right before rounding
    logic [51:0] SFrac;      // normalized fraction for the FmtE = 0 path
    logic [25:0] DFrac;      // 23 fraction bits followed by 3 rounding bits
    logic [77:0] DFracTmp;   // shifted mantissa with room for the shifted-out bits

    assign DExpCalc = (XExpE-1023+127)&{13{~XZeroE}};
    assign Denorm = $signed(DExpCalc) <= 0 & $signed(DExpCalc) > $signed(-23);

    assign SFrac = XManE[51:0] << NormCnt;

    // Shift when the result is denormal, or when the exponent is negative but
    // still greater than -25.
    assign Shift = Denorm|(($signed(DExpCalc) > $signed(-25)) & DExpCalc[12]);
    assign DFracTmp = {XManE, 25'b0} >> ((-DExpCalc+1)&{13{Shift}});
    assign DFrac = DFracTmp[76:51];

    ///////////////////////////////////////////////////////////////////////
    // Rounding
    ///////////////////////////////////////////////////////////////////////
    logic Sticky, UfSticky, Guard, Round, LSBFrac, UfGuard, UfRound, UfLSBFrac;
    logic CalcPlus1, UfCalcPlus1;
    logic Plus1, UfPlus1;

    // used to determine underflow flag (rounding point one bit lower)
    assign UfSticky = |DFracTmp[50:0];
    assign UfGuard = DFrac[1];
    assign UfRound = DFrac[0];
    assign UfLSBFrac = DFrac[2];

    // rounding bits for the result itself
    assign Sticky = UfSticky | UfRound;
    assign Guard = DFrac[2];
    assign Round = DFrac[1];
    assign LSBFrac = DFrac[3];

    always_comb begin
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac));//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
            3'b010: CalcPlus1 = XSgnE;//round down
            3'b011: CalcPlus1 = ~XSgnE;//round up
            3'b100: CalcPlus1 = (Guard & (Round | (Sticky) | (~Round&~Sticky)));//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
        // Determine if you add 1 (for underflow flag)
        case (FrmE)
            3'b000: UfCalcPlus1 = UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky&UfLSBFrac));//round to nearest even
            3'b001: UfCalcPlus1 = 0;//round to zero
            3'b010: UfCalcPlus1 = XSgnE;//round down
            3'b011: UfCalcPlus1 = ~XSgnE;//round up
            3'b100: UfCalcPlus1 = (UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky)));//round to nearest max magnitude
            default: UfCalcPlus1 = 1'bx;
        endcase
    end

    // If an answer is exact don't round
    assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
    assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);

    logic [12:0] DExpFull;   // exponent after the rounding increment has carried in
    logic [22:0] DResFrac;
    logic [7:0]  DResExp;

    // Exponent and fraction are incremented together so a fraction overflow
    // carries into the exponent; the exponent field is zeroed for denormals.
    assign {DExpFull, DResFrac} = {DExpCalc&{13{~Denorm}}, DFrac[25:3]} + Plus1;
    assign DResExp = DExpFull[7:0];

    ///////////////////////////////////////////////////////////////////////
    // FmtE = 0 path: exponent of the widened result
    ///////////////////////////////////////////////////////////////////////
    logic [10:0] SExp;
    assign SExp = XExpE-(NormCnt&{8{~XZeroE}})+({11{XDenormE}}&1024-127);

    ///////////////////////////////////////////////////////////////////////
    // Flags
    ///////////////////////////////////////////////////////////////////////
    logic Overflow, Underflow, Inexact;
    assign Overflow = $signed(DExpFull) >= $signed({1'b0, {8{1'b1}}}) & ~(XNaNE|XInfE);
    assign Underflow = (($signed(DExpFull) <= 0) & ((Sticky|Guard|Round) | (XManE[52]&~|DFrac) | (|DFrac&~Denorm)) | ((DExpFull == 1) & Denorm & ~(UfPlus1&UfLSBFrac))) & ~(XNaNE|XInfE);
    assign Inexact = (Sticky|Guard|Round|Underflow|Overflow) &~(XNaNE);

    ///////////////////////////////////////////////////////////////////////
    // Result selection
    ///////////////////////////////////////////////////////////////////////
    logic [31:0] DRes;

    // NaN: sign, all-ones exponent, quiet bit set, upper payload bits kept.
    //   The exponent field is written as an explicit 8-bit constant; using the
    //   11-bit XExpE here made the concatenation 35 bits wide, so the sign and
    //   the upper exponent bits were silently truncated away.
    // Underflow past the denormal range: zero, or the smallest denormal when
    //   rounding adds one.
    // Overflow/infinity: largest finite value when the rounding mode rounds
    //   toward zero for this sign (and the input is not infinity), else infinity.
    assign DRes = XNaNE ? {XSgnE, 8'hff, 1'b1, XManE[50:29]} :
                  Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} :
                  Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} :
                                                                                                                             {XSgnE, 8'hff, 23'b0} :
                  {XSgnE, DResExp, DResFrac};

    // FmtE = 1: 32-bit result with the upper 32 bits set to ones.
    // FmtE = 0: 64-bit result; the top fraction bit is forced to 1 for NaN inputs.
    assign CvtFpResE = FmtE ? {{32{1'b1}},DRes} : {XSgnE, SExp, SFrac[51]|XNaNE, SFrac[50:0]};
    assign CvtFpFlgE = FmtE ? {XSNaNE, 1'b0, Overflow, Underflow, Inexact} : {XSNaNE, 4'b0};

endmodule // cvtfp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -117,8 +117,8 @@ module fpuaddcvt1 (
 | 
			
		||||
   output logic         AddSwapE
 | 
			
		||||
   );
 | 
			
		||||
 | 
			
		||||
   wire [5:0]	 ZP_mantissaA;
 | 
			
		||||
   wire [5:0]	 ZP_mantissaB;
 | 
			
		||||
   logic [5:0]	 ZP_mantissaA;
 | 
			
		||||
   logic [5:0]	 ZP_mantissaB;
 | 
			
		||||
   wire		    ZV_mantissaA;
 | 
			
		||||
   wire		    ZV_mantissaB;
 | 
			
		||||
 | 
			
		||||
@ -181,8 +181,20 @@ module fpuaddcvt1 (
 | 
			
		||||
   // normalization. If sum_corrected is all zeros, the exp_valid is 
 | 
			
		||||
   // zero; otherwise, it is one. 
 | 
			
		||||
   // modified to 52 bits to detect leading zeroes on denormalized mantissas
 | 
			
		||||
   lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
 | 
			
		||||
   lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
 | 
			
		||||
   // lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
 | 
			
		||||
   // lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);    
 | 
			
		||||
   logic [8:0] i;
 | 
			
		||||
   logic [8:0] j;
 | 
			
		||||
    always_comb begin
 | 
			
		||||
            i = 0;
 | 
			
		||||
            while (~mantissaA[52-i] && $unsigned(i) <= $unsigned(52)) i = i+1;  // search for leading one
 | 
			
		||||
            ZP_mantissaA = i;
 | 
			
		||||
    end
 | 
			
		||||
    always_comb begin
 | 
			
		||||
            j = 0;
 | 
			
		||||
            while (~mantissaB[52-j] && $unsigned(j) <= $unsigned(52)) j = j+1;  // search for leading one
 | 
			
		||||
            ZP_mantissaB = j;
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
   // Denormalized exponents created by subtracting the leading zeroes from the original exponents
 | 
			
		||||
   assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa 
 | 
			
		||||
 | 
			
		||||
@ -38,8 +38,8 @@ module fctrl (
 | 
			
		||||
      7'b1001011:   ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub
 | 
			
		||||
      7'b1001111:   ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd
 | 
			
		||||
      7'b1010011: casez(Funct7D)
 | 
			
		||||
                    7'b00000??: ControlsD = `FCTRLW'b1_0_010_000_00_00_0_0; // fadd
 | 
			
		||||
                    7'b00001??: ControlsD = `FCTRLW'b1_0_010_001_00_00_0_0; // fsub
 | 
			
		||||
                    7'b00000??: ControlsD = `FCTRLW'b1_0_001_110_00_00_0_0; // fadd
 | 
			
		||||
                    7'b00001??: ControlsD = `FCTRLW'b1_0_001_111_00_00_0_0; // fsub
 | 
			
		||||
                    7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul
 | 
			
		||||
                    7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv
 | 
			
		||||
                    7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -76,7 +76,7 @@ module fpu (
 | 
			
		||||
	logic [63:0] 	    FRD1D, FRD2D, FRD3D;  // Read Data from FP register - decode stage
 | 
			
		||||
	logic [63:0] 	    FRD1E, FRD2E, FRD3E;  // Read Data from FP register - execute stage
 | 
			
		||||
	logic [63:0] 	    FSrcXE, FSrcXM;       // Input 1 to the various units (after forwarding)
 | 
			
		||||
	logic [63:0] 	    FSrcYE;               // Input 2 to the various units (after forwarding)
 | 
			
		||||
	logic [63:0] 	    FPreSrcYE, FSrcYE;               // Input 2 to the various units (after forwarding)
 | 
			
		||||
	logic [63:0] 	    FPreSrcZE, FSrcZE;     // Input 3 to the various units (after forwarding)
 | 
			
		||||
	
 | 
			
		||||
	// unpacking signals
 | 
			
		||||
@ -110,8 +110,8 @@ module fpu (
 | 
			
		||||
	
 | 
			
		||||
	logic [63:0] 	ReadResW;           // read result (load instruction)
 | 
			
		||||
 | 
			
		||||
	logic [63:0] 	FAddResM, FAddResW; // add/FP -> FP convert result
 | 
			
		||||
	logic [4:0] 	FAddFlgM, FAddFlgW; // add/FP -> FP convert flags
 | 
			
		||||
	logic [63:0] 	CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
 | 
			
		||||
	logic [4:0] 	CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
 | 
			
		||||
 | 
			
		||||
	logic [63:0] 	CvtResE, CvtResM;   // FP <-> int convert result
 | 
			
		||||
	logic [4:0] 	CvtFlgE, CvtFlgM;   // FP <-> int convert flags //*** trim this
 | 
			
		||||
@ -196,9 +196,10 @@ module fpu (
 | 
			
		||||
 | 
			
		||||
	// forwarding muxs
 | 
			
		||||
	mux3  #(64)  fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
 | 
			
		||||
	mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
 | 
			
		||||
	mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
 | 
			
		||||
	mux3  #(64)  fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
 | 
			
		||||
	mux2  #(64)  fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions
 | 
			
		||||
	mux3  #(64)  fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001)}, FSrcYE); // Force Z to be 0 for multiply instructions
 | 
			
		||||
	mux3  #(64)  fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions
 | 
			
		||||
 	
 | 
			
		||||
   
 | 
			
		||||
  // unpacking unit
 | 
			
		||||
@ -261,11 +262,14 @@ module fpu (
 | 
			
		||||
  //    - contains some E/M pipeline registers
 | 
			
		||||
  //*** remove unneeded logic
 | 
			
		||||
  //*** change to use the unpacking unit if possible
 | 
			
		||||
	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
 | 
			
		||||
   .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
 | 
			
		||||
   .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
 | 
			
		||||
                  // outputs:
 | 
			
		||||
                  .FAddResM, .FAddFlgM);
 | 
			
		||||
// 	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
 | 
			
		||||
//    .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
 | 
			
		||||
//    .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
 | 
			
		||||
//                   // outputs:
 | 
			
		||||
//                   .CvtFpResM, .CvtFpFlgM);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
 | 
			
		||||
	
 | 
			
		||||
	// compare unit
 | 
			
		||||
  //    - computation is done in one stage
 | 
			
		||||
@ -323,6 +327,9 @@ module fpu (
 | 
			
		||||
	flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
 | 
			
		||||
	flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
 | 
			
		||||
 | 
			
		||||
	flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
 | 
			
		||||
	flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
 | 
			
		||||
	
 | 
			
		||||
	flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
 | 
			
		||||
	flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
 | 
			
		||||
  
 | 
			
		||||
@ -352,7 +359,7 @@ module fpu (
 | 
			
		||||
	mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
 | 
			
		||||
	
 | 
			
		||||
  // FPU flag selection - to privileged
 | 
			
		||||
	mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
 | 
			
		||||
	mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, CvtFpFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
 | 
			
		||||
	
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -363,7 +370,7 @@ module fpu (
 | 
			
		||||
	////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
 | 
			
		||||
	flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); 
 | 
			
		||||
	flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); 
 | 
			
		||||
	flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); 
 | 
			
		||||
	flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
 | 
			
		||||
	flopenrc #(6)  MWCtrlReg(clk, reset, FlushW, ~StallW,
 | 
			
		||||
				{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
 | 
			
		||||
@ -382,7 +389,7 @@ module fpu (
 | 
			
		||||
	mux2  #(64)  ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
 | 
			
		||||
 | 
			
		||||
  // select the result to be written to the FP register
 | 
			
		||||
	mux5  #(64)  FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResW, FResW, FResultSelW, FPUResultW);
 | 
			
		||||
	mux5  #(64)  FPUResultMux(ReadResW, FMAResW, CvtFpResW, FDivResW, FResW, FResultSelW, FPUResultW);
 | 
			
		||||
	
 | 
			
		||||
	
 | 
			
		||||
  end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user