Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2021-08-10 13:36:29 -05:00
commit 74e5b60819
7 changed files with 846 additions and 196 deletions

View File

@ -26,7 +26,7 @@
// include shared configuration
`include "wally-shared.vh"
// `include "../../../config/shared/wally-shared.vh"
// `include "../shared/wally-shared.vh"
`define QEMU 0
`define BUILDROOT 0

View File

@ -1,3 +1,3 @@
testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat
testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat
tr -d ' ' < testFloat > testFloatNoSpace

View File

@ -0,0 +1,120 @@
// `include "wally-config.vh"
///////////////////////////////////////////
// cvtfp
//
// Purely combinational floating-point precision converter.
//
//   FmtE = 1 : the 64-bit-format operand (XExpE is re-biased with -1023+127) is
//              narrowed to a 32-bit result, which is returned in the low word
//              with the upper 32 bits set to all ones.
//   FmtE = 0 : the operand is widened to a 64-bit result; a leading-one search
//              over XManE is used to normalize the mantissa.
//
// Inputs
//   XExpE     11-bit exponent of the operand
//   XManE     53-bit mantissa; bit 52 is treated as the leading one by the
//             normalization search (presumably the implicit bit - confirm with
//             the unpacking unit)
//   XSgnE     operand sign
//   XZeroE, XDenormE, XInfE, XNaNE, XSNaNE
//             operand classification flags, produced outside this module
//   FrmE      rounding mode (000 nearest-even, 001 toward zero, 010 down,
//             011 up, 100 nearest-max-magnitude)
//   FmtE      direction select, see above
// Outputs
//   CvtFpResE 64-bit conversion result
//   CvtFpFlgE 5-bit flags: {XSNaNE, 1'b0, Overflow, Underflow, Inexact} when
//             narrowing, {XSNaNE, 4'b0} when widening (presumably in
//             NV, DZ, OF, UF, NX order - confirm against the flag consumer)
///////////////////////////////////////////
module cvtfp (
  input  logic [10:0] XExpE,
  input  logic [52:0] XManE,
  input  logic        XSgnE,
  input  logic        XZeroE,
  input  logic        XDenormE,
  input  logic        XInfE,
  input  logic        XNaNE,
  input  logic        XSNaNE,
  input  logic [2:0]  FrmE,
  input  logic        FmtE,
  output logic [63:0] CvtFpResE,
  output logic [4:0]  CvtFpFlgE);

  logic        Denorm;          // narrowed result lands in the denormal exponent range
  logic [8:0]  i, NormCnt;      // leading-zero count of XManE (0..53)
  logic [12:0] DExpCalc;        // re-biased exponent for the narrowed result (two's complement)
  logic [51:0] SFrac;           // normalized fraction for the widened result
  logic [25:0] DFrac;           // 23 fraction bits + 3 rounding bits of the narrowed result
  logic [77:0] DFracTmp;        // mantissa after the denormalizing right shift
  logic        Shift;           // a right shift of the mantissa is required
  logic        Sticky, UfSticky, Guard, Round, LSBFrac, UfGuard, UfRound, UfLSBFrac;
  logic        CalcPlus1, UfCalcPlus1;
  logic        Plus1, UfPlus1;
  logic [12:0] DExpFull;        // exponent after rounding (may carry out of the fraction)
  logic [22:0] DResFrac;
  logic [7:0]  DResExp;
  logic [10:0] SExp;
  logic        Overflow, Underflow, Inexact;
  logic [31:0] DRes;

  // Count leading zeros of the mantissa.  The bound is tested first so that
  // XManE is never indexed out of range once i reaches 53.
  always_comb begin
    i = 0;
    while (i <= 52 && ~XManE[52-i]) i = i+1; // search for leading one
    NormCnt = i;
  end

  ///////////////////////////////////////////////////////////////////////////
  // Narrowing path (result selected when FmtE = 1)
  ///////////////////////////////////////////////////////////////////////////

  // Re-bias the exponent (remove 1023, add 127); forced to zero for a zero operand.
  assign DExpCalc = (XExpE-1023+127)&{13{~XZeroE}};

  // The re-biased exponent is in (-23, 0]: the result is representable as a denormal.
  assign Denorm = $signed(DExpCalc) <= 0 & $signed(DExpCalc) > $signed(-23);

  // Shift for denormals, and also for negative exponents above -25 so that the
  // rounding bits still see the mantissa.
  assign Shift = Denorm | (($signed(DExpCalc) > $signed(-25)) & DExpCalc[12]);

  // Right shift the mantissa by (1 - DExpCalc) when a shift is needed, else by 0.
  assign DFracTmp = {XManE, 25'b0} >> ((-DExpCalc+1)&{13{Shift}});
  assign DFrac    = DFracTmp[76:51];

  // Rounding bits.  The "Uf" set sits one position lower and is only used to
  // determine the underflow flag.
  assign UfSticky  = |DFracTmp[50:0];
  assign UfGuard   = DFrac[1];
  assign UfRound   = DFrac[0];
  assign UfLSBFrac = DFrac[2];

  assign Sticky  = UfSticky | UfRound;
  assign Guard   = DFrac[2];
  assign Round   = DFrac[1];
  assign LSBFrac = DFrac[3];

  always_comb begin
    // Determine if you add 1
    case (FrmE)
      3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac)); // round to nearest even
      3'b001: CalcPlus1 = 0;                                                     // round to zero
      3'b010: CalcPlus1 = XSgnE;                                                 // round down
      3'b011: CalcPlus1 = ~XSgnE;                                                // round up
      3'b100: CalcPlus1 = (Guard & (Round | (Sticky) | (~Round&~Sticky)));       // round to nearest max magnitude
      default: CalcPlus1 = 1'bx;
    endcase
    // Determine if you add 1 (for underflow flag)
    case (FrmE)
      3'b000: UfCalcPlus1 = UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky&UfLSBFrac)); // round to nearest even
      3'b001: UfCalcPlus1 = 0;                                                               // round to zero
      3'b010: UfCalcPlus1 = XSgnE;                                                           // round down
      3'b011: UfCalcPlus1 = ~XSgnE;                                                          // round up
      3'b100: UfCalcPlus1 = (UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky)));         // round to nearest max magnitude
      default: UfCalcPlus1 = 1'bx;
    endcase
  end

  // If an answer is exact don't round
  assign Plus1   = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
  assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);

  // Round: the increment may carry from the fraction into the exponent.
  // The exponent field is zeroed first when the result is denormal.
  assign {DExpFull, DResFrac} = {DExpCalc&{13{~Denorm}}, DFrac[25:3]} + Plus1;
  assign DResExp = DExpFull[7:0];

  // Flags; NaN and infinity operands never raise overflow/underflow.
  assign Overflow  = $signed(DExpFull) >= $signed({1'b0, {8{1'b1}}}) & ~(XNaNE|XInfE);
  assign Underflow = (($signed(DExpFull) <= 0) & ((Sticky|Guard|Round) | (XManE[52]&~|DFrac) | (|DFrac&~Denorm)) | ((DExpFull == 1) & Denorm & ~(UfPlus1&UfLSBFrac))) & ~(XNaNE|XInfE);
  assign Inexact   = (Sticky|Guard|Round|Underflow|Overflow) &~(XNaNE);

  // Select the narrowed result.
  //  - NaN: keep the sign, all-ones exponent, force the quiet bit, carry the top
  //    22 payload bits.  The exponent field must be exactly 8 bits wide: using
  //    the 11-bit XExpE here made the concatenation 35 bits, so the sign was
  //    truncated away and replaced by XExpE[8].
  //    NOTE(review): the RISC-V spec generally requires the canonical NaN for
  //    NaN results - confirm whether payload propagation is intended.
  //  - Underflow past the denormal range: signed zero, or the smallest denormal
  //    when the rounding mode rounds away from zero.
  //  - Overflow: largest finite value or infinity depending on rounding mode and
  //    sign; an infinite operand always yields infinity.
  assign DRes = XNaNE ? {XSgnE, {8{1'b1}}, 1'b1, XManE[50:29]} :
                Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} :
                Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} :
                                                                                                                      {XSgnE, 8'hff, 23'b0} :
                {XSgnE, DResExp, DResFrac};

  ///////////////////////////////////////////////////////////////////////////
  // Widening path (result selected when FmtE = 0)
  ///////////////////////////////////////////////////////////////////////////

  // Normalize the fraction by shifting out the leading zeros.
  assign SFrac = XManE[51:0] << NormCnt;

  // Adjust the exponent by the normalization shift (not for zero) and add
  // 1024-127 when the operand is flagged as denormal.
  assign SExp = XExpE-(NormCnt&{9{~XZeroE}})+({11{XDenormE}}&(1024-127));

  ///////////////////////////////////////////////////////////////////////////
  // Output selection
  ///////////////////////////////////////////////////////////////////////////

  // The narrowed result occupies the low word with the upper word all ones;
  // the widened result forces the top fraction bit for NaN operands.
  assign CvtFpResE = FmtE ? {{32{1'b1}},DRes} : {XSgnE, SExp, SFrac[51]|XNaNE, SFrac[50:0]};
  assign CvtFpFlgE = FmtE ? {XSNaNE, 1'b0, Overflow, Underflow, Inexact} : {XSNaNE, 4'b0};

endmodule // cvtfp

View File

@ -117,8 +117,8 @@ module fpuaddcvt1 (
output logic AddSwapE
);
wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB;
logic [5:0] ZP_mantissaA;
logic [5:0] ZP_mantissaB;
wire ZV_mantissaA;
wire ZV_mantissaB;
@ -181,8 +181,20 @@ module fpuaddcvt1 (
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
// modified to 52 bits to detect leading zeroes on denormalized mantissas
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
// lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
// lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
logic [8:0] i;
logic [8:0] j;
always_comb begin
i = 0;
while (~mantissaA[52-i] && $unsigned(i) <= $unsigned(52)) i = i+1; // search for leading one
ZP_mantissaA = i;
end
always_comb begin
j = 0;
while (~mantissaB[52-j] && $unsigned(j) <= $unsigned(52)) j = j+1; // search for leading one
ZP_mantissaB = j;
end
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa

View File

@ -38,8 +38,8 @@ module fctrl (
7'b1001011: ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_010_000_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_010_001_00_00_0_0; // fsub
7'b00000??: ControlsD = `FCTRLW'b1_0_001_110_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_001_111_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt

File diff suppressed because it is too large Load Diff

View File

@ -76,7 +76,7 @@ module fpu (
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
@ -110,8 +110,8 @@ module fpu (
logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] FAddResM, FAddResW; // add/FP -> FP convert result
logic [4:0] FAddFlgM, FAddFlgW; // add/FP -> FP convert flags
logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
@ -196,9 +196,10 @@ module fpu (
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux2 #(64) fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions
// Y operand override. Data inputs, in order: the forwarded Y operand (FPreSrcYE), the single-precision
// encoding of 1.0 (0x3F800000) with the upper 32 bits set to ones, and the double-precision encoding
// of 1.0 (0x3FF0_0000_0000_0000). Both select bits require FOpCtrlE[2] & FOpCtrlE[1] and
// FResultSelE == 3'b001; FmtE picks which of the two bits is set.
// NOTE(review): presumably this substitutes Y = 1.0 so add/sub can be computed as X*1.0 + Z on the
// multiply-add path - confirm against the fctrl encodings and the mux3 select order.
mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001)}, FSrcYE); // Presumably forces Y to 1.0 for add/sub instructions - confirm
// Z operand override. Data inputs, in order: the forwarded Z operand (FPreSrcZE), zero, and the
// forwarded Y operand (FPreSrcYE); the select bits are FOpCtrlE[2]&FOpCtrlE[1] and FOpCtrlE[2]&~FOpCtrlE[1].
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Presumably forces Z to 0 for multiply and to the Y operand for add/sub - confirm
// unpacking unit
@ -261,11 +262,14 @@ module fpu (
// - contains some E/M pipeline registers
//*** remove unneeded logic
//*** change to use the unpacking unit if possible
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE,
.XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
.XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
// outputs:
.FAddResM, .FAddFlgM);
// faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE,
// .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
// .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
// // outputs:
// .CvtFpResM, .CvtFpFlgM);
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
// compare unit
// - computation is done in one stage
@ -322,6 +326,9 @@ module fpu (
flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
@ -352,7 +359,7 @@ module fpu (
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// FPU flag selection - to privileged
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, CvtFpFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
@ -363,7 +370,7 @@ module fpu (
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
@ -382,7 +389,7 @@ module fpu (
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResW, FResW, FResultSelW, FPUResultW);
mux5 #(64) FPUResultMux(ReadResW, FMAResW, CvtFpResW, FDivResW, FResW, FResultSelW, FPUResultW);
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low