Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2021-08-10 13:36:29 -05:00
commit 74e5b60819
7 changed files with 846 additions and 196 deletions

View File

@ -26,7 +26,7 @@
// include shared configuration // include shared configuration
`include "wally-shared.vh" `include "wally-shared.vh"
// `include "../../../config/shared/wally-shared.vh" // `include "../shared/wally-shared.vh"
`define QEMU 0 `define QEMU 0
`define BUILDROOT 0 `define BUILDROOT 0

View File

@ -1,3 +1,3 @@
testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat
tr -d ' ' < testFloat > testFloatNoSpace tr -d ' ' < testFloat > testFloatNoSpace

View File

@ -0,0 +1,120 @@
// `include "wally-config.vh"
///////////////////////////////////////////////////////////////////////////////
// cvtfp: combinational floating-point <-> floating-point format conversion.
//
// FmtE = 1 : narrowing path. The 11-bit exponent is re-biased (-1023 +127) to
//            an 8-bit exponent, the significand is rounded to 23 fraction
//            bits according to FrmE, and the 32-bit result is placed in the
//            low word with the upper 32 bits set to all ones.
// FmtE = 0 : widening path. The significand is left-normalised by its
//            leading-zero count and a 64-bit {sign, 11-bit exp, 52-bit frac}
//            result is produced; only the signalling-NaN flag can be raised.
//
// Inputs
//   XExpE              exponent of X
//   XManE              significand of X; bit 52 is the leading-one position
//   XSgnE              sign of X
//   XZeroE, XDenormE,
//   XInfE, XNaNE,
//   XSNaNE             classification of X (presumably from the unpacking
//                      unit - confirm against the instantiating module)
//   FrmE               rounding mode (see the case statements below)
//   FmtE               selects narrowing (1) or widening (0)
// Outputs
//   CvtFpResE          converted value
//   CvtFpFlgE          {XSNaNE, 1'b0, Overflow, Underflow, Inexact} when
//                      FmtE = 1, {XSNaNE, 4'b0} when FmtE = 0
///////////////////////////////////////////////////////////////////////////////
module cvtfp (
    input  logic [10:0] XExpE,
    input  logic [52:0] XManE,
    input  logic        XSgnE,
    input  logic        XZeroE,
    input  logic        XDenormE,
    input  logic        XInfE,
    input  logic        XNaNE,
    input  logic        XSNaNE,
    input  logic [2:0]  FrmE,
    input  logic        FmtE,
    output logic [63:0] CvtFpResE,
    output logic [4:0]  CvtFpFlgE);

    ///////////////////////////////////////////////////////////////////////////
    // Leading-zero count of the significand (used by the widening path)
    ///////////////////////////////////////////////////////////////////////////

    logic [8:0] NormCnt;

    // Scan from the LSB upward so the highest set bit is the last to assign.
    // The loop bounds keep every index inside XManE; an all-zero significand
    // leaves the count at 53.
    always_comb begin
        NormCnt = 9'd53;
        for (int k = 0; k <= 52; k++)
            if (XManE[k]) NormCnt = 9'(52 - k);
    end

    ///////////////////////////////////////////////////////////////////////////
    // Narrowing path: exponent re-bias and denormal alignment
    ///////////////////////////////////////////////////////////////////////////

    logic [12:0] DExpCalc;
    logic        Denorm;

    // Re-biased exponent, forced to zero when X is zero. Treated as a signed
    // 13-bit quantity by the comparisons below.
    assign DExpCalc = (XExpE-1023+127)&{13{~XZeroE}};
    // Result lands in the narrow format's denormal range.
    assign Denorm = $signed(DExpCalc) <= 0 & $signed(DExpCalc) > $signed(-23);

    logic        Shift;
    logic [77:0] DFracTmp;
    logic [25:0] DFrac;

    // Right-shift the significand when the result is denormal, or when the
    // exponent is negative but still above -25.
    assign Shift = Denorm|(($signed(DExpCalc) > $signed(-25)) & DExpCalc[12]);
    // The mask keeps only the low 13 bits of the shift amount and zeroes it
    // entirely when no shift is required.
    assign DFracTmp = {XManE, 25'b0} >> ((-DExpCalc+1)&{13{Shift}});
    // 23 fraction bits followed by three rounding bits.
    assign DFrac = DFracTmp[76:51];

    ///////////////////////////////////////////////////////////////////////////
    // Narrowing path: rounding
    ///////////////////////////////////////////////////////////////////////////

    logic Sticky, UfSticky, Guard, Round, LSBFrac, UfGuard, UfRound, UfLSBFrac;
    logic CalcPlus1, UfCalcPlus1;
    logic Plus1, UfPlus1;

    // used to determine underflow flag (rounding point one bit lower)
    assign UfSticky  = |DFracTmp[50:0];
    assign UfGuard   = DFrac[1];
    assign UfRound   = DFrac[0];
    assign UfLSBFrac = DFrac[2];

    // used to round the result
    assign Sticky  = UfSticky | UfRound;
    assign Guard   = DFrac[2];
    assign Round   = DFrac[1];
    assign LSBFrac = DFrac[3];

    always_comb begin
        // Determine if you add 1
        case (FrmE)
            3'b000:  CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac)); // round to nearest even
            3'b001:  CalcPlus1 = 0;                                                      // round to zero
            3'b010:  CalcPlus1 = XSgnE;                                                  // round down
            3'b011:  CalcPlus1 = ~XSgnE;                                                 // round up
            3'b100:  CalcPlus1 = (Guard & (Round | (Sticky) | (~Round&~Sticky)));        // round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
        // Determine if you add 1 (for underflow flag)
        case (FrmE)
            3'b000:  UfCalcPlus1 = UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky&UfLSBFrac)); // round to nearest even
            3'b001:  UfCalcPlus1 = 0;                                                                // round to zero
            3'b010:  UfCalcPlus1 = XSgnE;                                                            // round down
            3'b011:  UfCalcPlus1 = ~XSgnE;                                                           // round up
            3'b100:  UfCalcPlus1 = (UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky)));          // round to nearest max magnitude
            default: UfCalcPlus1 = 1'bx;
        endcase
    end

    // If an answer is exact don't round
    assign Plus1   = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
    assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);

    logic [12:0] DExpFull;
    logic [22:0] DResFrac;
    logic [7:0]  DResExp;

    // Exponent and fraction are incremented as one value so that a fraction
    // carry-out bumps the exponent. A denormal result starts from exponent 0.
    assign {DExpFull, DResFrac} = {DExpCalc&{13{~Denorm}}, DFrac[25:3]} + Plus1;
    assign DResExp = DExpFull[7:0];

    ///////////////////////////////////////////////////////////////////////////
    // Widening path: normalised fraction and exponent
    ///////////////////////////////////////////////////////////////////////////

    logic [51:0] SFrac;
    logic [10:0] SExp;

    assign SFrac = XManE[51:0] << NormCnt;
    // The leading-zero count is removed from the exponent (not for zero), and
    // 1024-127 is added for denormal inputs.
    // NOTE(review): the constant assumes a particular XExpE encoding for
    // denormal inputs from the unpacker - confirm.
    assign SExp = XExpE-(NormCnt&{9{~XZeroE}})+({11{XDenormE}}&(1024-127));

    ///////////////////////////////////////////////////////////////////////////
    // Flags (narrowing path)
    ///////////////////////////////////////////////////////////////////////////

    logic Overflow, Underflow, Inexact;

    assign Overflow  = $signed(DExpFull) >= $signed({1'b0, {8{1'b1}}}) & ~(XNaNE|XInfE);
    assign Underflow = (($signed(DExpFull) <= 0) & ((Sticky|Guard|Round) | (XManE[52]&~|DFrac) | (|DFrac&~Denorm)) | ((DExpFull == 1) & Denorm & ~(UfPlus1&UfLSBFrac))) & ~(XNaNE|XInfE);
    assign Inexact   = (Sticky|Guard|Round|Underflow|Overflow) &~(XNaNE);

    ///////////////////////////////////////////////////////////////////////////
    // Result selection
    ///////////////////////////////////////////////////////////////////////////

    logic [31:0] DRes;

    // Priority: NaN, underflow to zero / smallest value, overflow or infinity,
    // then the normally rounded result.
    // The NaN branch uses an explicit 8-bit all-ones exponent. Concatenating
    // the 11-bit XExpE here made a 35-bit value whose truncation to 32 bits
    // discarded XSgnE and forced the sign bit to XExpE[8].
    // NOTE(review): the NaN sign and upper payload bits are propagated;
    // confirm this is the NaN policy the verification flow expects.
    assign DRes = XNaNE ? {XSgnE, {8{1'b1}}, 1'b1, XManE[50:29]} :
                  Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} :
                  Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} :
                                                                                                                              {XSgnE, 8'hff, 23'b0} :
                  {XSgnE, DResExp, DResFrac};

    // The narrow result has its upper 32 bits filled with ones.
    assign CvtFpResE = FmtE ? {{32{1'b1}},DRes} : {XSgnE, SExp, SFrac[51]|XNaNE, SFrac[50:0]};
    assign CvtFpFlgE = FmtE ? {XSNaNE, 1'b0, Overflow, Underflow, Inexact} : {XSNaNE, 4'b0};

endmodule // cvtfp

View File

@ -117,8 +117,8 @@ module fpuaddcvt1 (
output logic AddSwapE output logic AddSwapE
); );
wire [5:0] ZP_mantissaA; logic [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB; logic [5:0] ZP_mantissaB;
wire ZV_mantissaA; wire ZV_mantissaA;
wire ZV_mantissaB; wire ZV_mantissaB;
@ -181,8 +181,20 @@ module fpuaddcvt1 (
// normalization. If sum_corrected is all zeros, the exp_valid is // normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one. // zero; otherwise, it is one.
// modified to 52 bits to detect leading zeroes on denormalized mantissas // modified to 52 bits to detect leading zeroes on denormalized mantissas
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA); // lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB); // lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
logic [8:0] i;
logic [8:0] j;
always_comb begin
i = 0;
while (~mantissaA[52-i] && $unsigned(i) <= $unsigned(52)) i = i+1; // search for leading one
ZP_mantissaA = i;
end
always_comb begin
j = 0;
while (~mantissaB[52-j] && $unsigned(j) <= $unsigned(52)) j = j+1; // search for leading one
ZP_mantissaB = j;
end
// Denormalized exponents created by subtracting the leading zeroes from the original exponents // Denormalized exponents created by subtracting the leading zeroes from the original exponents
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa

View File

@ -38,8 +38,8 @@ module fctrl (
7'b1001011: ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub 7'b1001011: ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd 7'b1001111: ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd
7'b1010011: casez(Funct7D) 7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_010_000_00_00_0_0; // fadd 7'b00000??: ControlsD = `FCTRLW'b1_0_001_110_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_010_001_00_00_0_0; // fsub 7'b00001??: ControlsD = `FCTRLW'b1_0_001_111_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul 7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv 7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt 7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt

File diff suppressed because it is too large Load Diff

View File

@ -76,7 +76,7 @@ module fpu (
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals // unpacking signals
@ -110,8 +110,8 @@ module fpu (
logic [63:0] ReadResW; // read result (load instruction) logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] FAddResM, FAddResW; // add/FP -> FP convert result logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
logic [4:0] FAddFlgM, FAddFlgW; // add/FP -> FP convert flags logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
@ -196,9 +196,10 @@ module fpu (
// forwarding muxs // forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux2 #(64) fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001)}, FSrcYE); // Force Z to be 0 for multiply instructions
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions
// unpacking unit // unpacking unit
@ -261,11 +262,14 @@ module fpu (
// - contains some E/M pipleine registers // - contains some E/M pipleine registers
//*** remove uneeded logic //*** remove uneeded logic
//*** change to use the unpacking unit if possible //*** change to use the unpacking unit if possible
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, // faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE,
.XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM, // .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
.XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, // .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
// outputs: // // outputs:
.FAddResM, .FAddFlgM); // .CvtFpResM, .CvtFpFlgM);
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
// compare unit // compare unit
// - computation is done in one stage // - computation is done in one stage
@ -322,6 +326,9 @@ module fpu (
flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
@ -352,7 +359,7 @@ module fpu (
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// FPU flag selection - to privileged // FPU flag selection - to privileged
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, CvtFpFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
@ -363,7 +370,7 @@ module fpu (
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM}, {FRegWriteM, FResultSelM, FmtM, FWriteIntM},
@ -382,7 +389,7 @@ module fpu (
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register // select the result to be written to the FP register
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResW, FResW, FResultSelW, FPUResultW); mux5 #(64) FPUResultMux(ReadResW, FMAResW, CvtFpResW, FDivResW, FResW, FResultSelW, FPUResultW);
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low