diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 4d7eb3120..4f3dcf6ff 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -43,13 +43,14 @@ module fdivsqrt( input logic FDivStartE, IDivStartE, input logic StallM, input logic StallE, + input logic TrapM, input logic SqrtE, SqrtM, input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, output logic DivSM, - output logic FDivBusyE, - output logic DivDone, + output logic FDivBusyE, DivStartE, FDivDoneE, +// output logic DivDone, output logic [`NE+1:0] QeM, output logic [`DIVb:0] QmM // output logic [`XLEN-1:0] RemM, @@ -66,7 +67,6 @@ module fdivsqrt( logic SpecialCaseM; logic [`DIVBLEN:0] n, m; logic OTFCSwap, ALTB, BZero, As; - logic DivStartE; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), @@ -75,11 +75,11 @@ module fdivsqrt( .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, - .FDivBusyE, .FDivStartE, .IDivStartE, .DivStartE, .StallE, .StallM, .DivDone, .XZeroE, .YZeroE, + .FDivBusyE, .FDivStartE, .IDivStartE, .DivStartE, .FDivDoneE, .StallE, .StallM, .TrapM, /*.DivDone, */ .XZeroE, .YZeroE, .XNaNE, .YNaNE, .MDUE, .n, .XInfE, .YInfE, .WZero, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( - .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, + .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM, .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap, .FDivBusyE); diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index e33688500..aa13b7da9 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -42,12 +42,13 @@ module fdivsqrtfsm( input logic SqrtE, input logic StallE, input logic StallM, + input logic TrapM, input logic WZero, input logic MDUE, input logic [`DIVBLEN:0] n, output logic DivStartE, - output logic DivDone, - output logic FDivBusyE, +// output logic DivDone, + output logic FDivBusyE, FDivDoneE, output logic SpecialCaseM ); @@ -61,8 +62,10 @@ module fdivsqrtfsm( // *** start logic is presently in fctl. Make it look more like integer division start logic // DivStartE comes from fctrl, reflecitng the start of floating-point and possibly integer division assign DivStartE = (FDivStartE | IDivStartE) & (state == IDLE) & ~StallM; - assign DivDone = (state == DONE) | (WZero & (state == BUSY)); // *** used in postprocess.sv and round.sv. This doesn't seem proper. They break when removed. - assign FDivBusyE = (state == BUSY & ~DivDone); // *** want to add | DivStartE but it creates comb loop + assign FDivDoneE = (state == DONE); + // assign DivDone = (state == DONE) | (WZero & (state == BUSY)); // *** used in postprocess.sv and round.sv. This doesn't seem proper. They break when removed. + //assign FDivBusyE = (state == BUSY & ~DivDone); // *** want to add | DivStartE but it creates comb loop + assign FDivBusyE = (state == BUSY) | DivStartE; // Divider control signals from MDU //assign DivBusyE = (state == BUSY) | DivStartE; @@ -110,6 +113,23 @@ module fdivsqrtfsm( /* verilator lint_on WIDTH */ + always_ff @(posedge clk) begin + if (reset | TrapM) begin + state <= #1 IDLE; + end else if (DivStartE) begin + step <= cycles; + if (SpecialCaseE) state <= #1 DONE; + else state <= #1 BUSY; + end else if (state == BUSY) begin + if (step == 1) state <= #1 DONE; + step <= step - 1; + end else if ((state == DONE) | (WZero & (state == BUSY))) begin + if (StallM) state <= #1 DONE; + else state <= #1 IDLE; + end + end + +/* always_ff @(posedge clk) begin if (reset) begin state <= #1 IDLE; @@ -129,6 +149,6 @@ module fdivsqrtfsm( step <= step - 1; end end - +*/ endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index a6c6c8bce..df8dd2c7f 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -37,7 +37,7 @@ module fdivsqrtiter( input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, input logic SqrtE, - input logic SqrtM, +// input logic SqrtM, input logic OTFCSwap, input logic [`DIVb+3:0] X, input logic [`DIVN-2:0] Dpreproc, @@ -85,8 +85,8 @@ module fdivsqrtiter( // Residual WS/SC registers/initializaiton mux mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN); mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN); - flopen #(`DIVb+4) wsflop(clk, DivStartE|FDivBusyE, WSN, WS[0]); - flopen #(`DIVb+4) wcflop(clk, DivStartE|FDivBusyE, WCN, WC[0]); + flopen #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]); + flopen #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]); // UOTFC Result U and UM registers/initialization mux // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division @@ -122,13 +122,13 @@ module fdivsqrtiter( generate for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations if (`RADIX == 2) begin: stage - fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtM, .OTFCSwap, + fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, .OTFCSwap, .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end else begin: stage logic j1; assign j1 = (i == 0 & ~C[0][`DIVb-1]); - fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1, .OTFCSwap, + fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .OTFCSwap, .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index a7da01454..9eec65dae 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -134,6 +134,7 @@ module fdivsqrtpostproc( // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted + // *** Result is unused right now assign Result = ($signed(PreResult) >>> NormShift) + {{(`DIVb+3){1'b0}}, (PostInc & ~RemOp)}; assign PreQmM = NegSticky ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 8d78ccd5b..b4c2527d3 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -37,7 +37,7 @@ module fdivsqrtstage2 ( input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, - input logic SqrtM, + input logic SqrtE, input logic OTFCSwap, output logic un, output logic [`DIVb+1:0] CNext, @@ -73,8 +73,8 @@ module fdivsqrtstage2 ( // Partial Product Generation // WSA, WCA = WS + WC - qD - assign AddIn = SqrtM ? F : Dsel; - csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtM, WSA, WCA); + assign AddIn = SqrtE ? F : Dsel; + csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA); assign WSNext = WSA << 1; assign WCNext = WCA << 1; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 92e8f55d4..fb203fd72 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -36,7 +36,7 @@ module fdivsqrtstage4 ( input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, - input logic SqrtM, j1, OTFCSwap, + input logic SqrtE, j1, OTFCSwap, output logic [`DIVb+1:0] CNext, output logic un, output logic [`DIVb:0] UNext, UMNext, @@ -65,7 +65,7 @@ module fdivsqrtstage4 ( assign WCmsbs = WC[`DIVb+3:`DIVb-4]; assign WSmsbs = WS[`DIVb+3:`DIVb-4]; - fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtM), .j1, .udigit, .OTFCSwap); + fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtE), .j1, .udigit, .OTFCSwap); assign un = 1'b0; // unused for radix 4 // F generation logic @@ -84,8 +84,8 @@ module fdivsqrtstage4 ( // Residual Update // {WS, WC}}Next = (WS + WC - qD or F) << 2 - assign AddIn = SqrtM ? F : Dsel; - assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D + assign AddIn = SqrtE ? F : Dsel; + assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); assign WSNext = WSA << 2; assign WCNext = WCA << 2; @@ -94,7 +94,7 @@ module fdivsqrtstage4 ( assign CNext = {2'b11, C[`DIVb+1:2]}; // On-the-fly converter to accumulate result - fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); + fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtE), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index d0b4aceef..0aa549991 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -38,6 +38,7 @@ module fpu ( input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input (from IEU) input logic StallE, StallM, StallW, // stall signals (from HZU) + input logic TrapM, input logic FlushE, FlushM, FlushW, // flush signals (from HZU) input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) @@ -129,7 +130,8 @@ module fpu ( logic [`DIVb:0] QmM; logic [`NE+1:0] QeE, QeM; logic DivSE, DivSM; - logic DivDoneM; +// logic DivDoneM; + logic FDivDoneE, DivStartE; // result and flag signals logic [`XLEN-1:0] ClassResE; // classify result @@ -149,6 +151,7 @@ module fpu ( logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed + logic EMRegEn; // DECODE STAGE @@ -176,7 +179,7 @@ module fpu ( .a4(RdW), .wd4(FPUResultW), .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); - // D/E pipeline registers + // D/E pipeline registers flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); @@ -263,8 +266,8 @@ module fpu ( fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E, - .StallE, .StallM, .DivSM, .FDivBusyE, .QeM, - .QmM, .DivDone(DivDoneM)); + .StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .DivStartE, .FDivDoneE, .QeM, + .QmM /*, .DivDone(DivDoneM) */); // // compare @@ -337,15 +340,20 @@ module fpu ( // E/M pipe registers - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM); - flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM}); - flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM}); + assign EMRegEn = ~StallM & (~FDivBusyE & ~FDivDoneE | DivStartE); + + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, EMRegEn, XE, FSrcXM); + flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM); + flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM); flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); - flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, + flopenr #(15) EMFpReg5 (clk, reset, EMRegEn, + {XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, + {XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); + /* flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, - {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); */ flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM); @@ -372,7 +380,7 @@ module fpu ( postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */ .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/pipelined/src/fpu/postproc/postprocess.sv b/pipelined/src/fpu/postproc/postprocess.sv index 24365cf96..bcac6de59 100644 --- a/pipelined/src/fpu/postproc/postprocess.sv +++ b/pipelined/src/fpu/postproc/postprocess.sv @@ -58,7 +58,7 @@ module postprocess ( input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivS, - input logic DivDone, +// input logic DivDone, input logic [`NE+1:0] DivQe, input logic [`DIVb:0] DivQm, // conversion signals @@ -129,7 +129,7 @@ module postprocess ( assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; assign CvtOp = (PostProcSel == 2'b00); assign FmaOp = (PostProcSel == 2'b10); - assign DivOp = (PostProcSel == 2'b01) & DivDone; + assign DivOp = (PostProcSel == 2'b01); // & DivDone; assign Sqrt = OpCtrl[0]; // is there an input of infinity or NaN being used @@ -165,13 +165,13 @@ module postprocess ( ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end 2'b01: begin //div - if(DivDone) begin + /* if(DivDone) begin */ ShiftAmt = DivShiftAmt; ShiftIn = DivShiftIn; - end else begin + /* end else begin ShiftAmt = '0; ShiftIn = '0; - end + end */ end default: begin ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; @@ -201,7 +201,7 @@ module postprocess ( round round(.OutFmt, .Frm, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, - .DivS, .DivDone, + .DivS, //.DivDone, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me); /////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/postproc/round.sv b/pipelined/src/fpu/postproc/round.sv index e4450325e..c9e2b94e4 100644 --- a/pipelined/src/fpu/postproc/round.sv +++ b/pipelined/src/fpu/postproc/round.sv @@ -43,7 +43,7 @@ module round( input logic DivOp, input logic CvtOp, input logic ToInt, - input logic DivDone, +// input logic DivDone, input logic [1:0] PostProcSel, input logic CvtResDenormUf, input logic CvtResUf, @@ -295,7 +295,8 @@ module round( case(PostProcSel) 2'b10: Me = FmaMe; // fma 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt - 2'b01: Me = DivDone ? Qe : '0; // divide + // 2'b01: Me = DivDone ? Qe : '0; // divide + 2'b01: Me = Qe; // divide default: Me = '0; endcase diff --git a/pipelined/src/hazard/hazard.sv b/pipelined/src/hazard/hazard.sv index b95b7a375..7d381f7c3 100644 --- a/pipelined/src/hazard/hazard.sv +++ b/pipelined/src/hazard/hazard.sv @@ -65,10 +65,10 @@ module hazard( assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); - assign StallECause = (DivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?) + assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?) // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap assign StallMCause = ((wfiM) & (~TrapM & ~IntPendingM)); - assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM) | (FDivBusyE & ~TrapM & ~IntPendingM); + assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM); // | (FDivBusyE & ~TrapM & ~IntPendingM); // head version // assign StallWCause = LSUStallM | IFUStallF | (FDivBusyE & ~TrapM & ~IntPendingM); // *** FDivBusyE should look like DivBusyE // assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE; diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 9092d179d..ba9753950 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -389,6 +389,7 @@ module wallypipelinedcore ( .ReadDataW(ReadDataW[`FLEN-1:0]),// Read data from memory .ForwardedSrcAE, // Integer input being processed (from IEU) .StallE, .StallM, .StallW, // stall signals from HZU + .TrapM, .FlushE, .FlushM, .FlushW, // flush signals from HZU .RdM, .RdW, // which FP register to write to (from IEU) .STATUS_FS, // is floating-point enabled? diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index d8f77ed61..faf9dd8f8 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -136,7 +136,6 @@ string tvpaths[] = '{ string imperas32f[] = '{ `IMPERASTEST, - "rv32i_m/F/FDIV-S-DYN-RDN-01", "rv32i_m/F/FADD-S-DYN-RDN-01", "rv32i_m/F/FADD-S-DYN-RMM-01", "rv32i_m/F/FADD-S-DYN-RNE-01",