FPU divider working with execute stage stall

This commit is contained in:
David Harris 2022-12-02 11:11:53 -08:00
parent a86c9de36b
commit db5f3c15a4
12 changed files with 74 additions and 44 deletions

View File

@ -43,13 +43,14 @@ module fdivsqrt(
input logic FDivStartE, IDivStartE, input logic FDivStartE, IDivStartE,
input logic StallM, input logic StallM,
input logic StallE, input logic StallE,
input logic TrapM,
input logic SqrtE, SqrtM, input logic SqrtE, SqrtM,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [2:0] Funct3E, Funct3M, input logic [2:0] Funct3E, Funct3M,
input logic MDUE, W64E, input logic MDUE, W64E,
output logic DivSM, output logic DivSM,
output logic FDivBusyE, output logic FDivBusyE, DivStartE, FDivDoneE,
output logic DivDone, // output logic DivDone,
output logic [`NE+1:0] QeM, output logic [`NE+1:0] QeM,
output logic [`DIVb:0] QmM output logic [`DIVb:0] QmM
// output logic [`XLEN-1:0] RemM, // output logic [`XLEN-1:0] RemM,
@ -66,7 +67,6 @@ module fdivsqrt(
logic SpecialCaseM; logic SpecialCaseM;
logic [`DIVBLEN:0] n, m; logic [`DIVBLEN:0] n, m;
logic OTFCSwap, ALTB, BZero, As; logic OTFCSwap, ALTB, BZero, As;
logic DivStartE;
fdivsqrtpreproc fdivsqrtpreproc( fdivsqrtpreproc fdivsqrtpreproc(
.clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE),
@ -75,11 +75,11 @@ module fdivsqrt(
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
fdivsqrtfsm fdivsqrtfsm( fdivsqrtfsm fdivsqrtfsm(
.clk, .reset, .FmtE, .XsE, .SqrtE, .clk, .reset, .FmtE, .XsE, .SqrtE,
.FDivBusyE, .FDivStartE, .IDivStartE, .DivStartE, .StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .FDivBusyE, .FDivStartE, .IDivStartE, .DivStartE, .FDivDoneE, .StallE, .StallM, .TrapM, /*.DivDone, */ .XZeroE, .YZeroE,
.XNaNE, .YNaNE, .MDUE, .n, .XNaNE, .YNaNE, .MDUE, .n,
.XInfE, .YInfE, .WZero, .SpecialCaseM); .XInfE, .YInfE, .WZero, .SpecialCaseM);
fdivsqrtiter fdivsqrtiter( fdivsqrtiter fdivsqrtiter(
.clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM,
.X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
.DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap, .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
.FDivBusyE); .FDivBusyE);

View File

@ -42,12 +42,13 @@ module fdivsqrtfsm(
input logic SqrtE, input logic SqrtE,
input logic StallE, input logic StallE,
input logic StallM, input logic StallM,
input logic TrapM,
input logic WZero, input logic WZero,
input logic MDUE, input logic MDUE,
input logic [`DIVBLEN:0] n, input logic [`DIVBLEN:0] n,
output logic DivStartE, output logic DivStartE,
output logic DivDone, // output logic DivDone,
output logic FDivBusyE, output logic FDivBusyE, FDivDoneE,
output logic SpecialCaseM output logic SpecialCaseM
); );
@ -61,8 +62,10 @@ module fdivsqrtfsm(
// *** start logic is presently in fctrl. Make it look more like integer division start logic // *** start logic is presently in fctrl. Make it look more like integer division start logic
// DivStartE comes from fctrl, reflecting the start of floating-point and possibly integer division // DivStartE comes from fctrl, reflecting the start of floating-point and possibly integer division
assign DivStartE = (FDivStartE | IDivStartE) & (state == IDLE) & ~StallM; assign DivStartE = (FDivStartE | IDivStartE) & (state == IDLE) & ~StallM;
assign DivDone = (state == DONE) | (WZero & (state == BUSY)); // *** used in postprocess.sv and round.sv. This doesn't seem proper. They break when removed. assign FDivDoneE = (state == DONE);
assign FDivBusyE = (state == BUSY & ~DivDone); // *** want to add | DivStartE but it creates comb loop // assign DivDone = (state == DONE) | (WZero & (state == BUSY)); // *** used in postprocess.sv and round.sv. This doesn't seem proper. They break when removed.
//assign FDivBusyE = (state == BUSY & ~DivDone); // *** want to add | DivStartE but it creates comb loop
assign FDivBusyE = (state == BUSY) | DivStartE;
// Divider control signals from MDU // Divider control signals from MDU
//assign DivBusyE = (state == BUSY) | DivStartE; //assign DivBusyE = (state == BUSY) | DivStartE;
@ -110,6 +113,23 @@ module fdivsqrtfsm(
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */
always_ff @(posedge clk) begin
if (reset | TrapM) begin
state <= #1 IDLE;
end else if (DivStartE) begin
step <= cycles;
if (SpecialCaseE) state <= #1 DONE;
else state <= #1 BUSY;
end else if (state == BUSY) begin
if (step == 1) state <= #1 DONE;
step <= step - 1;
end else if ((state == DONE) | (WZero & (state == BUSY))) begin
if (StallM) state <= #1 DONE;
else state <= #1 IDLE;
end
end
/*
always_ff @(posedge clk) begin always_ff @(posedge clk) begin
if (reset) begin if (reset) begin
state <= #1 IDLE; state <= #1 IDLE;
@ -129,6 +149,6 @@ module fdivsqrtfsm(
step <= step - 1; step <= step - 1;
end end
end end
*/
endmodule endmodule

View File

@ -37,7 +37,7 @@ module fdivsqrtiter(
input logic [`NE-1:0] Xe, Ye, input logic [`NE-1:0] Xe, Ye,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic SqrtE, input logic SqrtE,
input logic SqrtM, // input logic SqrtM,
input logic OTFCSwap, input logic OTFCSwap,
input logic [`DIVb+3:0] X, input logic [`DIVb+3:0] X,
input logic [`DIVN-2:0] Dpreproc, input logic [`DIVN-2:0] Dpreproc,
@ -85,8 +85,8 @@ module fdivsqrtiter(
// Residual WS/WC registers/initialization mux // Residual WS/WC registers/initialization mux
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN); mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN);
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN); mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN);
flopen #(`DIVb+4) wsflop(clk, DivStartE|FDivBusyE, WSN, WS[0]); flopen #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]);
flopen #(`DIVb+4) wcflop(clk, DivStartE|FDivBusyE, WCN, WC[0]); flopen #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]);
// UOTFC Result U and UM registers/initialization mux // UOTFC Result U and UM registers/initialization mux
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
@ -122,13 +122,13 @@ module fdivsqrtiter(
generate generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations
if (`RADIX == 2) begin: stage if (`RADIX == 2) begin: stage
fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtM, .OTFCSwap, fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, .OTFCSwap,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end else begin: stage end else begin: stage
logic j1; logic j1;
assign j1 = (i == 0 & ~C[0][`DIVb-1]); assign j1 = (i == 0 & ~C[0][`DIVb-1]);
fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1, .OTFCSwap, fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .OTFCSwap,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end end

View File

@ -134,6 +134,7 @@ module fdivsqrtpostproc(
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
// *** Result is unused right now
assign Result = ($signed(PreResult) >>> NormShift) + {{(`DIVb+3){1'b0}}, (PostInc & ~RemOp)}; assign Result = ($signed(PreResult) >>> NormShift) + {{(`DIVb+3){1'b0}}, (PostInc & ~RemOp)};
assign PreQmM = NegSticky ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit assign PreQmM = NegSticky ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit

View File

@ -37,7 +37,7 @@ module fdivsqrtstage2 (
input logic [`DIVb:0] U, UM, input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C, input logic [`DIVb+1:0] C,
input logic SqrtM, input logic SqrtE,
input logic OTFCSwap, input logic OTFCSwap,
output logic un, output logic un,
output logic [`DIVb+1:0] CNext, output logic [`DIVb+1:0] CNext,
@ -73,8 +73,8 @@ module fdivsqrtstage2 (
// Partial Product Generation // Partial Product Generation
// WSA, WCA = WS + WC - qD // WSA, WCA = WS + WC - qD
assign AddIn = SqrtM ? F : Dsel; assign AddIn = SqrtE ? F : Dsel;
csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtM, WSA, WCA); csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
assign WSNext = WSA << 1; assign WSNext = WSA << 1;
assign WCNext = WCA << 1; assign WCNext = WCA << 1;

View File

@ -36,7 +36,7 @@ module fdivsqrtstage4 (
input logic [`DIVb:0] U, UM, input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C, input logic [`DIVb+1:0] C,
input logic SqrtM, j1, OTFCSwap, input logic SqrtE, j1, OTFCSwap,
output logic [`DIVb+1:0] CNext, output logic [`DIVb+1:0] CNext,
output logic un, output logic un,
output logic [`DIVb:0] UNext, UMNext, output logic [`DIVb:0] UNext, UMNext,
@ -65,7 +65,7 @@ module fdivsqrtstage4 (
assign WCmsbs = WC[`DIVb+3:`DIVb-4]; assign WCmsbs = WC[`DIVb+3:`DIVb-4];
assign WSmsbs = WS[`DIVb+3:`DIVb-4]; assign WSmsbs = WS[`DIVb+3:`DIVb-4];
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtM), .j1, .udigit, .OTFCSwap); fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtE), .j1, .udigit, .OTFCSwap);
assign un = 1'b0; // unused for radix 4 assign un = 1'b0; // unused for radix 4
// F generation logic // F generation logic
@ -84,8 +84,8 @@ module fdivsqrtstage4 (
// Residual Update // Residual Update
// {WS, WC}}Next = (WS + WC - qD or F) << 2 // {WS, WC}}Next = (WS + WC - qD or F) << 2
assign AddIn = SqrtM ? F : Dsel; assign AddIn = SqrtE ? F : Dsel;
assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D
csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
assign WSNext = WSA << 2; assign WSNext = WSA << 2;
assign WCNext = WCA << 2; assign WCNext = WCA << 2;
@ -94,7 +94,7 @@ module fdivsqrtstage4 (
assign CNext = {2'b11, C[`DIVb+1:2]}; assign CNext = {2'b11, C[`DIVb+1:2]};
// On-the-fly converter to accumulate result // On-the-fly converter to accumulate result
fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtE), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
endmodule endmodule

View File

@ -38,6 +38,7 @@ module fpu (
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input (from IEU) input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input (from IEU)
input logic StallE, StallM, StallW, // stall signals (from HZU) input logic StallE, StallM, StallW, // stall signals (from HZU)
input logic TrapM,
input logic FlushE, FlushM, FlushW, // flush signals (from HZU) input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
@ -129,7 +130,8 @@ module fpu (
logic [`DIVb:0] QmM; logic [`DIVb:0] QmM;
logic [`NE+1:0] QeE, QeM; logic [`NE+1:0] QeE, QeM;
logic DivSE, DivSM; logic DivSE, DivSM;
logic DivDoneM; // logic DivDoneM;
logic FDivDoneE, DivStartE;
// result and flag signals // result and flag signals
logic [`XLEN-1:0] ClassResE; // classify result logic [`XLEN-1:0] ClassResE; // classify result
@ -149,6 +151,7 @@ module fpu (
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
logic EMRegEn;
// DECODE STAGE // DECODE STAGE
@ -176,7 +179,7 @@ module fpu (
.a4(RdW), .wd4(FPUResultW), .a4(RdW), .wd4(FPUResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// D/E pipeline registers // D/E pipeline registers
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
@ -263,8 +266,8 @@ module fpu (
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
.StallE, .StallM, .DivSM, .FDivBusyE, .QeM, .StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .DivStartE, .FDivDoneE, .QeM,
.QmM, .DivDone(DivDoneM)); .QmM /*, .DivDone(DivDoneM) */);
// //
// compare // compare
@ -337,15 +340,20 @@ module fpu (
// E/M pipe registers // E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM); assign EMRegEn = ~StallM & (~FDivBusyE & ~FDivDoneE | DivStartE);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM}); // flopenrc #(64) EMFpReg1(clk, reset, FlushM, EMRegEn, XE, FSrcXM);
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, flopenr #(15) EMFpReg5 (clk, reset, EMRegEn,
{XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
/* flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); */
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);
@ -372,7 +380,7 @@ module fpu (
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM),
.FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));

View File

@ -58,7 +58,7 @@ module postprocess (
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count
//divide signals //divide signals
input logic DivS, input logic DivS,
input logic DivDone, // input logic DivDone,
input logic [`NE+1:0] DivQe, input logic [`NE+1:0] DivQe,
input logic [`DIVb:0] DivQm, input logic [`DIVb:0] DivQm,
// conversion signals // conversion signals
@ -129,7 +129,7 @@ module postprocess (
assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
assign CvtOp = (PostProcSel == 2'b00); assign CvtOp = (PostProcSel == 2'b00);
assign FmaOp = (PostProcSel == 2'b10); assign FmaOp = (PostProcSel == 2'b10);
assign DivOp = (PostProcSel == 2'b01) & DivDone; assign DivOp = (PostProcSel == 2'b01); // & DivDone;
assign Sqrt = OpCtrl[0]; assign Sqrt = OpCtrl[0];
// is there an input of infinity or NaN being used // is there an input of infinity or NaN being used
@ -165,13 +165,13 @@ module postprocess (
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
end end
2'b01: begin //div 2'b01: begin //div
if(DivDone) begin /* if(DivDone) begin */
ShiftAmt = DivShiftAmt; ShiftAmt = DivShiftAmt;
ShiftIn = DivShiftIn; ShiftIn = DivShiftIn;
end else begin /* end else begin
ShiftAmt = '0; ShiftAmt = '0;
ShiftIn = '0; ShiftIn = '0;
end end */
end end
default: begin default: begin
ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}};
@ -201,7 +201,7 @@ module postprocess (
round round(.OutFmt, .Frm, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, round round(.OutFmt, .Frm, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf,
.DivS, .DivDone, .DivS, //.DivDone,
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me); .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////

View File

@ -43,7 +43,7 @@ module round(
input logic DivOp, input logic DivOp,
input logic CvtOp, input logic CvtOp,
input logic ToInt, input logic ToInt,
input logic DivDone, // input logic DivDone,
input logic [1:0] PostProcSel, input logic [1:0] PostProcSel,
input logic CvtResDenormUf, input logic CvtResDenormUf,
input logic CvtResUf, input logic CvtResUf,
@ -295,7 +295,8 @@ module round(
case(PostProcSel) case(PostProcSel)
2'b10: Me = FmaMe; // fma 2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Me = DivDone ? Qe : '0; // divide // 2'b01: Me = DivDone ? Qe : '0; // divide
2'b01: Me = Qe; // divide
default: Me = '0; default: Me = '0;
endcase endcase

View File

@ -65,10 +65,10 @@ module hazard(
assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE); assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE);
// stall in decode if instruction is a load/mul/csr dependent on previous // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE);
assign StallECause = (DivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?) assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?)
// WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap
assign StallMCause = ((wfiM) & (~TrapM & ~IntPendingM)); assign StallMCause = ((wfiM) & (~TrapM & ~IntPendingM));
assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM) | (FDivBusyE & ~TrapM & ~IntPendingM); assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM); // | (FDivBusyE & ~TrapM & ~IntPendingM);
// head version // head version
// assign StallWCause = LSUStallM | IFUStallF | (FDivBusyE & ~TrapM & ~IntPendingM); // *** FDivBusyE should look like DivBusyE // assign StallWCause = LSUStallM | IFUStallF | (FDivBusyE & ~TrapM & ~IntPendingM); // *** FDivBusyE should look like DivBusyE
// assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE; // assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE;

View File

@ -389,6 +389,7 @@ module wallypipelinedcore (
.ReadDataW(ReadDataW[`FLEN-1:0]),// Read data from memory .ReadDataW(ReadDataW[`FLEN-1:0]),// Read data from memory
.ForwardedSrcAE, // Integer input being processed (from IEU) .ForwardedSrcAE, // Integer input being processed (from IEU)
.StallE, .StallM, .StallW, // stall signals from HZU .StallE, .StallM, .StallW, // stall signals from HZU
.TrapM,
.FlushE, .FlushM, .FlushW, // flush signals from HZU .FlushE, .FlushM, .FlushW, // flush signals from HZU
.RdM, .RdW, // which FP register to write to (from IEU) .RdM, .RdW, // which FP register to write to (from IEU)
.STATUS_FS, // is floating-point enabled? .STATUS_FS, // is floating-point enabled?

View File

@ -136,7 +136,6 @@ string tvpaths[] = '{
string imperas32f[] = '{ string imperas32f[] = '{
`IMPERASTEST, `IMPERASTEST,
"rv32i_m/F/FDIV-S-DYN-RDN-01",
"rv32i_m/F/FADD-S-DYN-RDN-01", "rv32i_m/F/FADD-S-DYN-RDN-01",
"rv32i_m/F/FADD-S-DYN-RMM-01", "rv32i_m/F/FADD-S-DYN-RMM-01",
"rv32i_m/F/FADD-S-DYN-RNE-01", "rv32i_m/F/FADD-S-DYN-RNE-01",