# Patch summary (preamble text ahead of the first "diff --git" header).
#
# Files touched:
#   1. regression/wave-dos/peripheral-waves.do - the traced divider start signal
#      is renamed from StartDivideE to DivStartE.
#   2. src/muldiv/intdivrestoring.sv - restoring integer divider rework:
#        * Ports: SignedDivideE -> DivSignedE; StartDivideE is replaced by DivE;
#          the BusyE/DivDoneM outputs are replaced by a single DivBusyE output,
#          and BusyE/DivDoneM become internal signals.
#        * DivStartE = DivE & ~BusyE & ~DivDoneM & ~StallM and
#          DivBusyE = BusyE | DivStartE are now computed inside this module
#          (the equivalent assigns are removed from muldiv.sv).
#        * The negative-edge input save registers (xsavereg/dsavereg) are
#          removed; SrcAE/SrcBE feed the W-type muxes directly.
#        * SignDE/SignXE are qualified with DivSignedE up front, so the
#          SignedDivideM pipeline register and the later "SignedDivide &"
#          terms are dropped.
#        * xabsmux becomes a mux3 that selects SrcAE when Div0E is set, and
#          remmux takes the divide-by-zero remainder from XQM[0] instead of
#          the removed XSavedM register.
#        * wreg/xreg now sit before the divstep chain (driving WM[0]/XQM[0],
#          enabled by DivBusyE); DAbsBE is captured into DAbsBM, and
#          {Div0E, SignDE, SignXE} into {Div0M, SignDM, SignXM}, by flopen
#          registers enabled by DivStartE.
#        * FSM: DivInitE is removed; the start branch is keyed on DivStartE.
#   3. src/muldiv/muldiv.sv - declares DivE/DivSignedE, drops StartDivideE,
#      BusyE, DivDoneM, and updates the intdivrestoring port connections.
#
# NOTE(review): FlushM is still listed as a port of intdivrestoring, but no
#   line added by this patch references it (the flopenrc instances that used it
#   are removed) - confirm whether a flushed divide needs handling.
# NOTE(review): the FSM keeps blocking assignments inside always_ff; the test of
#   step[STEPBITS] directly after "step = step + 1" reads the incremented value,
#   so converting to nonblocking assignments would change the cycle count.
# NOTE(review): the comment on the "else if (BusyE)" branch still says "pause
#   one cycle at beginning of signed operations for absolute value"; that looks
#   left over from the previous FSM - confirm and reword in the source file.
# NOTE(review): the typos "fo" and "setp" are in context lines and are left
#   untouched here so the hunk context still matches the target file.
# NOTE(review): this copy of the patch appears to have its newlines collapsed;
#   the "@@" line counts cannot be checked against the text below.
#
diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 0203836e3..594d572ad 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -35,7 +35,7 @@ add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE -add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE +add wave /testbench/dut/hart/mdu/genblk1/div/DivStartE add wave /testbench/dut/hart/mdu/DivBusyE add wave -hex /testbench/dut/hart/mdu/genblk1/div/RemM add wave -hex /testbench/dut/hart/mdu/genblk1/div/QuotM diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 12ce38759..eeac7dbf9 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -31,104 +31,104 @@ module intdivrestoring ( input logic clk, input logic reset, input logic StallM, FlushM, - input logic SignedDivideE, W64E, - input logic StartDivideE, + input logic DivSignedE, W64E, + input logic DivE, input logic [`XLEN-1:0] SrcAE, SrcBE, - output logic BusyE, DivDoneM, + output logic DivBusyE, output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DinE, XinE, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM; + logic [`XLEN-1:0] WM[`DIV_BITSPERCYCLE:0]; + logic [`XLEN-1:0] XQM[`DIV_BITSPERCYCLE:0]; + logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); logic [STEPBITS:0] step; logic Div0E, Div0M; - logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; - logic SignedDivideM; + logic DivStartE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; + logic BusyE, 
DivDoneM; + + logic [`XLEN-1:0] WNextE, XQNextE; - // save inputs on the negative edge of the execute clock. - // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. - // Saving the inputs is the most hardware-efficient way to fix the issue. - flopen #(`XLEN) xsavereg(~clk, StartDivideE, SrcAE, XSavedE); - flopen #(`XLEN) dsavereg(~clk, StartDivideE, SrcBE, DSavedE); + ////////////////////////////// + // Execute Stage: prepare for division calculation with control logic, W logic and absolute values, initialize W and XQ + ////////////////////////////// + + // Divider control signals + assign DivStartE = DivE & ~BusyE & ~DivDoneM & ~StallM; + assign DivBusyE = BusyE | DivStartE; // Handle sign extension for W-type instructions generate if (`XLEN == 64) begin // RV64 has W-type instructions - mux2 #(`XLEN) xinmux(XSavedE, {XSavedE[31:0], 32'b0}, W64E, XinE); - mux2 #(`XLEN) dinmux(DSavedE, {{32{DSavedE[31]&SignedDivideE}}, DSavedE[31:0]}, W64E, DinE); - end else begin // RV32 has no W-type instructions - assign XinE = XSavedE; - assign DinE = DSavedE; + mux2 #(`XLEN) xinmux(SrcAE, {SrcAE[31:0], 32'b0}, W64E, XinE); + mux2 #(`XLEN) dinmux(SrcBE, {{32{SrcBE[31]&DivSignedE}}, SrcBE[31:0]}, W64E, DinE); + end else begin // RV32 has no W-type instructions + assign XinE = SrcAE; + assign DinE = SrcBE; end endgenerate // Extract sign bits and check fo division by zero - assign SignDE = DinE[`XLEN-1]; - assign SignXE = XinE[`XLEN-1]; + assign SignDE = DivSignedE & DinE[`XLEN-1]; + assign SignXE = DivSignedE & XinE[`XLEN-1]; assign Div0E = (DinE == 0); - // pipeline registers - flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); - flopenrc #(1) Div0eMReg(clk, reset, FlushM, ~StallM, Div0E, Div0M); - flopenrc #(1) SignDMReg(clk, reset, FlushM, ~StallM, SignDE, SignDM); - flopenrc #(1) SignXMReg(clk, reset, FlushM, ~StallM, SignXE, SignXM); - flopenrc #(`XLEN) XSavedMReg(clk, 
reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary? - // Take absolute value for signed operations, and negate D to handle subtraction in divider stages neg #(`XLEN) negd(DinE, DnE); - mux2 #(`XLEN) dabsmux(DnE, DinE, SignedDivideE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp + mux2 #(`XLEN) dabsmux(DnE, DinE, SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp neg #(`XLEN) negx(XinE, XnE); - mux2 #(`XLEN) xabsmux(XinE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 + mux3 #(`XLEN) xabsmux(XinE, XnE, SrcAE, {Div0E, SignXE}, XInitE); // take absolute value for signed operations, or keep original value for divide by 0 - // initialization multiplexers on first cycle of operation (one cycle after start is asserted) - mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]); - mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE[0]); + // initialization multiplexers on first cycle of operation + mux2 #(`XLEN) wmux(WM[`DIV_BITSPERCYCLE], {`XLEN{1'b0}}, DivStartE, WNextE); + mux2 #(`XLEN) xmux(XQM[`DIV_BITSPERCYCLE], XInitE, DivStartE, XQNextE); + ////////////////////////////// + // Memory Stage: division iterations, output sign correction + ////////////////////////////// + + // registers before division steps + flopen #(`XLEN) wreg(clk, DivBusyE, WNextE, WM[0]); + flopen #(`XLEN) xreg(clk, DivBusyE, XQNextE, XQM[0]); + flopen #(`XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsBM); + flopen #(3) Div0eMReg(clk, DivStartE, {Div0E, SignDE, SignXE}, {Div0M, SignDM, SignXM}); + // one copy of divstep for each bit produced per cycle generate genvar i; for (i=0; i<`DIV_BITSPERCYCLE; i = i+1) - intdivrestoringstep divstep(WE[i], XQE[i], DAbsBE, WE[i+1], XQE[i+1]); + intdivrestoringstep divstep(WM[i], XQM[i], DAbsBM, WM[i+1], XQM[i+1]); endgenerate - // registers after division steps - flopen #(`XLEN) wreg(clk, BusyE, WE[`DIV_BITSPERCYCLE], 
WM); - flopen #(`XLEN) xreg(clk, BusyE, XQE[`DIV_BITSPERCYCLE], XQM); - - // Output selection logic in Memory Stage - // On final setp of signed operations, negate outputs as needed - assign NegWM = SignedDivideM & SignXM; // Remainder should have same sign as X - assign NegQM = SignedDivideM & (SignXM ^ SignDM); // Quotient should be negative if one operand is positive and the other is negative - neg #(`XLEN) wneg(WM, WnM); - neg #(`XLEN) qneg(XQM, XQnM); + // On final setp of signed operations, negate outputs as needed to get correct sign + assign NegWM = SignXM; // Remainder should have same sign as X + assign NegQM = SignXM ^ SignDM; // Quotient should be negative if one operand is positive and the other is negative + neg #(`XLEN) qneg(XQM[0], XQnM); + neg #(`XLEN) wneg(WM[0], WnM); // Select appropriate output: normal, negated, or for divide by zero - mux3 #(`XLEN) qmux(XQM, XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(WM, WnM, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero + mux3 #(`XLEN) qmux(XQM[0], XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero + mux3 #(`XLEN) remmux(WM[0], WnM, XQM[0], {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero - // Divider FSM to sequence Init, Busy, and Done - always_ff @(posedge clk) + ////////////////////////////// + // Divider FSM to sequence Busy and Done + ////////////////////////////// + + always_ff @(posedge clk) if (reset) begin - BusyE = 0; DivDoneM = 0; step = 0; DivInitE = 0; - end else if (StartDivideE & ~StallM) begin + BusyE = 0; DivDoneM = 0; step = 0; + end else if (DivStartE) begin + step = 0; if (Div0E) DivDoneM = 1; - else begin - BusyE = 1; step = 0; DivInitE = 1; - end - end else if (BusyE & ~DivDoneM) 
begin // pause one cycle at beginning of signed operations for absolute value - DivInitE = 0; + else BusyE = 1; + end else if (BusyE) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin // complete in half the time for W-type instructions - step = 0; BusyE = 0; DivDoneM = 1; end end else if (DivDoneM) begin DivDoneM = StallM; - BusyE = 0; end - endmodule /* verilator lint_on UNOPTFLAT */ diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 8ffe91e9e..32dccd009 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -48,8 +48,8 @@ module muldiv ( logic [`XLEN-1:0] QuotM, RemM; logic [`XLEN*2-1:0] ProdE, ProdM; - logic StartDivideE, BusyE, DivDoneM; - logic SignedDivideE; + logic DivE; + logic DivSignedE; logic W64M; // Multiplier @@ -58,11 +58,10 @@ module muldiv ( // Divide // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; - assign DivBusyE = StartDivideE | BusyE; - assign SignedDivideE = ~Funct3E[0]; + assign DivE = MulDivE & Funct3E[2]; + assign DivSignedE = ~Funct3E[0]; intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .W64E, .StartDivideE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); + .DivSignedE, .W64E, .DivE, .SrcAE, .SrcBE, .DivBusyE, .QuotM, .RemM); // Result multiplexer always_comb