From 64ed2678252d24ad250692088cf12e922f9654fb Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 08:30:19 -0700 Subject: [PATCH 01/11] renamed DivSigned --- wally-pipelined/src/muldiv/intdivrestoring.sv | 16 ++++++++-------- wally-pipelined/src/muldiv/muldiv.sv | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 12ce3875..2a78ec0f 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -31,7 +31,7 @@ module intdivrestoring ( input logic clk, input logic reset, input logic StallM, FlushM, - input logic SignedDivideE, W64E, + input logic DivSignedE, W64E, input logic StartDivideE, input logic [`XLEN-1:0] SrcAE, SrcBE, output logic BusyE, DivDoneM, @@ -45,7 +45,7 @@ module intdivrestoring ( logic [STEPBITS:0] step; logic Div0E, Div0M; logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; - logic SignedDivideM; + logic DivSignedM; // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. 
@@ -57,7 +57,7 @@ module intdivrestoring ( generate if (`XLEN == 64) begin // RV64 has W-type instructions mux2 #(`XLEN) xinmux(XSavedE, {XSavedE[31:0], 32'b0}, W64E, XinE); - mux2 #(`XLEN) dinmux(DSavedE, {{32{DSavedE[31]&SignedDivideE}}, DSavedE[31:0]}, W64E, DinE); + mux2 #(`XLEN) dinmux(DSavedE, {{32{DSavedE[31]&DivSignedE}}, DSavedE[31:0]}, W64E, DinE); end else begin // RV32 has no W-type instructions assign XinE = XSavedE; assign DinE = DSavedE; @@ -70,7 +70,7 @@ module intdivrestoring ( assign Div0E = (DinE == 0); // pipeline registers - flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); + flopenrc #(1) DivSignedMReg(clk, reset, FlushM, ~StallM, DivSignedE, DivSignedM); flopenrc #(1) Div0eMReg(clk, reset, FlushM, ~StallM, Div0E, Div0M); flopenrc #(1) SignDMReg(clk, reset, FlushM, ~StallM, SignDE, SignDM); flopenrc #(1) SignXMReg(clk, reset, FlushM, ~StallM, SignXE, SignXM); @@ -78,9 +78,9 @@ module intdivrestoring ( // Take absolute value for signed operations, and negate D to handle subtraction in divider stages neg #(`XLEN) negd(DinE, DnE); - mux2 #(`XLEN) dabsmux(DnE, DinE, SignedDivideE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp + mux2 #(`XLEN) dabsmux(DnE, DinE, DivSignedE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp neg #(`XLEN) negx(XinE, XnE); - mux2 #(`XLEN) xabsmux(XinE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 + mux2 #(`XLEN) xabsmux(XinE, XnE, DivSignedE & SignXE, XInitE); // need original X as remainder if doing divide by 0 // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]); @@ -99,8 +99,8 @@ module intdivrestoring ( // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed - assign NegWM = SignedDivideM & 
SignXM; // Remainder should have same sign as X - assign NegQM = SignedDivideM & (SignXM ^ SignDM); // Quotient should be negative if one operand is positive and the other is negative + assign NegWM = DivSignedM & SignXM; // Remainder should have same sign as X + assign NegQM = DivSignedM & (SignXM ^ SignDM); // Quotient should be negative if one operand is positive and the other is negative neg #(`XLEN) wneg(WM, WnM); neg #(`XLEN) qneg(XQM, XQnM); // Select appropriate output: normal, negated, or for divide by zero diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 8ffe91e9..11fb4ff1 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -49,7 +49,7 @@ module muldiv ( logic [`XLEN*2-1:0] ProdE, ProdM; logic StartDivideE, BusyE, DivDoneM; - logic SignedDivideE; + logic DivSignedE; logic W64M; // Multiplier @@ -60,9 +60,9 @@ module muldiv ( // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; assign DivBusyE = StartDivideE | BusyE; - assign SignedDivideE = ~Funct3E[0]; + assign DivSignedE = ~Funct3E[0]; intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .W64E, .StartDivideE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); + .DivSignedE, .W64E, .StartDivideE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); // Result multiplexer always_comb From 39bbeefa78532e31271d13b9aca63d300b00ea5a Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 08:32:04 -0700 Subject: [PATCH 02/11] renamed DivStart --- wally-pipelined/src/muldiv/intdivrestoring.sv | 8 ++++---- wally-pipelined/src/muldiv/muldiv.sv | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 2a78ec0f..9dd60f8d 100644 --- 
a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -32,7 +32,7 @@ module intdivrestoring ( input logic reset, input logic StallM, FlushM, input logic DivSignedE, W64E, - input logic StartDivideE, + input logic DivStartE, input logic [`XLEN-1:0] SrcAE, SrcBE, output logic BusyE, DivDoneM, output logic [`XLEN-1:0] QuotM, RemM @@ -50,8 +50,8 @@ module intdivrestoring ( // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. // Saving the inputs is the most hardware-efficient way to fix the issue. - flopen #(`XLEN) xsavereg(~clk, StartDivideE, SrcAE, XSavedE); - flopen #(`XLEN) dsavereg(~clk, StartDivideE, SrcBE, DSavedE); + flopen #(`XLEN) xsavereg(~clk, DivStartE, SrcAE, XSavedE); + flopen #(`XLEN) dsavereg(~clk, DivStartE, SrcBE, DSavedE); // Handle sign extension for W-type instructions generate @@ -111,7 +111,7 @@ module intdivrestoring ( always_ff @(posedge clk) if (reset) begin BusyE = 0; DivDoneM = 0; step = 0; DivInitE = 0; - end else if (StartDivideE & ~StallM) begin + end else if (DivStartE & ~StallM) begin if (Div0E) DivDoneM = 1; else begin BusyE = 1; step = 0; DivInitE = 1; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 11fb4ff1..c38a6ce0 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -48,7 +48,7 @@ module muldiv ( logic [`XLEN-1:0] QuotM, RemM; logic [`XLEN*2-1:0] ProdE, ProdM; - logic StartDivideE, BusyE, DivDoneM; + logic DivStartE, BusyE, DivDoneM; logic DivSignedE; logic W64M; @@ -58,11 +58,11 @@ module muldiv ( // Divide // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; - assign DivBusyE = StartDivideE | BusyE; + assign DivStartE = MulDivE & Funct3E[2] & ~BusyE 
& ~DivDoneM; + assign DivBusyE = DivStartE | BusyE; assign DivSignedE = ~Funct3E[0]; intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .DivSignedE, .W64E, .StartDivideE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); + .DivSignedE, .W64E, .DivStartE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); // Result multiplexer always_comb From 3aa9e088c8cf3b4bfe34b948206b81536560e2d7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 08:35:26 -0700 Subject: [PATCH 03/11] Simplified divider sign handling --- wally-pipelined/src/muldiv/intdivrestoring.sv | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 9dd60f8d..64a65b0a 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -45,7 +45,6 @@ module intdivrestoring ( logic [STEPBITS:0] step; logic Div0E, Div0M; logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; - logic DivSignedM; // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. 
@@ -65,12 +64,11 @@ module intdivrestoring ( endgenerate // Extract sign bits and check fo division by zero - assign SignDE = DinE[`XLEN-1]; - assign SignXE = XinE[`XLEN-1]; + assign SignDE = DivSignedE & DinE[`XLEN-1]; + assign SignXE = DivSignedE & XinE[`XLEN-1]; assign Div0E = (DinE == 0); // pipeline registers - flopenrc #(1) DivSignedMReg(clk, reset, FlushM, ~StallM, DivSignedE, DivSignedM); flopenrc #(1) Div0eMReg(clk, reset, FlushM, ~StallM, Div0E, Div0M); flopenrc #(1) SignDMReg(clk, reset, FlushM, ~StallM, SignDE, SignDM); flopenrc #(1) SignXMReg(clk, reset, FlushM, ~StallM, SignXE, SignXM); @@ -78,9 +76,9 @@ module intdivrestoring ( // Take absolute value for signed operations, and negate D to handle subtraction in divider stages neg #(`XLEN) negd(DinE, DnE); - mux2 #(`XLEN) dabsmux(DnE, DinE, DivSignedE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp + mux2 #(`XLEN) dabsmux(DnE, DinE, SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp neg #(`XLEN) negx(XinE, XnE); - mux2 #(`XLEN) xabsmux(XinE, XnE, DivSignedE & SignXE, XInitE); // need original X as remainder if doing divide by 0 + mux2 #(`XLEN) xabsmux(XinE, XnE, SignXE, XInitE); // need original X as remainder if doing divide by 0 // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]); @@ -99,8 +97,8 @@ module intdivrestoring ( // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed - assign NegWM = DivSignedM & SignXM; // Remainder should have same sign as X - assign NegQM = DivSignedM & (SignXM ^ SignDM); // Quotient should be negative if one operand is positive and the other is negative + assign NegWM = SignXM; // Remainder should have same sign as X + assign NegQM = SignXM ^ SignDM; // Quotient should be negative if one operand is positive and the other is negative 
neg #(`XLEN) wneg(WM, WnM); neg #(`XLEN) qneg(XQM, XQnM); // Select appropriate output: normal, negated, or for divide by zero From c2bb0324c665e5bfbda292baeb682c2d78c53d64 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 10:41:13 -0700 Subject: [PATCH 04/11] Removed negedge flops from divider --- .../regression/wave-dos/peripheral-waves.do | 2 +- wally-pipelined/src/muldiv/intdivrestoring.sv | 43 +++++++++++-------- wally-pipelined/src/muldiv/muldiv.sv | 3 +- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 0203836e..594d572a 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -35,7 +35,7 @@ add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE -add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE +add wave /testbench/dut/hart/mdu/genblk1/div/DivStartE add wave /testbench/dut/hart/mdu/DivBusyE add wave -hex /testbench/dut/hart/mdu/genblk1/div/RemM add wave -hex /testbench/dut/hart/mdu/genblk1/div/QuotM diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 64a65b0a..4ca12f4b 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -40,26 +40,28 @@ module intdivrestoring ( logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0]; logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DinE, XinE, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM; + logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WM, XQM, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); logic [STEPBITS:0] step; logic Div0E, Div0M; logic 
DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; + + logic [`XLEN-1:0] WNextE, XQNextE; // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. // Saving the inputs is the most hardware-efficient way to fix the issue. - flopen #(`XLEN) xsavereg(~clk, DivStartE, SrcAE, XSavedE); - flopen #(`XLEN) dsavereg(~clk, DivStartE, SrcBE, DSavedE); + //flopen #(`XLEN) xsavereg(~clk, DivStartE, SrcAE, XSavedE); + // flopen #(`XLEN) dsavereg(~clk, DivStartE, SrcBE, DSavedE); // Handle sign extension for W-type instructions generate if (`XLEN == 64) begin // RV64 has W-type instructions - mux2 #(`XLEN) xinmux(XSavedE, {XSavedE[31:0], 32'b0}, W64E, XinE); - mux2 #(`XLEN) dinmux(DSavedE, {{32{DSavedE[31]&DivSignedE}}, DSavedE[31:0]}, W64E, DinE); + mux2 #(`XLEN) xinmux(SrcAE, {SrcAE[31:0], 32'b0}, W64E, XinE); + mux2 #(`XLEN) dinmux(SrcBE, {{32{SrcBE[31]&DivSignedE}}, SrcBE[31:0]}, W64E, DinE); end else begin // RV32 has no W-type instructions - assign XinE = XSavedE; - assign DinE = DSavedE; + assign XinE = SrcAE; + assign DinE = SrcBE; end endgenerate @@ -69,10 +71,9 @@ module intdivrestoring ( assign Div0E = (DinE == 0); // pipeline registers - flopenrc #(1) Div0eMReg(clk, reset, FlushM, ~StallM, Div0E, Div0M); - flopenrc #(1) SignDMReg(clk, reset, FlushM, ~StallM, SignDE, SignDM); - flopenrc #(1) SignXMReg(clk, reset, FlushM, ~StallM, SignXE, SignXM); - flopenrc #(`XLEN) XSavedMReg(clk, reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary? 
+ flopen #(1) Div0eMReg(clk, DivStartE, Div0E, Div0M); + flopen #(1) SignDMReg(clk, DivStartE, SignDE, SignDM); + flopen #(1) SignXMReg(clk, DivStartE, SignXE, SignXM); // Take absolute value for signed operations, and negate D to handle subtraction in divider stages neg #(`XLEN) negd(DinE, DnE); @@ -81,19 +82,25 @@ module intdivrestoring ( mux2 #(`XLEN) xabsmux(XinE, XnE, SignXE, XInitE); // need original X as remainder if doing divide by 0 // initialization multiplexers on first cycle of operation (one cycle after start is asserted) - mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]); - mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE[0]); + mux2 #(`XLEN) wmux(WE[`DIV_BITSPERCYCLE], {`XLEN{1'b0}}, DivStartE, WNextE); + mux2 #(`XLEN) xmux(XQE[`DIV_BITSPERCYCLE], XInitE, DivStartE, XQNextE); + // registers before division steps + // *** maybe change this stuff to M stage + flopen #(`XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsBM); + flopen #(`XLEN) wreg(clk, BusyE | DivStartE, WNextE, WE[0]); // *** merge Busy and start without combinational loop + flopen #(`XLEN) xreg(clk, BusyE | DivStartE, XQNextE, XQE[0]); + flopen #(`XLEN) XSavedMReg(clk, DivStartE, SrcAE, XSavedM); + // one copy of divstep for each bit produced per cycle generate genvar i; for (i=0; i<`DIV_BITSPERCYCLE; i = i+1) - intdivrestoringstep divstep(WE[i], XQE[i], DAbsBE, WE[i+1], XQE[i+1]); + intdivrestoringstep divstep(WE[i], XQE[i], DAbsBM, WE[i+1], XQE[i+1]); endgenerate - // registers after division steps - flopen #(`XLEN) wreg(clk, BusyE, WE[`DIV_BITSPERCYCLE], WM); - flopen #(`XLEN) xreg(clk, BusyE, XQE[`DIV_BITSPERCYCLE], XQM); + assign WM = WE[0]; + assign XQM = XQE[0]; // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed @@ -112,7 +119,7 @@ module intdivrestoring ( end else if (DivStartE & ~StallM) begin if (Div0E) DivDoneM = 1; else begin - BusyE = 1; step = 0; DivInitE = 1; + BusyE = 1; step = 0; DivInitE = 1; // *** can drop DivInit 
end end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value DivInitE = 0; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index c38a6ce0..09f1547f 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -58,9 +58,10 @@ module muldiv ( // Divide // Start a divide when a new division instruction is received and the divider isn't already busy or finishing + assign DivE = MulDivE & Funct3E[2]; assign DivStartE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; - assign DivBusyE = DivStartE | BusyE; assign DivSignedE = ~Funct3E[0]; + assign DivBusyE = BusyE | DivStartE; intdivrestoring div(.clk, .reset, .StallM, .FlushM, .DivSignedE, .W64E, .DivStartE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); From 6988c8c37cffe852a8183e214dd0b1e65447b219 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 10:55:02 -0700 Subject: [PATCH 05/11] divider control signal simplification --- wally-pipelined/src/muldiv/intdivrestoring.sv | 19 ++++++++++--------- wally-pipelined/src/muldiv/muldiv.sv | 6 ++---- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 4ca12f4b..327a37b6 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -32,9 +32,9 @@ module intdivrestoring ( input logic reset, input logic StallM, FlushM, input logic DivSignedE, W64E, - input logic DivStartE, + input logic DivE, input logic [`XLEN-1:0] SrcAE, SrcBE, - output logic BusyE, DivDoneM, + output logic DivBusyE, output logic [`XLEN-1:0] QuotM, RemM ); @@ -44,7 +44,8 @@ module intdivrestoring ( localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); logic [STEPBITS:0] step; logic Div0E, Div0M; - logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; + logic DivStartE, SignXE, SignXM, SignDE, SignDM, 
NegWM, NegQM; + logic BusyE, DivDoneM; logic [`XLEN-1:0] WNextE, XQNextE; @@ -54,6 +55,9 @@ module intdivrestoring ( //flopen #(`XLEN) xsavereg(~clk, DivStartE, SrcAE, XSavedE); // flopen #(`XLEN) dsavereg(~clk, DivStartE, SrcBE, DSavedE); + assign DivStartE = DivE & ~BusyE & ~DivDoneM; + assign DivBusyE = BusyE | DivStartE; + // Handle sign extension for W-type instructions generate if (`XLEN == 64) begin // RV64 has W-type instructions @@ -112,26 +116,23 @@ module intdivrestoring ( mux3 #(`XLEN) qmux(XQM, XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero mux3 #(`XLEN) remmux(WM, WnM, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero - // Divider FSM to sequence Init, Busy, and Done + // Divider FSM to sequence Busy, and Done always_ff @(posedge clk) if (reset) begin - BusyE = 0; DivDoneM = 0; step = 0; DivInitE = 0; + BusyE = 0; DivDoneM = 0; step = 0; end else if (DivStartE & ~StallM) begin if (Div0E) DivDoneM = 1; else begin - BusyE = 1; step = 0; DivInitE = 1; // *** can drop DivInit + BusyE = 1; step = 0; end end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value - DivInitE = 0; step = step + 1; if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin // complete in half the time for W-type instructions - step = 0; BusyE = 0; DivDoneM = 1; end end else if (DivDoneM) begin DivDoneM = StallM; - BusyE = 0; end endmodule diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 09f1547f..32dccd00 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -48,7 +48,7 @@ module muldiv ( logic [`XLEN-1:0] QuotM, RemM; logic [`XLEN*2-1:0] ProdE, ProdM; - logic DivStartE, BusyE, DivDoneM; + logic DivE; logic DivSignedE; logic W64M; @@ -59,11 +59,9 @@ module muldiv ( // Divide // Start a 
divide when a new division instruction is received and the divider isn't already busy or finishing assign DivE = MulDivE & Funct3E[2]; - assign DivStartE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; assign DivSignedE = ~Funct3E[0]; - assign DivBusyE = BusyE | DivStartE; intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .DivSignedE, .W64E, .DivStartE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); + .DivSignedE, .W64E, .DivE, .SrcAE, .SrcBE, .DivBusyE, .QuotM, .RemM); // Result multiplexer always_comb From b713b6ca8785fa1c1a2a6df076db98893cf1f07e Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 11:20:07 -0700 Subject: [PATCH 06/11] Simplified remainder for divide by 0 --- wally-pipelined/src/muldiv/intdivrestoring.sv | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 327a37b6..bc7945f0 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -40,7 +40,7 @@ module intdivrestoring ( logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0]; logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WM, XQM, WnM, XQnM; + logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WM, XQM, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); logic [STEPBITS:0] step; logic Div0E, Div0M; @@ -49,12 +49,7 @@ module intdivrestoring ( logic [`XLEN-1:0] WNextE, XQNextE; - // save inputs on the negative edge of the execute clock. - // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. - // Saving the inputs is the most hardware-efficient way to fix the issue. 
- //flopen #(`XLEN) xsavereg(~clk, DivStartE, SrcAE, XSavedE); - // flopen #(`XLEN) dsavereg(~clk, DivStartE, SrcBE, DSavedE); - + // Divider control signals assign DivStartE = DivE & ~BusyE & ~DivDoneM; assign DivBusyE = BusyE | DivStartE; @@ -63,7 +58,7 @@ module intdivrestoring ( if (`XLEN == 64) begin // RV64 has W-type instructions mux2 #(`XLEN) xinmux(SrcAE, {SrcAE[31:0], 32'b0}, W64E, XinE); mux2 #(`XLEN) dinmux(SrcBE, {{32{SrcBE[31]&DivSignedE}}, SrcBE[31:0]}, W64E, DinE); - end else begin // RV32 has no W-type instructions + end else begin // RV32 has no W-type instructions assign XinE = SrcAE; assign DinE = SrcBE; end @@ -74,16 +69,11 @@ module intdivrestoring ( assign SignXE = DivSignedE & XinE[`XLEN-1]; assign Div0E = (DinE == 0); - // pipeline registers - flopen #(1) Div0eMReg(clk, DivStartE, Div0E, Div0M); - flopen #(1) SignDMReg(clk, DivStartE, SignDE, SignDM); - flopen #(1) SignXMReg(clk, DivStartE, SignXE, SignXM); - // Take absolute value for signed operations, and negate D to handle subtraction in divider stages neg #(`XLEN) negd(DinE, DnE); mux2 #(`XLEN) dabsmux(DnE, DinE, SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp neg #(`XLEN) negx(XinE, XnE); - mux2 #(`XLEN) xabsmux(XinE, XnE, SignXE, XInitE); // need original X as remainder if doing divide by 0 + mux3 #(`XLEN) xabsmux(XinE, XnE, SrcAE, {Div0E, SignXE}, XInitE); // take absolute value for signed operations, or keep original value for divide by 0 // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(WE[`DIV_BITSPERCYCLE], {`XLEN{1'b0}}, DivStartE, WNextE); @@ -91,10 +81,12 @@ module intdivrestoring ( // registers before division steps // *** maybe change this stuff to M stage + flopen #(`XLEN) wreg(clk, DivBusyE, WNextE, WE[0]); + flopen #(`XLEN) xreg(clk, DivBusyE, XQNextE, XQE[0]); flopen #(`XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsBM); - flopen #(`XLEN) wreg(clk, BusyE | 
DivStartE, WNextE, WE[0]); // *** merge Busy and start without combinational loop - flopen #(`XLEN) xreg(clk, BusyE | DivStartE, XQNextE, XQE[0]); - flopen #(`XLEN) XSavedMReg(clk, DivStartE, SrcAE, XSavedM); + flopen #(1) Div0eMReg(clk, DivStartE, Div0E, Div0M); + flopen #(1) SignDMReg(clk, DivStartE, SignDE, SignDM); + flopen #(1) SignXMReg(clk, DivStartE, SignXE, SignXM); // one copy of divstep for each bit produced per cycle generate @@ -103,7 +95,7 @@ module intdivrestoring ( intdivrestoringstep divstep(WE[i], XQE[i], DAbsBM, WE[i+1], XQE[i+1]); endgenerate - assign WM = WE[0]; + assign WM = WE[0]; // *** move to M stage assign XQM = XQE[0]; // Output selection logic in Memory Stage @@ -114,7 +106,7 @@ module intdivrestoring ( neg #(`XLEN) qneg(XQM, XQnM); // Select appropriate output: normal, negated, or for divide by zero mux3 #(`XLEN) qmux(XQM, XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(WM, WnM, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero + mux3 #(`XLEN) remmux(WM, WnM, XQM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero // Divider FSM to sequence Busy, and Done always_ff @(posedge clk) From 635fe181f8c3058cee5ccd6b1456904662c04160 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 11:30:53 -0700 Subject: [PATCH 07/11] Moved divide iteration register names to M stage --- wally-pipelined/src/muldiv/intdivrestoring.sv | 50 +++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index bc7945f0..b403db47 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -38,9 +38,9 @@ module intdivrestoring ( output logic [`XLEN-1:0] QuotM, RemM 
); - logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WM, XQM, WnM, XQnM; + logic [`XLEN-1:0] WM[`DIV_BITSPERCYCLE:0]; + logic [`XLEN-1:0] XQM[`DIV_BITSPERCYCLE:0]; + logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); logic [STEPBITS:0] step; logic Div0E, Div0M; @@ -49,6 +49,10 @@ module intdivrestoring ( logic [`XLEN-1:0] WNextE, XQNextE; + ////////////////////////////// + // Execute Stage: prepare for division calculation with control logic, W logic and absolute values, initialize W and XQ + ////////////////////////////// + // Divider control signals assign DivStartE = DivE & ~BusyE & ~DivDoneM; assign DivBusyE = BusyE | DivStartE; @@ -75,41 +79,43 @@ module intdivrestoring ( neg #(`XLEN) negx(XinE, XnE); mux3 #(`XLEN) xabsmux(XinE, XnE, SrcAE, {Div0E, SignXE}, XInitE); // take absolute value for signed operations, or keep original value for divide by 0 - // initialization multiplexers on first cycle of operation (one cycle after start is asserted) - mux2 #(`XLEN) wmux(WE[`DIV_BITSPERCYCLE], {`XLEN{1'b0}}, DivStartE, WNextE); - mux2 #(`XLEN) xmux(XQE[`DIV_BITSPERCYCLE], XInitE, DivStartE, XQNextE); + // initialization multiplexers on first cycle of operation + mux2 #(`XLEN) wmux(WM[`DIV_BITSPERCYCLE], {`XLEN{1'b0}}, DivStartE, WNextE); + mux2 #(`XLEN) xmux(XQM[`DIV_BITSPERCYCLE], XInitE, DivStartE, XQNextE); + + ////////////////////////////// + // Memory Stage: division iterations, output sign correction + ////////////////////////////// // registers before division steps // *** maybe change this stuff to M stage - flopen #(`XLEN) wreg(clk, DivBusyE, WNextE, WE[0]); - flopen #(`XLEN) xreg(clk, DivBusyE, XQNextE, XQE[0]); + flopen #(`XLEN) wreg(clk, DivBusyE, WNextE, WM[0]); + flopen #(`XLEN) xreg(clk, DivBusyE, XQNextE, XQM[0]); flopen #(`XLEN) dabsreg(clk, DivStartE, DAbsBE, 
DAbsBM); - flopen #(1) Div0eMReg(clk, DivStartE, Div0E, Div0M); - flopen #(1) SignDMReg(clk, DivStartE, SignDE, SignDM); - flopen #(1) SignXMReg(clk, DivStartE, SignXE, SignXM); + flopen #(3) Div0eMReg(clk, DivStartE, {Div0E, SignDE, SignXE}, {Div0M, SignDM, SignXM}); // one copy of divstep for each bit produced per cycle generate genvar i; for (i=0; i<`DIV_BITSPERCYCLE; i = i+1) - intdivrestoringstep divstep(WE[i], XQE[i], DAbsBM, WE[i+1], XQE[i+1]); + intdivrestoringstep divstep(WM[i], XQM[i], DAbsBM, WM[i+1], XQM[i+1]); endgenerate - assign WM = WE[0]; // *** move to M stage - assign XQM = XQE[0]; - // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed assign NegWM = SignXM; // Remainder should have same sign as X assign NegQM = SignXM ^ SignDM; // Quotient should be negative if one operand is positive and the other is negative - neg #(`XLEN) wneg(WM, WnM); - neg #(`XLEN) qneg(XQM, XQnM); + neg #(`XLEN) qneg(XQM[0], XQnM); + neg #(`XLEN) wneg(WM[0], WnM); // Select appropriate output: normal, negated, or for divide by zero - mux3 #(`XLEN) qmux(XQM, XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(WM, WnM, XQM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero + mux3 #(`XLEN) qmux(XQM[0], XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero + mux3 #(`XLEN) remmux(WM[0], WnM, XQM[0], {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero - // Divider FSM to sequence Busy, and Done - always_ff @(posedge clk) + ////////////////////////////// + // Divider FSM to sequence Busy and Done + ////////////////////////////// + + always_ff @(posedge clk) if (reset) begin BusyE = 0; DivDoneM = 0; step = 0; end else if (DivStartE & ~StallM) begin 
@@ -127,6 +133,8 @@ module intdivrestoring ( DivDoneM = StallM; end + //counter #(STEPBITS+1) stepcnt(clk, cntrst, cnten, step); + endmodule /* verilator lint_on UNOPTFLAT */ From 2759f1fcb14d3f87f12427c376abdd45f1b079a7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 11:49:32 -0700 Subject: [PATCH 08/11] Moved & ~StallM from FSM into DivStartE --- wally-pipelined/src/muldiv/intdivrestoring.sv | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index b403db47..2d7d365f 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -54,7 +54,7 @@ module intdivrestoring ( ////////////////////////////// // Divider control signals - assign DivStartE = DivE & ~BusyE & ~DivDoneM; + assign DivStartE = DivE & ~BusyE & ~DivDoneM & ~StallM; assign DivBusyE = BusyE | DivStartE; // Handle sign extension for W-type instructions @@ -88,7 +88,6 @@ module intdivrestoring ( ////////////////////////////// // registers before division steps - // *** maybe change this stuff to M stage flopen #(`XLEN) wreg(clk, DivBusyE, WNextE, WM[0]); flopen #(`XLEN) xreg(clk, DivBusyE, XQNextE, XQM[0]); flopen #(`XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsBM); @@ -118,7 +117,7 @@ module intdivrestoring ( always_ff @(posedge clk) if (reset) begin BusyE = 0; DivDoneM = 0; step = 0; - end else if (DivStartE & ~StallM) begin + end else if (DivStartE) begin if (Div0E) DivDoneM = 1; else begin BusyE = 1; step = 0; From 4deae8019a9c0806b7d5e5d67e3aea2644d886cc Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 12:21:36 -0700 Subject: [PATCH 09/11] Simplifying divider FSM --- wally-pipelined/src/muldiv/intdivrestoring.sv | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 
2d7d365f..fe9d675c 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -42,7 +42,7 @@ module intdivrestoring ( logic [`XLEN-1:0] XQM[`DIV_BITSPERCYCLE:0]; logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); - logic [STEPBITS:0] step; + logic [STEPBITS:0] step, step2; logic Div0E, Div0M; logic DivStartE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; logic BusyE, DivDoneM; @@ -118,11 +118,10 @@ module intdivrestoring ( if (reset) begin BusyE = 0; DivDoneM = 0; step = 0; end else if (DivStartE) begin + step = 0; if (Div0E) DivDoneM = 1; - else begin - BusyE = 1; step = 0; - end - end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value + else BusyE = 1; + end else if (BusyE) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin // complete in half the time for W-type instructions BusyE = 0; @@ -131,8 +130,30 @@ module intdivrestoring ( end else if (DivDoneM) begin DivDoneM = StallM; end + /* + logic NextDivDoneE, NextDivBusyE; + always_comb begin + if (DivStartE) + if (Div0E) begin + NextDivDoneM = 1; NextDivBusyE = 0; + end else begin + NextDivDoneM = 0; NextDivBusyE = 1; + end + else if (BusyE) + if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin + NextDivDoneM = 1; NextDivBusyE = 0; + end else begin + NextDivDoneM = 0; NextDivBusyE = 1; + end + else if (DivDoneE) begin + NextDivDoneE = StallM; + NextDivBusyE = 0; + end + end - //counter #(STEPBITS+1) stepcnt(clk, cntrst, cnten, step); + flopr #(2) divfsmregs(clk, reset, {NextDivDoneM, NextBusyE}, {DivDoneM, BusyE}); */ + counter #(STEPBITS+1) stepcnt(.clk, .reset(DivStartE), .en(BusyE), .q(step2)); +// assert (step == step2) else $warning("counters disagree"); endmodule From 
6704e3759732983a1239c07b5f656da75b8dbdde Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 10 Oct 2021 12:21:43 -0700 Subject: [PATCH 10/11] Simplifying divider FSM --- wally-pipelined/src/muldiv/intdivrestoring.sv | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index fe9d675c..9d40f81c 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -100,8 +100,7 @@ module intdivrestoring ( intdivrestoringstep divstep(WM[i], XQM[i], DAbsBM, WM[i+1], XQM[i+1]); endgenerate - // Output selection logic in Memory Stage - // On final setp of signed operations, negate outputs as needed + // On final setp of signed operations, negate outputs as needed to get correct sign assign NegWM = SignXM; // Remainder should have same sign as X assign NegQM = SignXM ^ SignDM; // Quotient should be negative if one operand is positive and the other is negative neg #(`XLEN) qneg(XQM[0], XQnM); @@ -130,31 +129,6 @@ module intdivrestoring ( end else if (DivDoneM) begin DivDoneM = StallM; end - /* - logic NextDivDoneE, NextDivBusyE; - always_comb begin - if (DivStartE) - if (Div0E) begin - NextDivDoneM = 1; NextDivBusyE = 0; - end else begin - NextDivDoneM = 0; NextDivBusyE = 1; - end - else if (BusyE) - if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin - NextDivDoneM = 1; NextDivBusyE = 0; - end else begin - NextDivDoneM = 0; NextDivBusyE = 1; - end - else if (DivDoneE) begin - NextDivDoneE = StallM; - NextDivBusyE = 0; - end - end - - flopr #(2) divfsmregs(clk, reset, {NextDivDoneM, NextBusyE}, {DivDoneM, BusyE}); */ - counter #(STEPBITS+1) stepcnt(.clk, .reset(DivStartE), .en(BusyE), .q(step2)); -// assert (step == step2) else $warning("counters disagree"); - endmodule /* verilator lint_on UNOPTFLAT */ From 43d92f25075749c3f44e1faf05b76ffb7f76ff33 Mon Sep 17 00:00:00 2001 From: David 
Harris Date: Sun, 10 Oct 2021 12:24:44 -0700 Subject: [PATCH 11/11] Divider cleanup --- wally-pipelined/src/muldiv/intdivrestoring.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 9d40f81c..eeac7dbf 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -42,7 +42,7 @@ module intdivrestoring ( logic [`XLEN-1:0] XQM[`DIV_BITSPERCYCLE:0]; logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsBM, XnE, XInitE, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); - logic [STEPBITS:0] step, step2; + logic [STEPBITS:0] step; logic Div0E, Div0M; logic DivStartE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; logic BusyE, DivDoneM;