diff --git a/wally-pipelined/regression/linux-wave.do b/wally-pipelined/regression/linux-wave.do index 10d264d8d..7a0ee7bd9 100644 --- a/wally-pipelined/regression/linux-wave.do +++ b/wally-pipelined/regression/linux-wave.do @@ -176,7 +176,7 @@ add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/genblk1/div/start -add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 57eb5babc..30f9718fb 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -37,7 +37,6 @@ add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE -add wave /testbench/dut/hart/mdu/DivDoneE add wave -hex /testbench/dut/hart/mdu/genblk1/div/DE add wave -hex /testbench/dut/hart/mdu/genblk1/div/Din add wave -hex /testbench/dut/hart/mdu/genblk1/div/XE diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index d176c63f1..8bff207f1 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -179,7 +179,7 @@ add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/genblk1/div/start -add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 47a649f85..3e25ca7cc 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -30,7 +30,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic MemReadE, MulDivE, CSRReadE, input logic RegWriteM, RegWriteW, - input logic DivDoneE, DivBusyE, + input logic DivBusyE, input logic FWriteIntE, FWriteIntM, FWriteIntW, input logic SCE, input logic StallD, @@ -54,7 +54,7 @@ module forward( // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE)); - assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) /*| DivBusyE */; // *** extend with stalls for divide + assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)); assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); endmodule diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index f2984d7ff..234f767a3 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -73,7 +73,6 @@ module ieu ( input logic FlushD, FlushE, FlushM, FlushW, output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD, output logic PCSrcE, - input logic DivDoneE, input logic DivBusyE, output logic CSRReadM, CSRWriteM, PrivilegedM, output logic CSRWritePendingDEM, diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index e9221cc50..8f4947da3 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -32,7 +32,7 @@ module intdivrestoring ( input logic SignedDivideE, input logic StartDivideE, input logic [`XLEN-1:0] XE, DE, - output logic BusyE, done, + output logic BusyE, DivDoneM, output logic [`XLEN-1:0] QuotM, RemM ); @@ -40,8 +40,8 @@ module intdivrestoring ( logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; - logic div0; - logic init, startd, SignX, SignD, NegW, NegQ; + logic Div0E, Div0M; + logic init, startd, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; logic SignedDivideM; // *** add pipe stages to everything @@ -50,19 +50,27 @@ module intdivrestoring ( // Saving the inputs is the most hardware-efficient way to fix the issue. flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); + assign SignDE = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? + assign SignXE = XSavedE[`XLEN-1]; + assign Div0E = (DSavedE == 0); + + // pipeline registers flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); - assign SignD = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? - assign SignX = XSavedE[`XLEN-1]; - assign div0 = (DSavedE == 0); + flopenrc #(1) Div0eMReg(clk, reset, FlushM, ~StallM, Div0E, Div0M); + flopenrc #(1) SignDMReg(clk, reset, FlushM, ~StallM, SignDE, SignDM); + flopenrc #(1) SignXMReg(clk, reset, FlushM, ~StallM, SignXE, SignXM); + flopenrc #(`XLEN) XSavedMReg(clk, reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary? // Take absolute value for signed operations neg #(`XLEN) negd(DSavedE, DnE); - mux2 #(`XLEN) dabsmux(DSavedE, DnE, SignedDivideE & SignD, Din); // take absolute value for signed operations + mux2 #(`XLEN) dabsmux(DSavedE, DnE, SignedDivideE & SignDE, Din); // take absolute value for signed operations neg #(`XLEN) negx(XSavedE, XnE); - mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 + mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, Xinit); // need original X as remainder if doing divide by 0 // Negate D for subtraction assign DAbsB = ~Din; + // *** merge this into dabsmux if possible + // Put suffixes on Xinit, init->DivInitE, Wn, XQn // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); @@ -77,33 +85,34 @@ module intdivrestoring ( // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed - assign NegW = SignedDivideM & SignX; - assign NegQ = SignedDivideM & (SignX ^ SignD); + assign NegWM = SignedDivideM & SignXM; + assign NegQM = SignedDivideM & (SignXM ^ SignDM); neg #(`XLEN) wneg(W, Wn); neg #(`XLEN) qneg(XQ, XQn); // Select appropriate output: normal, negated, or for divide by zero - mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(W, Wn, XSavedE, {div0, NegW}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero - + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero + mux3 #(`XLEN) remmux(W, Wn, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero + // verify it's really necessary to have XSavedM + // busy logic always_ff @(posedge clk) if (reset) begin - BusyE = 0; done = 0; step = 0; init = 0; + BusyE = 0; DivDoneM = 0; step = 0; init = 0; end else if (StartDivideE & ~StallM) begin - if (div0) done = 1; + if (Div0E) DivDoneM = 1; else begin BusyE = 1; step = 0; init = 1; end - end else if (BusyE & ~done) begin // pause one cycle at beginning of signed operations for absolute value + end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value init = 0; step = step + 1; if (step[STEPBITS]) begin step = 0; BusyE = 0; - done = 1; + DivDoneM = 1; end - end else if (done) begin - done = 0; + end else if (DivDoneM) begin + DivDoneM = 0; BusyE = 0; end diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 734965195..7cccf2d72 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -36,7 +36,6 @@ module muldiv ( // Writeback stage output logic [`XLEN-1:0] MulDivResultW, // Divide Done - output logic DivDoneE, output logic DivBusyE, // hazards input logic StallE, StallM, StallW, FlushM, FlushW @@ -56,7 +55,7 @@ module muldiv ( //logic [`XLEN-1:0] Num0, Den0; // logic gclk; - logic StartDivideE, BusyE; + logic StartDivideE, BusyE, DivDoneM; logic SignedDivideE; logic W64M; @@ -79,10 +78,10 @@ module muldiv ( assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .done(DivDoneE), .QuotM, .RemM); + .SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .DivDoneM, .QuotM, .RemM); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneE; // *** mabye DivDone should be M stage + assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; assign DivBusyE = StartDivideE | BusyE; // Select result @@ -111,7 +110,6 @@ module muldiv ( end else begin // no M instructions supported assign MulDivResultW = 0; assign DivBusyE = 0; - assign DivDoneE = 0; end endgenerate diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index eb5169eb2..8a298594f 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -88,7 +88,6 @@ module wallypipelinedhart logic InvalidateICacheM, FlushDCacheM; logic PCSrcE; logic CSRWritePendingDEM; - logic DivDoneE; logic DivBusyE; logic RegWriteD; logic LoadStallD, StoreStallD, MulDivStallD, CSRRdStallD;