From e1ad732178c1d055e37f42c1cc9c5e483766d15e Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Sep 2021 12:17:24 -0400 Subject: [PATCH 01/14] SRT Division unsigned passing Imperas tests --- wally-pipelined/regression/wally-pipelined.do | 2 +- wally-pipelined/src/generic/abs.sv | 38 ++++++++++++ wally-pipelined/src/generic/neg.sv | 34 ++++++++++ wally-pipelined/src/ieu/forward.sv | 3 +- .../src/muldiv/intdiv_restoring.sv | 45 ++++++++++---- wally-pipelined/src/muldiv/muldiv.sv | 62 +++++-------------- .../testbench/common/instrTrackerTB.sv | 2 +- .../testbench/testbench-imperas.sv | 18 +++--- 8 files changed, 135 insertions(+), 69 deletions(-) create mode 100644 wally-pipelined/src/generic/abs.sv create mode 100644 wally-pipelined/src/generic/neg.sv diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index 861657308..76e3d8668 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -43,7 +43,7 @@ view wave do ./wave-dos/peripheral-waves.do -- Run the Simulation -#run 5000 +#run 3600 run -all #quit noview ../testbench/testbench-imperas.sv diff --git a/wally-pipelined/src/generic/abs.sv b/wally-pipelined/src/generic/abs.sv new file mode 100644 index 000000000..7ddbd38b6 --- /dev/null +++ b/wally-pipelined/src/generic/abs.sv @@ -0,0 +1,38 @@ +/////////////////////////////////////////// +// abs.sv +// +// Written: David_Harris@hmc.edu 28 September 2021 +// Modified: +// +// Purpose: Absolute value of a 2's complement number +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module abs #(parameter WIDTH = 8) ( + input logic [WIDTH-1:0] a, + output logic [WIDTH-1:0] y); + + logic [WIDTH-1:0] minusa; + + // select -a if sign bit of a is 1 + neg #(WIDTH) neg(a, minusa); + mux2 #(WIDTH) absmux(a, minusa, a[WIDTH-1], y); +endmodule + diff --git a/wally-pipelined/src/generic/neg.sv b/wally-pipelined/src/generic/neg.sv new file mode 100644 index 000000000..a162a5c92 --- /dev/null +++ b/wally-pipelined/src/generic/neg.sv @@ -0,0 +1,34 @@ +/////////////////////////////////////////// +// neg.sv +// +// Written: David_Harris@hmc.edu 28 September 2021 +// Modified: +// +// Purpose: 2's complement negator +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module neg #(parameter WIDTH = 8) ( + input logic [WIDTH-1:0] a, + output logic [WIDTH-1:0] y); + + assign y = ~a + 1; +endmodule + diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index e7b3ff247..47a649f85 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -33,6 +33,7 @@ module forward( input logic DivDoneE, DivBusyE, input logic FWriteIntE, FWriteIntM, FWriteIntW, input logic SCE, + input logic StallD, // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD @@ -53,7 +54,7 @@ module forward( // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE)); - assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide + assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) /*| DivBusyE */; // *** extend with stalls for divide assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); endmodule diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv index 9571ba721..e6118cd40 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv @@ -35,32 +35,52 @@ module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift; - logic qi; // curent quotient bit + logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift, Dsaved, Din, Dabs, D2, Xabs, Xinit; + logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN); logic [STEPBITS:0] step; logic div0; + // Setup for signed division + abs #(`XLEN) absd(D, Dabs); + mux2 #(`XLEN) 
dabsmux(D, Dabs, signedDivide, D2); + flopen #(`XLEN) dsavereg(clk, start, D2, Dsaved); + mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); // *** change start to init (could be delayed one from start) + + abs #(`XLEN) absx(X, Xabs); + mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide, Xinit); + // restoring division mux2 #(`XLEN) wmux(W, 0, start, Win); - mux2 #(`XLEN) xmux(0, X, start, XQin); + mux2 #(`XLEN) xmux(XQ, Xinit, start, XQin); assign {Wshift, XQshift} = {Win[`XLEN-2:0], XQin, qi}; - assign {qi, Wprime} = Wshift - D; // subtractor, carry out determines quotient bit + assign {qib, Wprime} = {1'b0, Wshift} + ~{1'b0, Din} + 1; // subtractor, carry out determines quotient bit + assign qi = ~qib; mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, Wnext); - flopen #(`XLEN) wreg(clk, busy, Wnext, W); - flopen #(`XLEN) xreg(clk, busy, XQshift, XQ); + flopen #(`XLEN) wreg(clk, start | busy, Wnext, W); + flopen #(`XLEN) xreg(clk, start | busy, XQshift, XQ); + + // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide + // flopen #(`XLEN) dreg(clk, start, D, Dsaved); + //mux2 #(`XLEN) dmux(Dsaved, D, start, Din); // outputs // *** sign extension, handling W instructions - assign div0 = (D == 0); + assign div0 = (Din == 0); mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from X when dividing by zero + // busy logic - always_ff @(posedge clk) - if (start) begin - busy = 1; done = 0; step = 0; - end else if (busy) begin + always_ff @(posedge clk) + if (reset) begin + busy = 0; done = 0; step = 0; + end else if (start) begin + if (div0) done = 1; + else begin + busy = 1; done = 0; step = 1; + end + end else if (busy & ~done) begin step = step + 1; if (step[STEPBITS] | div0) begin // *** early terminate on division by 0 step = 0; @@ -69,7 +89,10 @@ module intdiv_restoring ( end end else if 
(done) begin done = 0; + busy = 0; end + + endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 714f7ebe7..75ac11f3d 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -50,14 +50,13 @@ module muldiv ( logic [`XLEN*2-1:0] ProdE; logic enable_q; - logic [2:0] Funct3E_Q; + //logic [2:0] Funct3E_Q; logic div0error; // ***unused - logic [`XLEN-1:0] N, D; - logic [`XLEN-1:0] Num0, Den0; + logic [`XLEN-1:0] X, D; + //logic [`XLEN-1:0] Num0, Den0; logic gclk; - logic DivStartE; - logic startDivideE; + logic startDivideE, busy; logic signedDivide; // Multiplier @@ -72,37 +71,21 @@ module muldiv ( // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions - assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; - assign Den0 = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; + assign X = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; + assign D = W64E ? 
{{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; end else begin // RV32 has no W-type instructions - assign Num0 = SrcAE; - assign Den0 = SrcBE; + assign X = SrcAE; + assign D = SrcBE; end - // capture the Numerator/Denominator - flopenrc #(`XLEN) reg_num (.d(Num0), .q(N), - .en(startDivideE), .clear(DivDoneE), - .reset(reset), .clk(~gclk)); - flopenrc #(`XLEN) reg_den (.d(Den0), .q(D), - .en(startDivideE), .clear(DivDoneE), - .reset(reset), .clk(~gclk)); - - assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); - //intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(N), .D(D), .busy(DivBusyE), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); + //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); + intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); - // Added for debugging of start signal for divide - assign startDivideE = MulDivE&DivStartE&~DivBusyE; - - // capture the start control signals since they are not held constant. - // *** appears to be unused - flopenrc #(3) funct3ereg (.d(Funct3E), - .q(Funct3E_Q), - .en(DivStartE), - .clear(DivDoneE), - .reset(reset), - .clk(clk)); - + // Start a divide when a new division instruction is received and the divider isn't already busy or finishing + assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; + assign DivBusyE = startDivideE | busy; + // Select result always_comb case (Funct3E) @@ -115,19 +98,6 @@ module muldiv ( 3'b110: PrelimResultE = RemE; 3'b111: PrelimResultE = RemE; endcase // case (Funct3E) - - // Start Divide process. 
This simplifies to DivStartE = Funct3E[2]; - always_comb - case (Funct3E) - 3'b000: DivStartE = 1'b0; - 3'b001: DivStartE = 1'b0; - 3'b010: DivStartE = 1'b0; - 3'b011: DivStartE = 1'b0; - 3'b100: DivStartE = 1'b1; - 3'b101: DivStartE = 1'b1; - 3'b110: DivStartE = 1'b1; - 3'b111: DivStartE = 1'b1; - endcase // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions @@ -136,7 +106,7 @@ module muldiv ( assign MulDivResultE = PrelimResultE; end - flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM); + flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM); // could let part of multiplication spill into Memory stage flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported diff --git a/wally-pipelined/testbench/common/instrTrackerTB.sv b/wally-pipelined/testbench/common/instrTrackerTB.sv index 0283f6502..2b0ca7c50 100644 --- a/wally-pipelined/testbench/common/instrTrackerTB.sv +++ b/wally-pipelined/testbench/common/instrTrackerTB.sv @@ -13,5 +13,5 @@ module instrTrackerTB( instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName); instrNameDecTB mdec(InstrM, InstrMName); - instrNameDecTB wdec(InstrW, InstrWName); + instrNameDecTB wdec(InstrW, InstrWName); // *** delete this because InstrW is deleted from IFU endmodule diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 318140769..50b447039 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -318,14 +318,14 @@ string tests32f[] = '{ }; string tests32m[] = '{ + "rv32m/I-DIVU-01", "2000", + "rv32m/I-REMU-01", "2000", + "rv32m/I-DIV-01", "2000", + "rv32m/I-REM-01", "2000", "rv32m/I-MUL-01", "2000", "rv32m/I-MULH-01", "2000", "rv32m/I-MULHSU-01", "2000", - 
"rv32m/I-MULHU-01", "2000", - "rv32m/I-DIV-01", "2000", - "rv32m/I-DIVU-01", "2000", - "rv32m/I-REM-01", "2000", - "rv32m/I-REMU-01", "2000" + "rv32m/I-MULHU-01", "2000" }; string tests32ic[] = '{ @@ -551,12 +551,12 @@ string tests32f[] = '{ tests = tests32p; else begin tests = {tests32i, tests32p};//,tests32periph}; *** broken at the moment - if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; + if (`C_SUPPORTED) tests = {tests, tests32ic}; else tests = {tests, tests32iNOc}; - if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; if (`F_SUPPORTED) tests = {tests32f, tests}; if (`MEM_VIRTMEM) tests = {tests32mmu, tests}; if (`A_SUPPORTED) tests = {tests32a, tests}; + if (`M_SUPPORTED) tests = {tests32m, tests}; end end end @@ -607,9 +607,9 @@ string tests32f[] = '{ end // read test vectors into memory memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"}; - romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"}; +// romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"}; $readmemh(memfilename, dut.uncore.dtim.RAM); - $readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM); +// $readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM); ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"}; ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"}; $display("Read memfile %s", memfilename); From 953c8931edde69dff5ea2c9df8375736a3e361cd Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Sep 2021 15:24:43 -0400 Subject: [PATCH 02/14] RV32 div/rem working signed and unsigned --- .../src/muldiv/intdiv_restoring.sv | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv index e6118cd40..65f843d83 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv @@ -35,11 +35,12 @@ 
module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift, Dsaved, Din, Dabs, D2, Xabs, Xinit; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, Xinit; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN); logic [STEPBITS:0] step; logic div0; + logic negate, init, startd, SignX, SignD, NegW, NegQ; // Setup for signed division abs #(`XLEN) absd(D, Dabs); @@ -51,14 +52,22 @@ module intdiv_restoring ( mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide, Xinit); // restoring division - mux2 #(`XLEN) wmux(W, 0, start, Win); - mux2 #(`XLEN) xmux(XQ, Xinit, start, XQin); + mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); + mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); assign {Wshift, XQshift} = {Win[`XLEN-2:0], XQin, qi}; assign {qib, Wprime} = {1'b0, Wshift} + ~{1'b0, Din} + 1; // subtractor, carry out determines quotient bit assign qi = ~qib; - mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, Wnext); + mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, W2); + + // conditionally negate outputs at end of signed operation + neg #(`XLEN) wneg(W, Wn); + mux2 #(`XLEN) wnegmux(W, Wn, NegW, Wnn); + mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); + neg #(`XLEN) qneg(XQ, XQn); + mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); + mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); flopen #(`XLEN) wreg(clk, start | busy, Wnext, W); - flopen #(`XLEN) xreg(clk, start | busy, XQshift, XQ); + flopen #(`XLEN) xreg(clk, start | busy, XQnext, XQ); // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide // flopen #(`XLEN) dreg(clk, start, D, Dsaved); @@ -70,29 +79,42 @@ module intdiv_restoring ( mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from 
X when dividing by zero - // busy logic always_ff @(posedge clk) if (reset) begin - busy = 0; done = 0; step = 0; + busy = 0; done = 0; step = 0; negate = 0; end else if (start) begin if (div0) done = 1; else begin - busy = 1; done = 0; step = 1; + busy = 1; step = 1; end - end else if (busy & ~done) begin + end else if (busy & ~done & ~(startd & signedDivide)) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; - if (step[STEPBITS] | div0) begin // *** early terminate on division by 0 + if (step[STEPBITS]) begin // *** early terminate on division by 0 + if (signedDivide & ~negate) begin + negate = 1; + end else begin step = 0; busy = 0; + negate = 0; done = 1; + end end end else if (done) begin done = 0; busy = 0; + negate = 0; end - + // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) + flop #(1) initflop(clk, start, startd); + mux2 #(1) initmux(start, startd, signedDivide, init); + + // save signs of original inputs + flopen #(2) signflops(clk, start, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); + // On final step of signed operations, negate outputs as needed + assign NegW = SignX & negate; + assign NegQ = (SignX ^ SignD) & negate; endmodule // muldiv From a8573a27d4013bed82f2ea603928220699ca42d1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Sep 2021 20:07:22 -0400 Subject: [PATCH 03/14] Integer Divide/Rem passing all regression. 
--- .../src/muldiv/intdiv_restoring.sv | 27 +++++++++++-------- wally-pipelined/src/muldiv/muldiv.sv | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv index 65f843d83..ea337c188 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv @@ -28,6 +28,7 @@ module intdiv_restoring ( input logic clk, input logic reset, + input logic StallM, input logic signedDivide, input logic start, input logic [`XLEN-1:0] X, D, @@ -35,7 +36,7 @@ module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, Xinit; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN); logic [STEPBITS:0] step; @@ -46,10 +47,12 @@ module intdiv_restoring ( abs #(`XLEN) absd(D, Dabs); mux2 #(`XLEN) dabsmux(D, Dabs, signedDivide, D2); flopen #(`XLEN) dsavereg(clk, start, D2, Dsaved); - mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); // *** change start to init (could be delayed one from start) + mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); abs #(`XLEN) absx(X, Xabs); - mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide, Xinit); + mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide & ~div0, X2); // need original X as remainder if doing divide by 0 + flopen #(`XLEN) xsavereg(clk, start, X2, Xsaved); + mux2 #(`XLEN) xfirstmux(Xsaved, X, start, Xinit); // restoring division mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); @@ -61,13 +64,15 @@ module intdiv_restoring ( // conditionally negate outputs at end of signed operation neg #(`XLEN) wneg(W, Wn); - mux2 #(`XLEN) wnegmux(W, Wn, NegW, Wnn); - mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); +// mux2 
#(`XLEN) wnegmux(W, Wn, NegW, Wnn); +// mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); + mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); neg #(`XLEN) qneg(XQ, XQn); - mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); - mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); - flopen #(`XLEN) wreg(clk, start | busy, Wnext, W); - flopen #(`XLEN) xreg(clk, start | busy, XQnext, XQ); +// mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); +// mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); + mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); + flopen #(`XLEN) wreg(clk, start | (busy & (~negate | NegW)), Wnext, W); + flopen #(`XLEN) xreg(clk, start | (busy & (~negate | NegQ)), XQnext, XQ); // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide // flopen #(`XLEN) dreg(clk, start, D, Dsaved); @@ -77,13 +82,13 @@ module intdiv_restoring ( // *** sign extension, handling W instructions assign div0 = (Din == 0); mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero - mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from X when dividing by zero + mux2 #(`XLEN) remmux(W, Xsaved, div0, REM); // REM taken from W register, or from X when dividing by zero // busy logic always_ff @(posedge clk) if (reset) begin busy = 0; done = 0; step = 0; negate = 0; - end else if (start) begin + end else if (start & ~StallM) begin if (div0) done = 1; else begin busy = 1; step = 1; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 75ac11f3d..ca9b47b43 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -80,7 +80,7 @@ module muldiv ( assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); - 
intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + intdiv_restoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; From 5022647041d493cbd36c3cc72e0ce98e34ad15b6 Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 30 Sep 2021 20:45:26 -0400 Subject: [PATCH 04/14] Revert "first attempt at verilog side of checkpoint functionality" This reverts commit f6ef8e56566f85b06b58f41a9d10db06c55d328c. --- wally-pipelined/src/generic/flop.sv | 22 +- wally-pipelined/src/ieu/regfile.sv | 8 +- wally-pipelined/src/privileged/csrc.sv | 249 +++++++++---------- wally-pipelined/src/privileged/csri.sv | 14 +- wally-pipelined/src/privileged/csrm.sv | 60 ++--- wally-pipelined/src/privileged/csrs.sv | 40 +-- wally-pipelined/src/privileged/csrsr.sv | 49 ++-- wally-pipelined/testbench/testbench-linux.sv | 9 +- 8 files changed, 174 insertions(+), 277 deletions(-) diff --git a/wally-pipelined/src/generic/flop.sv b/wally-pipelined/src/generic/flop.sv index 82c64c567..cb583de2e 100644 --- a/wally-pipelined/src/generic/flop.sv +++ b/wally-pipelined/src/generic/flop.sv @@ -25,8 +25,6 @@ `include "wally-config.vh" /* verilator lint_off DECLFILENAME */ -// Note that non-zero RESET_VAL's are only ever intended for simulation purposes (to start mid-execution from a checkpoint) - // ordinary flip-flop module flop #(parameter WIDTH = 8) ( @@ -42,11 +40,10 @@ endmodule module flopr #(parameter WIDTH = 8) ( input logic clk, reset, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var [WIDTH-1:0] RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else q <= #1 d; 
endmodule @@ -64,11 +61,10 @@ endmodule module flopenrc #(parameter WIDTH = 8) ( input logic clk, reset, clear, en, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var [WIDTH-1:0] RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else if (en) if (clear) q <= #1 0; else q <= #1 d; @@ -78,11 +74,10 @@ endmodule module flopenr #(parameter WIDTH = 8) ( input logic clk, reset, en, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var [WIDTH-1:0] RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else if (en) q <= #1 d; endmodule @@ -104,11 +99,10 @@ module floprc #(parameter WIDTH = 8) ( input logic reset, input logic clear, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else if (clear) q <= #1 0; else q <= #1 d; diff --git a/wally-pipelined/src/ieu/regfile.sv b/wally-pipelined/src/ieu/regfile.sv index 8139e0b35..73b62a579 100644 --- a/wally-pipelined/src/ieu/regfile.sv +++ b/wally-pipelined/src/ieu/regfile.sv @@ -44,13 +44,7 @@ module regfile ( // reset is intended for simulation only, not synthesis always_ff @(negedge clk or posedge reset) - if (reset) - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-regfile.txt"}, rf); - `else - for(i=1; i<32; i++) rf[i] <= 0; - `endif - + if (reset) for(i=1; i<32; i++) rf[i] <= 0; else if (we3) rf[a3] <= wd3; assign #2 rd1 = (a1 != 0) ? rf[a1] : 0; diff --git a/wally-pipelined/src/privileged/csrc.sv b/wally-pipelined/src/privileged/csrc.sv index da8aca05b..3b1e544d7 100644 --- a/wally-pipelined/src/privileged/csrc.sv +++ b/wally-pipelined/src/privileged/csrc.sv @@ -70,24 +70,24 @@ module csrc #(parameter // ... 
more counters //HPMCOUNTER31H = 12'hC9F ) ( - input logic clk, reset, - input logic StallD, StallE, StallM, StallW, + input logic clk, reset, + input logic StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - input logic InstrValidM, LoadStallD, CSRMWriteM, - input logic BPPredDirWrongM, - input logic BTBPredPCWrongM, - input logic RASPredPCWrongM, - input logic BPPredClassNonCFIWrongM, - input logic [4:0] InstrClassM, - input logic DCacheMiss, - input logic DCacheAccess, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + input logic InstrValidM, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic DCacheMiss, + input logic DCacheAccess, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, - input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, + input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM + output logic IllegalCSRCAccessM ); generate @@ -97,22 +97,14 @@ module csrc #(parameter logic [63:0] HPMCOUNTER3_REGW, HPMCOUNTER4_REGW; // add more performance counters here if desired logic [63:0] CYCLEPlusM, INSTRETPlusM; logic [63:0] HPMCOUNTER3PlusM, HPMCOUNTER4PlusM; - // logic [`XLEN-1:0] NextTIMEM; + // logic [`XLEN-1:0] NextTIMEM; logic [`XLEN-1:0] NextCYCLEM, NextINSTRETM; logic [`XLEN-1:0] NextHPMCOUNTER3M, NextHPMCOUNTER4M; logic WriteCYCLEM, WriteINSTRETM; logic WriteHPMCOUNTER3M, WriteHPMCOUNTER4M; logic [4:0] CounterNumM; logic [`COUNTERS-1:3][`XLEN-1:0] HPMCOUNTER_REGW, HPMCOUNTERH_REGW; - var [`COUNTERS-1:3][`XLEN-1:0] initHPMCOUNTER; - logic InstrValidNotFlushedM; - - initial - `ifdef CHECKPOINT - 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-HPMCOUNTER.txt"}, initHPMCOUNTER); - `else - initHPMCOUNTER = {(`COUNTERS-3){`XLEN'b0}}; - `endif + logic InstrValidNotFlushedM; assign InstrValidNotFlushedM = InstrValidM & ~StallW & ~FlushW; @@ -138,116 +130,121 @@ module csrc #(parameter //assign NextHPMCOUNTER3M = WriteHPMCOUNTER3M ? CSRWriteValM : HPMCOUNTER3PlusM[`XLEN-1:0]; //assign NextHPMCOUNTER4M = WriteHPMCOUNTER4M ? CSRWriteValM : HPMCOUNTER4PlusM[`XLEN-1:0]; - // parameterized number of additional counters - if (`COUNTERS > 3) begin + // parameterized number of additional counters + if (`COUNTERS > 3) begin logic [`COUNTERS-1:3] WriteHPMCOUNTERM; logic [`COUNTERS-1:0] CounterEvent; logic [63:0] /*HPMCOUNTER_REGW[`COUNTERS-1:3], */ HPMCOUNTERPlusM[`COUNTERS-1:3]; logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:3]; genvar i; + // could replace special counters 0-2 with this loop for all counters assign CounterEvent[0] = 1'b1; assign CounterEvent[1] = 1'b0; - if(`QEMU) assign CounterEvent[`COUNTERS-1:2] = 0; - else begin - logic LoadStallE, LoadStallM; - flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(FlushE), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); - flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); - - assign CounterEvent[2] = InstrValidNotFlushedM; - assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; - assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; - assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; - assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; - assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; - assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; - assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; - assign 
CounterEvent[12] = DCacheMiss & InstrValidNotFlushedM; - assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions - end - - for (i = 3; i < `COUNTERS; i = i+1) begin - assign WriteHPMCOUNTERM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERBASE + i); - assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; - always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 initHPMCOUNTER[i]; - else if (~StallW) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; - //flopr #(`XLEN) HPMCOUNTERreg[i](clk, reset, NextHPMCOUNTERM[i], HPMCOUNTER_REGW[i]); + if(`QEMU) begin + assign CounterEvent[`COUNTERS-1:2] = 0; + end else begin - if (`XLEN==32) begin - logic [`COUNTERS-1:3] WriteHPMCOUNTERHM; - logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:3]; - assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - assign WriteHPMCOUNTERHM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERHBASE + i); - assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; - always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; - else if (~StallW) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; - //flopr #(`XLEN) HPMCOUNTERHreg[i](clk, reset, NextHPMCOUNTERHM[i], HPMCOUNTER_REGW[i][63:32]); - end else begin - assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - end - end + logic LoadStallE, LoadStallM; + + flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(FlushE), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); + flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); + + assign CounterEvent[2] = InstrValidNotFlushedM; + assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; + assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; + assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; + assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; + assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; + assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; + assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; + assign CounterEvent[12] = DCacheMiss & InstrValidNotFlushedM; + assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end + + for (i = 3; i < `COUNTERS; i = i+1) begin + assign WriteHPMCOUNTERM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERBASE + i); + assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; + always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 0; + else if (~StallW) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; + //flopr #(`XLEN) HPMCOUNTERreg[i](clk, reset, NextHPMCOUNTERM[i], HPMCOUNTER_REGW[i]); + + if (`XLEN==32) begin + logic [`COUNTERS-1:3] WriteHPMCOUNTERHM; + logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:3]; + assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + assign WriteHPMCOUNTERHM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERHBASE + i); + assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; + always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; + else if (~StallW) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; + //flopr #(`XLEN) HPMCOUNTERHreg[i](clk, reset, NextHPMCOUNTERHM[i], HPMCOUNTER_REGW[i][63:32]); + end else begin + assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + end + end + end // Write / update counters // Only the Machine mode versions of the counter CSRs are writable - if (`XLEN==64) begin// 64-bit counters - // flopr #(64) TIMEreg(clk, reset, WriteTIMEM ? 
CSRWriteValM : TIME_REGW + 1, TIME_REGW); // may count off a different clock*** - // flopenr #(64) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW); - flopr #(64) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW); - flopr #(64) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW); - //flopr #(64) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW); - //flopr #(64) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW); - end else begin // 32-bit low and high counters - logic WriteTIMEHM, WriteTIMECMPHM, WriteCYCLEHM, WriteINSTRETHM; - //logic WriteHPMCOUNTER3HM, WriteHPMCOUNTER4HM; - logic [`XLEN-1:0] NextCYCLEHM, NextTIMEHM, NextINSTRETHM; - //logic [`XLEN-1:0] NextHPMCOUNTER3HM, NextHPMCOUNTER4HM; + if (`XLEN==64) begin// 64-bit counters + // flopr #(64) TIMEreg(clk, reset, WriteTIMEM ? CSRWriteValM : TIME_REGW + 1, TIME_REGW); // may count off a different clock*** + // flopenr #(64) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW); + flopr #(64) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW); + flopr #(64) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW); + //flopr #(64) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW); + //flopr #(64) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW); + end else begin // 32-bit low and high counters + logic WriteTIMEHM, WriteTIMECMPHM, WriteCYCLEHM, WriteINSTRETHM; + //logic WriteHPMCOUNTER3HM, WriteHPMCOUNTER4HM; + logic [`XLEN-1:0] NextCYCLEHM, NextTIMEHM, NextINSTRETHM; + //logic [`XLEN-1:0] NextHPMCOUNTER3HM, NextHPMCOUNTER4HM; - // Write Enables - // assign WriteTIMEHM = CSRMWriteM && (CSRAdrM == MTIMEH); - // assign WriteTIMECMPHM = CSRMWriteM && (CSRAdrM == MTIMECMPH); - assign WriteCYCLEHM = CSRMWriteM && (CSRAdrM == MCYCLEH); - assign WriteINSTRETHM = CSRMWriteM && (CSRAdrM == MINSTRETH); - //assign WriteHPMCOUNTER3HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER3H); - //assign WriteHPMCOUNTER4HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER4H); - assign 
NextCYCLEHM = WriteCYCLEM ? CSRWriteValM : CYCLEPlusM[63:32]; - // assign NextTIMEHM = WriteTIMEHM ? CSRWriteValM : TIMEPlusM[63:32]; - assign NextINSTRETHM = WriteINSTRETHM ? CSRWriteValM : INSTRETPlusM[63:32]; - //assign NextHPMCOUNTER3HM = WriteHPMCOUNTER3HM ? CSRWriteValM : HPMCOUNTER3PlusM[63:32]; - //assign NextHPMCOUNTER4HM = WriteHPMCOUNTER4HM ? CSRWriteValM : HPMCOUNTER4PlusM[63:32]; + // Write Enables + // assign WriteTIMEHM = CSRMWriteM && (CSRAdrM == MTIMEH); + // assign WriteTIMECMPHM = CSRMWriteM && (CSRAdrM == MTIMECMPH); + assign WriteCYCLEHM = CSRMWriteM && (CSRAdrM == MCYCLEH); + assign WriteINSTRETHM = CSRMWriteM && (CSRAdrM == MINSTRETH); + //assign WriteHPMCOUNTER3HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER3H); + //assign WriteHPMCOUNTER4HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER4H); + assign NextCYCLEHM = WriteCYCLEM ? CSRWriteValM : CYCLEPlusM[63:32]; + // assign NextTIMEHM = WriteTIMEHM ? CSRWriteValM : TIMEPlusM[63:32]; + assign NextINSTRETHM = WriteINSTRETHM ? CSRWriteValM : INSTRETPlusM[63:32]; + //assign NextHPMCOUNTER3HM = WriteHPMCOUNTER3HM ? CSRWriteValM : HPMCOUNTER3PlusM[63:32]; + //assign NextHPMCOUNTER4HM = WriteHPMCOUNTER4HM ? 
CSRWriteValM : HPMCOUNTER4PlusM[63:32]; - // Counter CSRs - // flopr #(32) TIMEreg(clk, reset, NextTIMEM, TIME_REGW); // may count off a different clock*** - // flopenr #(32) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW[31:0]); - flopr #(32) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW[31:0]); - flopr #(32) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW[31:0]); - // flopr #(32) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW[31:0]); - // flopr #(32) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW[31:0]); - // flopr #(32) TIMEHreg(clk, reset, NextTIMEHM, TIME_REGW); // may count off a different clock*** - // flopenr #(32) TIMECMPHreg(clk, reset, WriteTIMECMPHM, CSRWriteValM, TIMECMP_REGW[63:32]); - flopr #(32) CYCLEHreg(clk, reset, NextCYCLEHM, CYCLE_REGW[63:32]); - flopr #(32) INSTRETHreg(clk, reset, NextINSTRETHM, INSTRET_REGW[63:32]); - //flopr #(32) HPMCOUNTER3Hreg(clk, reset, NextHPMCOUNTER3HM, HPMCOUNTER3_REGW[63:32]); - //flopr #(32) HPMCOUNTER4Hreg(clk, reset, NextHPMCOUNTER4HM, HPMCOUNTER4_REGW[63:32]); - end + // Counter CSRs + // flopr #(32) TIMEreg(clk, reset, NextTIMEM, TIME_REGW); // may count off a different clock*** + // flopenr #(32) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW[31:0]); + flopr #(32) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW[31:0]); + flopr #(32) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW[31:0]); + //flopr #(32) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW[31:0]); + //flopr #(32) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW[31:0]); + // flopr #(32) TIMEHreg(clk, reset, NextTIMEHM, TIME_REGW); // may count off a different clock*** + // flopenr #(32) TIMECMPHreg(clk, reset, WriteTIMECMPHM, CSRWriteValM, TIMECMP_REGW[63:32]); + flopr #(32) CYCLEHreg(clk, reset, NextCYCLEHM, CYCLE_REGW[63:32]); + flopr #(32) INSTRETHreg(clk, reset, NextINSTRETHM, INSTRET_REGW[63:32]); + //flopr #(32) HPMCOUNTER3Hreg(clk, reset, 
NextHPMCOUNTER3HM, HPMCOUNTER3_REGW[63:32]); + //flopr #(32) HPMCOUNTER4Hreg(clk, reset, NextHPMCOUNTER4HM, HPMCOUNTER4_REGW[63:32]); + end - // eventually move TIME and TIMECMP to the CLINT -- Ben 06/17/21: sure let's give that a shot! - // run TIME off asynchronous reference clock - // synchronize write enable to TIME - // four phase handshake to synchronize reads from TIME + // eventually move TIME and TIMECMP to the CLINT -- Ben 06/17/21: sure let's give that a shot! + // run TIME off asynchronous reference clock + // synchronize write enable to TIME + // four phase handshake to synchronize reads from TIME - // interrupt on timer compare - // ability to disable optional CSRs + // interrupt on timer compare + // ability to disable optional CSRs // Read Counters, or cause excepiton if insufficient privilege in light of COUNTEREN flags assign CounterNumM = CSRAdrM[4:0]; // which counter to read? if (`XLEN==64) // 64-bit counter reads always_comb - if (PrivilegeModeW == `M_MODE || MCOUNTEREN_REGW[CounterNumM] && (PrivilegeModeW == `S_MODE || SCOUNTEREN_REGW[CounterNumM])) begin + if (PrivilegeModeW == `M_MODE || + MCOUNTEREN_REGW[CounterNumM] && (PrivilegeModeW == `S_MODE || SCOUNTEREN_REGW[CounterNumM])) begin IllegalCSRCAccessM = 0; if (CSRAdrM >= MHPMCOUNTERBASE+3 && CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CSRAdrM-MHPMCOUNTERBASE]; else if (CSRAdrM >= HPMCOUNTERBASE+3 && CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CSRAdrM-HPMCOUNTERBASE]; @@ -312,7 +309,7 @@ module csrc #(parameter IllegalCSRCAccessM = 1; // no privileges for this csr CSRCReadValM = 0; end - end else begin // not `ZICOUNTERS_SUPPORTED + end else begin assign CSRCReadValM = 0; assign IllegalCSRCAccessM = 1; end @@ -359,20 +356,20 @@ module csrc #(parameter MPHMEVENTBASE = 12'h320, HPMCOUNTERBASE = 12'hC00, HPMCOUNTERHBASE = 12'hC80, - )(input logic clk, reset, - input logic StallD, StallE, StallM, StallW, - input logic InstrValidM, LoadStallD, 
CSRMWriteM, - input logic BPPredDirWrongM, - input logic BTBPredPCWrongM, - input logic RASPredPCWrongM, - input logic BPPredClassNonCFIWrongM, - input logic [4:0] InstrClassM, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + )(input logic clk, reset, + input logic StallD, StallE, StallM, StallW, + input logic InstrValidM, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM); + output logic IllegalCSRCAccessM); // counters diff --git a/wally-pipelined/src/privileged/csri.sv b/wally-pipelined/src/privileged/csri.sv index 7ef9051f7..3b54d871a 100644 --- a/wally-pipelined/src/privileged/csri.sv +++ b/wally-pipelined/src/privileged/csri.sv @@ -79,24 +79,14 @@ module csri #(parameter assign SIP_WRITE_MASK = 12'h000; end always @(posedge clk, posedge reset) begin // *** I strongly feel that IntInM should go directly to IP_REGW -- Ben 9/7/21 - if (reset) - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MIP.txt"}, IP_REGW_writeable); - `else - IP_REGW_writeable <= 10'b0; - `endif + if (reset) IP_REGW_writeable <= 10'b0; else if (WriteMIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable else if (WriteSIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable // else if (WriteUIPM) IP_REGW = (CSRWriteValM & 12'hBBB) | (NextIPM & 12'h080); // MTIP unclearable else IP_REGW_writeable <= IP_REGW_writeable | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes end 
always @(posedge clk, posedge reset) begin - if (reset) - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MIE.txt"}, IE_REGW); - `else - IE_REGW <= 12'b0; - `endif + if (reset) IE_REGW <= 12'b0; else if (WriteMIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'hAAA); // MIE controls M and S fields else if (WriteSIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'h222) | (IE_REGW & 12'h888); // only S fields // else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index f3f5d631b..a3baaaec4 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -85,45 +85,15 @@ module csrm #(parameter logic [`XLEN-1:0] MISA_REGW, MHARTID_REGW; logic [`XLEN-1:0] MSCRATCH_REGW, MCAUSE_REGW, MTVAL_REGW; - var [`XLEN-1:0] initMSCRATCH, initMCAUSE, initMEPC, initMTVEC, initMEDELEG, initMIDELEG; - var [31:0] initMCOUNTEREN, initMCOUNTINHIBIT; - var [`PMP_ENTRIES-1:0][7:0] initPMPCFG_ARRAY; - var [`PMP_ENTRIES-1:0][`XLEN-1:0] initPMPADDR_ARRAY; - - logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; - logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; - logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; + logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; + logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; + logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; logic [`PMP_ENTRIES-1:0] WritePMPCFGM; logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; logic [`PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; localparam MISA_26 = (`MISA) & 32'h03ffffff; - initial begin - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MSCRATCH.txt"}, initMSCRATCH); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MCAUSE.txt"}, initMCAUSE); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MEPC.txt"}, initMEPC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MTVEC.txt"}, initMTVEC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MEDELEG.txt"}, initMEDELEG); - 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-MIDELEG.txt"}, initMIDELEG); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MCOUNTEREN.txt"}, initMCOUNTEREN); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-PMPCFG.txt"}, initPMPCFG_ARRAY); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-PMPADDR.txt"}, initPMPADDR_ARRAY); - `else - initMSCRATCH = `XLEN'b0; - initMCAUSE = `XLEN'b0; - initMEPC = `XLEN'b0; - initMTVEC = `XLEN'b0; - initMEDELEG = `XLEN'b0; - initMIDELEG = `XLEN'b0; - initMCOUNTEREN = 32'b0; - initMCOUNTINHIBIT = 32'b0; - initPMPCFG_ARRAY = {`PMP_ENTRIES{8'b0}}; - initPMPADDR_ARRAY = {`PMP_ENTRIES{`XLEN'b0}}; - `endif - end - // MISA is hardwired. Spec says it could be written to disable features, but this is not supported by Wally assign MISA_REGW = {(`XLEN == 32 ? 2'b01 : 2'b10), {(`XLEN-28){1'b0}}, MISA_26[25:0]}; @@ -145,31 +115,33 @@ module csrm #(parameter assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID); // CSRs - flopenl #(`XLEN) MTVECreg(clk, reset, WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, initMTVEC, MTVEC_REGW); //busybear: changed reset value to 0 + flopenl #(`XLEN) MTVECreg(clk, reset, WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, `XLEN'b0, MTVEC_REGW); //busybear: changed reset value to 0 generate if (`S_SUPPORTED | (`U_SUPPORTED & `N_SUPPORTED)) begin // DELEG registers should exist - flopenl #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, initMEDELEG, MEDELEG_REGW); - flopenl #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, initMIDELEG, MIDELEG_REGW); + flopenl #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, `XLEN'b0, MEDELEG_REGW); + flopenl #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, `XLEN'b0, MIDELEG_REGW); end else begin assign MEDELEG_REGW = 0; assign MIDELEG_REGW = 0; 
end endgenerate - flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW, initMSCRATCH); - flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW, initMEPC); - flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW, initMCAUSE); +// flopenl #(`XLEN) MIPreg(clk, reset, WriteMIPM, CSRWriteValM, zero, MIP_REGW); +// flopenl #(`XLEN) MIEreg(clk, reset, WriteMIEM, CSRWriteValM, zero, MIE_REGW); + flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW); + flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW); + flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW); if(`QEMU) assign MTVAL_REGW = `XLEN'b0; else flopenr #(`XLEN) MTVALreg(clk, reset, WriteMTVALM, NextMtvalM, MTVAL_REGW); generate if (`BUSYBEAR == 1) flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, 32'b0, MCOUNTEREN_REGW); else if (`BUILDROOT == 1) - flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], initMCOUNTEREN, MCOUNTEREN_REGW); + flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], 32'h0, MCOUNTEREN_REGW); else flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTEREN_REGW); endgenerate - flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], initMCOUNTINHIBIT, MCOUNTINHIBIT_REGW); + flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'h0, MCOUNTINHIBIT_REGW); // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop @@ -186,14 +158,14 @@ module csrm #(parameter assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7] | (PMPCFG_ARRAY_REGW[i+1][7] & PMPCFG_ARRAY_REGW[i+1][4:3] == 2'b01); assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & ~StallW & ~ADDRLocked[i]; - flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], 
CSRWriteValM, PMPADDR_ARRAY_REGW[i], initPMPADDR_ARRAY[i]); + flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); if (`XLEN==64) begin assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & ~StallW & ~CFGLocked[i]; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i], initPMPCFG_ARRAY[i]); + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i]); end else begin assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & ~StallW & ~CFGLocked[i]; // assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i], initPMPCFG_ARRAY[i]); + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i]); // flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]); end end diff --git a/wally-pipelined/src/privileged/csrs.sv b/wally-pipelined/src/privileged/csrs.sv index 2fffbced2..f3c9a4f94 100644 --- a/wally-pipelined/src/privileged/csrs.sv +++ b/wally-pipelined/src/privileged/csrs.sv @@ -74,30 +74,6 @@ module csrs #(parameter logic WriteSSCRATCHM, WriteSEPCM; logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM; logic [`XLEN-1:0] SSCRATCH_REGW, SCAUSE_REGW, STVAL_REGW; - var [`XLEN-1:0] initSSCRATCH, initSCAUSE, initSEPC, initSTVEC, initSEDELEG, initSIDELEG, initSATP; - var [31:0] initSCOUNTEREN; - - initial begin - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SSCRATCH.txt"}, initSSCRATCH); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SCAUSE.txt"}, initSCAUSE); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SEPC.txt"}, initSEPC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-STVEC.txt"}, initSTVEC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SEDELEG.txt"}, initSEDELEG); - 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-SIDELEG.txt"}, initSIDELEG); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SCOUNTEREN.txt"}, initSCOUNTEREN); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SATP.txt"}, initSATP); - `else - initSSCRATCH = `XLEN'b0; - initSCAUSE = `XLEN'b0; - initSEPC = `XLEN'b0; - initSTVEC = `XLEN'b0; - initSEDELEG = `XLEN'b0; - initSIDELEG = `XLEN'b0; - initSCOUNTEREN = 32'b0; - initSATP = `XLEN'b0; - `endif - end assign WriteSSTATUSM = CSRSWriteM && (CSRAdrM == SSTATUS) && ~StallW; assign WriteSTVECM = CSRSWriteM && (CSRAdrM == STVEC) && ~StallW; @@ -109,28 +85,28 @@ module csrs #(parameter assign WriteSCOUNTERENM = CSRSWriteM && (CSRAdrM == SCOUNTEREN) && ~StallW; // CSRs - flopenl #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, initSTVEC, STVEC_REGW); //busybear: change reset to 0 - flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW, initSSCRATCH); - flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW, initSEPC); - flopenl #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, initSCAUSE, SCAUSE_REGW); + flopenl #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, `XLEN'b0, STVEC_REGW); //busybear: change reset to 0 + flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW); + flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); + flopenl #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, `XLEN'b0, SCAUSE_REGW); if(`QEMU) assign STVAL_REGW = `XLEN'b0; else flopenr #(`XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW); if (`MEM_VIRTMEM) - flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW, initSATP); + flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW); else assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported if (`BUSYBEAR == 1) flopenl #(32) SCOUNTERENreg(clk, reset, 
WriteSCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, 32'b0, SCOUNTEREN_REGW); else if (`BUILDROOT == 1) - flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], initSCOUNTEREN, SCOUNTEREN_REGW); + flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], 32'h0, SCOUNTEREN_REGW); else flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], 32'hFFFFFFFF, SCOUNTEREN_REGW); if (`N_SUPPORTED) begin logic WriteSEDELEGM, WriteSIDELEGM; assign WriteSEDELEGM = CSRSWriteM && (CSRAdrM == SEDELEG); assign WriteSIDELEGM = CSRSWriteM && (CSRAdrM == SIDELEG); - flopenl #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK /* 12'h1FF */, initSEDELEG, SEDELEG_REGW); - flopenl #(`XLEN) SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, initSIDELEG, SIDELEG_REGW); + flopenl #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK /* 12'h1FF */, `XLEN'b0, SEDELEG_REGW); + flopenl #(`XLEN) SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, `XLEN'b0, SIDELEG_REGW); end else begin assign SEDELEG_REGW = 0; assign SIDELEG_REGW = 0; diff --git a/wally-pipelined/src/privileged/csrsr.sv b/wally-pipelined/src/privileged/csrsr.sv index 113515b26..dfa2132d9 100644 --- a/wally-pipelined/src/privileged/csrsr.sv +++ b/wally-pipelined/src/privileged/csrsr.sv @@ -46,15 +46,6 @@ module csrsr ( logic [1:0] STATUS_SXL, STATUS_UXL, STATUS_XS, STATUS_FS, STATUS_FS_INT, STATUS_MPP_NEXT; logic STATUS_MPIE, STATUS_SPIE, STATUS_UPIE, STATUS_UIE; - var [`XLEN-1:0] initMSTATUS; - initial begin - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MSTATUS.txt"}, initMSTATUS); - `else - initMSTATUS = `XLEN'b0; - `endif - end - // STATUS REGISTER FIELD // See Privileged Spec Section 3.1.6 // Lower privilege status registers are a subset of the full status register @@ -117,33 +108,23 @@ module csrsr ( // registers for STATUS bits // complex register with reset, write enable, and the ability to 
update other bits in certain cases - // these null things are needed to make the following LHS assignment legal; this is probably a crappy way of doing things always_ff @(posedge clk, posedge reset) if (reset) begin - //STATUS_TSR_INT <= #1 0; - //STATUS_TW_INT <= #1 0; - //STATUS_TVM_INT <= #1 0; - //STATUS_MXR_INT <= #1 0; - //STATUS_SUM_INT <= #1 0; - //STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 - //STATUS_FS_INT <= #1 0; //2'b01; // busybear: change all these reset values to 0 - //STATUS_MPP <= #1 0; //`M_MODE; - //STATUS_SPP <= #1 0; //1'b1; - //STATUS_MPIE <= #1 0; //1; - //STATUS_SPIE <= #1 0; //`S_SUPPORTED; - //STATUS_UPIE <= #1 0; // `U_SUPPORTED; - //STATUS_MIE <= #1 0; // Per Priv 3.3 - //STATUS_SIE <= #1 0; //`S_SUPPORTED; - //STATUS_UIE <= #1 0; //`U_SUPPORTED; - // - // *** this assumes XLEN == 64. - // I don't like using generates to respond to XLEN. - // I'd rather have an XLEN64 so that we could use `ifdefs -- Ben 9/21 - {STATUS_TSR_INT,STATUS_TW_INT,STATUS_TVM_INT,STATUS_MXR_INT,STATUS_SUM_INT,STATUS_MPRV_INT} <= #1 initMSTATUS[22:17]; - {STATUS_FS_INT,STATUS_MPP} <= #1 initMSTATUS[14:11]; - {STATUS_SPP,STATUS_MPIE} <= #1 initMSTATUS[8:7]; - {STATUS_SPIE,STATUS_UPIE,STATUS_MIE} <= #1 initMSTATUS[5:3]; - {STATUS_SIE,STATUS_UIE} <= #1 initMSTATUS[1:0]; + STATUS_TSR_INT <= #1 0; + STATUS_TW_INT <= #1 0; + STATUS_TVM_INT <= #1 0; + STATUS_MXR_INT <= #1 0; + STATUS_SUM_INT <= #1 0; + STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 + STATUS_FS_INT <= #1 0; //2'b01; // busybear: change all these reset values to 0 + STATUS_MPP <= #1 0; //`M_MODE; + STATUS_SPP <= #1 0; //1'b1; + STATUS_MPIE <= #1 0; //1; + STATUS_SPIE <= #1 0; //`S_SUPPORTED; + STATUS_UPIE <= #1 0; // `U_SUPPORTED; + STATUS_MIE <= #1 0; // Per Priv 3.3 + STATUS_SIE <= #1 0; //`S_SUPPORTED; + STATUS_UIE <= #1 0; //`U_SUPPORTED; end else if (~StallW) begin if (FRegWriteM | WriteFRMM | WriteFFLAGSM) STATUS_FS_INT <= #12'b11; // mark Float State dirty *** this should happen in M stage, be part of 
if/else; diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 7dbed0e09..76a1841b8 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,9 +27,6 @@ `include "wally-config.vh" -//`define CHECKPOINT -`define LINUX_CHECKPOINT "../linux-testgen/linux-testvectors/checkpoint1K" - `define DEBUG_TRACE 0 // Debug Levels // 0: don't check against QEMU @@ -411,11 +408,7 @@ module testbench(); // initial loading of memories initial begin $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootdtim.bootdtim.RAM, 'h1000 >> 3); - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"ram.txt"}, dut.uncore.dtim.RAM); - `else - $readmemh({`LINUX_TEST_VECTORS,"ram.txt"}, dut.uncore.dtim.RAM); - `endif + $readmemh({`LINUX_TEST_VECTORS,"ram.txt"}, dut.uncore.dtim.RAM); $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.memory); ProgramAddrMapFile = {`LINUX_TEST_VECTORS,"vmlinux.objdump.addr"}; From 73d852b1efcb0f9bcd3dc18606f12c45bc46b4fb Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 09:19:25 -0400 Subject: [PATCH 05/14] Divide performs 2 steps per cycle --- ...intdiv_restoring.sv => intdivrestoring.sv} | 48 +++++++++++-------- wally-pipelined/src/muldiv/muldiv.sv | 13 +++-- wally-pipelined/testbench/testbench-arch.sv | 4 +- 3 files changed, 37 insertions(+), 28 deletions(-) rename wally-pipelined/src/muldiv/{intdiv_restoring.sv => intdivrestoring.sv} (82%) diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv similarity index 82% rename from wally-pipelined/src/muldiv/intdiv_restoring.sv rename to wally-pipelined/src/muldiv/intdivrestoring.sv index ea337c188..21e96c6e9 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ 
-1,10 +1,10 @@ /////////////////////////////////////////// -// intdiv_restoring.sv +// intdivrestoring.sv // // Written: David_Harris@hmc.edu 12 September 2021 // Modified: // -// Purpose: Restoring integer division using a shift register a subtractor +// Purpose: Restoring integer division using a shift register and subtractor // // A component of the Wally configurable RISC-V project. // @@ -25,7 +25,7 @@ `include "wally-config.vh" -module intdiv_restoring ( +module intdivrestoring ( input logic clk, input logic reset, input logic StallM, @@ -36,9 +36,9 @@ module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit - localparam STEPBITS = $clog2(`XLEN); + localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; logic div0; logic negate, init, startd, SignX, SignD, NegW, NegQ; @@ -53,33 +53,25 @@ module intdiv_restoring ( mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide & ~div0, X2); // need original X as remainder if doing divide by 0 flopen #(`XLEN) xsavereg(clk, start, X2, Xsaved); mux2 #(`XLEN) xfirstmux(Xsaved, X, start, Xinit); - - // restoring division + mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); - assign {Wshift, XQshift} = {Win[`XLEN-2:0], XQin, qi}; - assign {qib, Wprime} = {1'b0, Wshift} + ~{1'b0, Din} + 1; // subtractor, carry out determines quotient bit - assign qi = ~qib; - mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, W2); + + assign DAbsB = ~Din; + + intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); + intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); // conditionally negate outputs at end of signed operation + // *** move into M stage neg 
#(`XLEN) wneg(W, Wn); -// mux2 #(`XLEN) wnegmux(W, Wn, NegW, Wnn); -// mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); - mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); + mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** neg #(`XLEN) qneg(XQ, XQn); -// mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); -// mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); flopen #(`XLEN) wreg(clk, start | (busy & (~negate | NegW)), Wnext, W); flopen #(`XLEN) xreg(clk, start | (busy & (~negate | NegQ)), XQnext, XQ); - // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide - // flopen #(`XLEN) dreg(clk, start, D, Dsaved); - //mux2 #(`XLEN) dmux(Dsaved, D, start, Din); - // outputs - // *** sign extension, handling W instructions assign div0 = (Din == 0); mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero mux2 #(`XLEN) remmux(W, Xsaved, div0, REM); // REM taken from W register, or from X when dividing by zero @@ -124,3 +116,17 @@ module intdiv_restoring ( endmodule // muldiv +module intdivrestoringstep( + input logic [`XLEN-1:0] W, XQ, DAbsB, + output logic [`XLEN-1:0] WOut, XQOut); + + logic [`XLEN-1:0] WShift, WPrime; + logic qi, qib; + + assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; + assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} + 1; // subtractor, carry out determines quotient bit ***replace with add + assign qi = ~qib; + mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); +endmodule + +// *** clean up internal signals \ No newline at end of file diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index ca9b47b43..43cfba80d 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -47,7 +47,7 @@ module muldiv ( logic [`XLEN-1:0] MulDivResultE, MulDivResultM; logic [`XLEN-1:0] PrelimResultE; logic [`XLEN-1:0] QuotE, RemE; - logic 
[`XLEN*2-1:0] ProdE; + logic [`XLEN*2-1:0] ProdE, ProdM; logic enable_q; //logic [2:0] Funct3E_Q; @@ -55,19 +55,21 @@ module muldiv ( logic [`XLEN-1:0] X, D; //logic [`XLEN-1:0] Num0, Den0; - logic gclk; + // logic gclk; logic startDivideE, busy; logic signedDivide; // Multiplier mul mul(.*); + flopenrc #(`XLEN*2) ProdMReg(clk, reset, FlushM, ~StallM, ProdE, ProdM); + // Divide - // *** replace this clock gater + /*// *** replace this clock gater always @(negedge clk) begin enable_q <= ~StallM; end - assign gclk = enable_q & clk; + assign gclk = enable_q & clk; */ // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions @@ -80,7 +82,8 @@ module muldiv ( assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); - intdiv_restoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); +// intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; diff --git a/wally-pipelined/testbench/testbench-arch.sv b/wally-pipelined/testbench/testbench-arch.sv index 7f4233ff3..c1ef5a237 100644 --- a/wally-pipelined/testbench/testbench-arch.sv +++ b/wally-pipelined/testbench/testbench-arch.sv @@ -430,7 +430,7 @@ string tests32f[] = '{ // tests = {tests64p,tests64i, tests64periph}; if (`C_SUPPORTED) tests = {tests, tests64ic}; // else tests = {tests, tests64iNOc}; - if 
(`M_SUPPORTED) tests = {tests, tests64m}; + if (`M_SUPPORTED) tests = {tests64m, tests}; /* if (`F_SUPPORTED) tests = {tests64f, tests}; if (`D_SUPPORTED) tests = {tests64d, tests}; if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; @@ -449,7 +449,7 @@ string tests32f[] = '{ tests = {tests32priv, tests32i}; //tests = {tests32i, tests32priv}; if (`C_SUPPORTED) tests = {tests, tests32ic}; - if (`M_SUPPORTED) tests = {tests, tests32m}; + if (`M_SUPPORTED) tests = {tests32m, tests}; //if (`C_SUPPORTED) tests = {tests32ic, tests}; //if (`M_SUPPORTED) tests = {tests32m, tests}; /* tests = {tests32i, tests32p};//,tests32periph}; *** broken at the moment From 735132191cf91fcf2e30034f6eb71f104c41d074 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 09:38:02 -0400 Subject: [PATCH 06/14] Moved muldiv result selection to M stage for performance --- wally-pipelined/src/muldiv/intdivrestoring.sv | 1 + wally-pipelined/src/muldiv/muldiv.sv | 55 +++++++++---------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 21e96c6e9..ed78718cf 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -59,6 +59,7 @@ module intdivrestoring ( assign DAbsB = ~Din; + // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 43cfba80d..03df97e7c 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -31,7 +31,7 @@ module muldiv ( input logic [31:0] InstrD, // Execute Stage interface input logic [`XLEN-1:0] SrcAE, SrcBE, - input logic [2:0] Funct3E, + input logic [2:0] Funct3E, Funct3M, input logic MulDivE, W64E, // Writeback stage output logic [`XLEN-1:0] MulDivResultW, @@ -45,8 +45,8 @@ module 
muldiv ( generate if (`M_SUPPORTED) begin logic [`XLEN-1:0] MulDivResultE, MulDivResultM; - logic [`XLEN-1:0] PrelimResultE; - logic [`XLEN-1:0] QuotE, RemE; + logic [`XLEN-1:0] PrelimResultM; + logic [`XLEN-1:0] QuotM, RemM; logic [`XLEN*2-1:0] ProdE, ProdM; logic enable_q; @@ -57,7 +57,9 @@ module muldiv ( // logic gclk; logic startDivideE, busy; - logic signedDivide; + logic SignedDivideE; + logic W64M; + // Multiplier mul mul(.*); @@ -65,51 +67,44 @@ module muldiv ( // Divide - /*// *** replace this clock gater - always @(negedge clk) begin - enable_q <= ~StallM; - end - assign gclk = enable_q & clk; */ - // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions - assign X = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; - assign D = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; + assign X = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE; + assign D = W64E ? {{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE; end else begin // RV32 has no W-type instructions assign X = SrcAE; assign D = SrcBE; end - assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); -// intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); - intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); + //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, SignedDivideE); + intdivrestoring div(.clk, .reset, .StallM, .signedDivide(SignedDivideE), 
.start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; + assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage assign DivBusyE = startDivideE | busy; // Select result always_comb - case (Funct3E) - 3'b000: PrelimResultE = ProdE[`XLEN-1:0]; - 3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; - 3'b010: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; - 3'b011: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; - 3'b100: PrelimResultE = QuotE; - 3'b101: PrelimResultE = QuotE; - 3'b110: PrelimResultE = RemE; - 3'b111: PrelimResultE = RemE; - endcase // case (Funct3E) + case (Funct3M) + 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; + 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b100: PrelimResultM = QuotM; + 3'b101: PrelimResultM = QuotM; + 3'b110: PrelimResultM = RemM; + 3'b111: PrelimResultM = RemM; + endcase // Handle sign extension for W-type instructions + flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M); if (`XLEN == 64) begin // RV64 has W-type instructions - assign MulDivResultE = W64E ? {{32{PrelimResultE[31]}}, PrelimResultE[31:0]} : PrelimResultE; + assign MulDivResultM = W64M ? 
{{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; end else begin // RV32 has no W-type instructions - assign MulDivResultE = PrelimResultE; + assign MulDivResultM = PrelimResultM; end - flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM); // could let part of multiplication spill into Memory stage flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported From 0e0e204d3d3a6460aee63c683ea1ad15a9b473fb Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:03:02 -0400 Subject: [PATCH 07/14] Moved negating divider output to M stage --- .../regression/wave-dos/peripheral-waves.do | 21 +++++- wally-pipelined/src/muldiv/intdivrestoring.sv | 74 ++++++++++--------- wally-pipelined/src/muldiv/muldiv.sv | 11 +-- 3 files changed, 65 insertions(+), 41 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 2da82b869..2362b0511 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -35,6 +35,23 @@ add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE +add wave /testbench/dut/hart/mdu/genblk1/div/start +add wave /testbench/dut/hart/mdu/DivBusyE +add wave /testbench/dut/hart/mdu/DivDoneE +add wave -hex /testbench/dut/hart/mdu/genblk1/div/D +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Din +add wave -hex /testbench/dut/hart/mdu/genblk1/div/X +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Win +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQin +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wshift +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQshift +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wnext +add
wave -hex /testbench/dut/hart/mdu/genblk1/div/qi +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wprime +add wave -hex /testbench/dut/hart/mdu/genblk1/div/W +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQ +add wave -hex /testbench/dut/hart/mdu/genblk1/div/REM + add wave -divider add wave -hex /testbench/dut/hart/ifu/PCM add wave -hex /testbench/dut/hart/ifu/InstrM @@ -48,9 +65,9 @@ add wave -hex /testbench/dut/hart/lsu/dcache/ReadDataM add wave -hex /testbench/dut/hart/ebu/ReadDataM add wave -divider add wave -hex /testbench/PCW -add wave -hex /testbench/InstrW +#add wave -hex /testbench/InstrW add wave -hex /testbench/dut/hart/ieu/c/InstrValidW -add wave /testbench/InstrWName +#add wave /testbench/InstrWName add wave -hex /testbench/dut/hart/ReadDataW add wave -hex /testbench/dut/hart/ieu/dp/ResultW add wave -hex /testbench/dut/hart/ieu/dp/RegWriteW diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index ed78718cf..831fe9d02 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -28,9 +28,9 @@ module intdivrestoring ( input logic clk, input logic reset, - input logic StallM, - input logic signedDivide, - input logic start, + input logic StallM, FlushM, + input logic SignedDivideE, + input logic StartDivideE, input logic [`XLEN-1:0] X, D, output logic busy, done, output logic [`XLEN-1:0] Q, REM @@ -41,78 +41,84 @@ module intdivrestoring ( localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; logic div0; - logic negate, init, startd, SignX, SignD, NegW, NegQ; + logic init, startd, SignX, SignD, NegW, NegQ; + logic SignedDivideM; + // *** add pipe stages to everything // Setup for signed division abs #(`XLEN) absd(D, Dabs); - mux2 #(`XLEN) dabsmux(D, Dabs, signedDivide, D2); - flopen #(`XLEN) dsavereg(clk, start, D2, Dsaved); - mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); + mux2 #(`XLEN) dabsmux(D, Dabs, SignedDivideE, D2); + 
flopen #(`XLEN) dsavereg(clk, StartDivideE, D2, Dsaved); + mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); abs #(`XLEN) absx(X, Xabs); - mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide & ~div0, X2); // need original X as remainder if doing divide by 0 - flopen #(`XLEN) xsavereg(clk, start, X2, Xsaved); - mux2 #(`XLEN) xfirstmux(Xsaved, X, start, Xinit); + mux2 #(`XLEN) xabsmux(X, Xabs, SignedDivideE & ~div0, X2); // need original X as remainder if doing divide by 0 + flopen #(`XLEN) xsavereg(clk, StartDivideE, X2, Xsaved); + mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); assign DAbsB = ~Din; + assign div0 = (Din == 0); // *** eventually replace with just the negedge saved D // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); - intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); +// intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); + intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); // conditionally negate outputs at end of signed operation - // *** move into M stage - neg #(`XLEN) wneg(W, Wn); - mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** - neg #(`XLEN) qneg(XQ, XQn); - mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); - flopen #(`XLEN) wreg(clk, start | (busy & (~negate | NegW)), Wnext, W); - flopen #(`XLEN) xreg(clk, start | (busy & (~negate | NegQ)), XQnext, XQ); + +// flopen #(`XLEN) wreg(clk, StartDivideE | (busy & (~negate | NegW)), Wnext, W); +// flopen #(`XLEN) xreg(clk, StartDivideE | (busy & (~negate | NegQ)), XQnext, XQ); + flopen #(`XLEN) wreg(clk, StartDivideE | busy, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, StartDivideE | busy, XQnext, XQ); // outputs - assign div0 = (Din == 0); - mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero - mux2 #(`XLEN) remmux(W, Xsaved, 
div0, REM); // REM taken from W register, or from X when dividing by zero + neg #(`XLEN) wneg(W, Wn); +// mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** + neg #(`XLEN) qneg(XQ, XQn); +// mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, or all 1s when dividing by zero *** + mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, or from X when dividing by zero // busy logic always_ff @(posedge clk) if (reset) begin - busy = 0; done = 0; step = 0; negate = 0; - end else if (start & ~StallM) begin + busy = 0; done = 0; step = 0; //negate = 0; + end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin busy = 1; step = 1; end - end else if (busy & ~done & ~(startd & signedDivide)) begin // pause one cycle at beginning of signed operations for absolute value + end else if (busy & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; if (step[STEPBITS]) begin // *** early terminate on division by 0 - if (signedDivide & ~negate) begin +/* if (SignedDivideE & ~negate) begin negate = 1; - end else begin + end else begin*/ step = 0; busy = 0; - negate = 0; + //negate = 0; done = 1; - end + //end end end else if (done) begin done = 0; busy = 0; - negate = 0; + //negate = 0; end // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) - flop #(1) initflop(clk, start, startd); - mux2 #(1) initmux(start, startd, signedDivide, init); + flop #(1) initflop(clk, StartDivideE, startd); + mux2 #(1) initmux(StartDivideE, startd, SignedDivideE, init); // save signs of original inputs - flopen #(2) signflops(clk, start, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); + flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); + flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], 
X[`XLEN-1]}, {SignD, SignX}); // On final setp of signed operations, negate outputs as needed - assign NegW = SignX & negate; - assign NegQ = (SignX ^ SignD) & negate; + assign NegW = SignedDivideM & SignX; // & negate; + assign NegQ = SignedDivideM & (SignX ^ SignD); // & negate; endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 03df97e7c..17e943490 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -56,7 +56,7 @@ module muldiv ( //logic [`XLEN-1:0] Num0, Den0; // logic gclk; - logic startDivideE, busy; + logic StartDivideE, busy; logic SignedDivideE; logic W64M; @@ -77,12 +77,13 @@ module muldiv ( end assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, SignedDivideE); - intdivrestoring div(.clk, .reset, .StallM, .signedDivide(SignedDivideE), .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); + //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); + intdivrestoring div(.clk, .reset, .StallM, .FlushM, + .SignedDivideE, .StartDivideE, .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage - assign DivBusyE = startDivideE | busy; + assign StartDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage + assign DivBusyE = StartDivideE | busy; // Select result always_comb From d4437b842a072e318598cbd8a50dd702a67a03f5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:13:49 -0400 Subject: [PATCH 08/14] Divider 
code cleanup --- .../regression/wave-dos/peripheral-waves.do | 2 +- wally-pipelined/src/muldiv/intdivrestoring.sv | 43 +++++++------------ wally-pipelined/src/muldiv/muldiv.sv | 8 ++-- 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 2362b0511..9ff6e4fe2 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -35,7 +35,7 @@ add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE -add wave /testbench/dut/hart/mdu/genblk1/div/start +add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE add wave /testbench/dut/hart/mdu/DivDoneE add wave -hex /testbench/dut/hart/mdu/genblk1/div/D diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 831fe9d02..79a978ef0 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -32,7 +32,7 @@ module intdivrestoring ( input logic SignedDivideE, input logic StartDivideE, input logic [`XLEN-1:0] X, D, - output logic busy, done, + output logic BusyE, done, output logic [`XLEN-1:0] Q, REM ); @@ -64,49 +64,39 @@ module intdivrestoring ( // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); -// intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - // conditionally negate outputs at end of signed operation - -// flopen #(`XLEN) wreg(clk, StartDivideE | (busy & (~negate | NegW)), Wnext, W); -// flopen #(`XLEN) xreg(clk, StartDivideE | (busy & (~negate | NegQ)), XQnext, XQ); - flopen #(`XLEN) wreg(clk, StartDivideE | busy, Wnext, W); // *** could 
become just busy once start moves to its own cycle - flopen #(`XLEN) xreg(clk, StartDivideE | busy, XQnext, XQ); + flopen #(`XLEN) wreg(clk, StartDivideE | BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, StartDivideE | BusyE, XQnext, XQ); // outputs + // On final setp of signed operations, negate outputs as needed + assign NegW = SignedDivideM & SignX; + assign NegQ = SignedDivideM & (SignX ^ SignD); neg #(`XLEN) wneg(W, Wn); -// mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** neg #(`XLEN) qneg(XQ, XQn); -// mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); - mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, or all 1s when dividing by zero *** - mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, or from X when dividing by zero + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero + mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, negated if necessary, or from X when dividing by zero // busy logic always_ff @(posedge clk) if (reset) begin - busy = 0; done = 0; step = 0; //negate = 0; + BusyE = 0; done = 0; step = 0; end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin - busy = 1; step = 1; + BusyE = 1; step = 1; end - end else if (busy & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at beginning of signed operations for absolute value + end else if (BusyE & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; - if (step[STEPBITS]) begin // *** early terminate on division by 0 -/* if (SignedDivideE & ~negate) begin - negate = 1; - end else begin*/ + if (step[STEPBITS]) begin step = 0; - busy = 0; - //negate = 0; + BusyE = 0; done = 1; - //end end end else if (done) begin done = 0; - busy = 
0; - //negate = 0; + BusyE = 0; end // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) @@ -115,10 +105,7 @@ module intdivrestoring ( // save signs of original inputs flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); - flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); - // On final setp of signed operations, negate outputs as needed - assign NegW = SignedDivideM & SignX; // & negate; - assign NegQ = SignedDivideM & (SignX ^ SignD); // & negate; + flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 17e943490..be49bf057 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -56,7 +56,7 @@ module muldiv ( //logic [`XLEN-1:0] Num0, Den0; // logic gclk; - logic StartDivideE, busy; + logic StartDivideE, BusyE; logic SignedDivideE; logic W64M; @@ -79,11 +79,11 @@ module muldiv ( assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .StartDivideE, .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); + .SignedDivideE, .StartDivideE, .X(X), .D(D), .BusyE, .done(DivDoneE), .Q(QuotM), .REM(RemM)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign StartDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage - assign DivBusyE = StartDivideE | busy; + assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneE; // 
*** mabye DivDone should be M stage + assign DivBusyE = StartDivideE | BusyE; // Select result always_comb From d532bde931009e1768a85068861e6192424e4713 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:36:51 -0400 Subject: [PATCH 09/14] Added negative edge triggered flop to save inputs; do absolute value in first cycle for signed division --- wally-pipelined/src/generic/abs.sv | 38 ----------------- wally-pipelined/src/muldiv/intdivrestoring.sv | 42 +++++++++++-------- 2 files changed, 25 insertions(+), 55 deletions(-) delete mode 100644 wally-pipelined/src/generic/abs.sv diff --git a/wally-pipelined/src/generic/abs.sv b/wally-pipelined/src/generic/abs.sv deleted file mode 100644 index 7ddbd38b6..000000000 --- a/wally-pipelined/src/generic/abs.sv +++ /dev/null @@ -1,38 +0,0 @@ -/////////////////////////////////////////// -// neg.sv -// -// Written: David_Harris@hmc.edu 28 September 2021 -// Modified: -// -// Purpose: 2's complement negator -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" - -module abs #(parameter WIDTH = 8) ( - input logic [WIDTH-1:0] a, - output logic [WIDTH-1:0] y); - - logic [WIDTH-1:0] minusa; - - // select -a if sign bit of a is 1 - neg #(WIDTH) neg(a, minusa); - mux2 #(WIDTH) absmux(a, minusa, a[WIDTH-1], y); -endmodule - diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 79a978ef0..d6bdea523 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -36,7 +36,7 @@ module intdivrestoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Dn, Xn, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; @@ -45,22 +45,28 @@ module intdivrestoring ( logic SignedDivideM; // *** add pipe stages to everything - // Setup for signed division - abs #(`XLEN) absd(D, Dabs); - mux2 #(`XLEN) dabsmux(D, Dabs, SignedDivideE, D2); - flopen #(`XLEN) dsavereg(clk, StartDivideE, D2, Dsaved); - mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); + // save inputs on the negative edge of the execute clock. + // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. + // Saving the inputs is the most hardware-efficient way to fix the issue. 
+ flopen #(`XLEN) dsavereg(~clk, StartDivideE, D, Dsaved); + flopen #(`XLEN) xsavereg(~clk, StartDivideE, X, Xsaved); + assign SignD = Dsaved[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? + assign SignX = Xsaved[`XLEN-1]; + assign div0 = (Dsaved == 0); // *** eventually replace with just the negedge saved D - abs #(`XLEN) absx(X, Xabs); - mux2 #(`XLEN) xabsmux(X, Xabs, SignedDivideE & ~div0, X2); // need original X as remainder if doing divide by 0 - flopen #(`XLEN) xsavereg(clk, StartDivideE, X2, Xsaved); - mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); + // Setup for signed division + neg #(`XLEN) negd(Dsaved, Dn); + mux2 #(`XLEN) dabsmux(Dsaved, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations + assign DAbsB = ~Din; +// mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); + + neg #(`XLEN) negx(Xsaved, Xn); + mux2 #(`XLEN) xabsmux(Xsaved, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 +// mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); - assign DAbsB = ~Din; - assign div0 = (Din == 0); // *** eventually replace with just the negedge saved D // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); @@ -71,6 +77,8 @@ module intdivrestoring ( // outputs // On final setp of signed operations, negate outputs as needed + //flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly + assign NegW = SignedDivideM & SignX; assign NegQ = SignedDivideM & (SignX ^ SignD); neg #(`XLEN) wneg(W, Wn); @@ -85,9 +93,9 @@ module intdivrestoring ( end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin - BusyE = 1; step = 1; + BusyE = 1; step = 0; end - end else if (BusyE & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at 
beginning of signed operations for absolute value + end else if (BusyE & ~done) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; if (step[STEPBITS]) begin step = 0; @@ -98,14 +106,14 @@ module intdivrestoring ( done = 0; BusyE = 0; end + assign init = (step == 0); // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) - flop #(1) initflop(clk, StartDivideE, startd); - mux2 #(1) initmux(StartDivideE, startd, SignedDivideE, init); +// flop #(1) initflop(clk, StartDivideE, startd); +// mux2 #(1) initmux(StartDivideE, startd, SignedDivideE, init); // save signs of original inputs flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); - flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly endmodule // muldiv From a86ce5cd37a842636dd69b934b9427f216d583c7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:41:09 -0400 Subject: [PATCH 10/14] Divider code cleanup --- wally-pipelined/src/muldiv/intdivrestoring.sv | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index d6bdea523..f9c8a735f 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -54,35 +54,33 @@ module intdivrestoring ( assign SignX = Xsaved[`XLEN-1]; assign div0 = (Dsaved == 0); // *** eventually replace with just the negedge saved D - // Setup for signed division + // Take absolute value for signed operations neg #(`XLEN) negd(Dsaved, Dn); mux2 #(`XLEN) dabsmux(Dsaved, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations - assign DAbsB = ~Din; -// mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); - neg #(`XLEN) negx(Xsaved, Xn); mux2 
#(`XLEN) xabsmux(Xsaved, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 -// mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); + // Negate D for subtraction + assign DAbsB = ~Din; + + // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); - // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - flopen #(`XLEN) wreg(clk, StartDivideE | BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle - flopen #(`XLEN) xreg(clk, StartDivideE | BusyE, XQnext, XQ); + flopen #(`XLEN) wreg(clk, /*StartDivideE | */BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, /*StartDivideE | */BusyE, XQnext, XQ); - // outputs + // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed - //flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly - assign NegW = SignedDivideM & SignX; assign NegQ = SignedDivideM & (SignX ^ SignD); neg #(`XLEN) wneg(W, Wn); neg #(`XLEN) qneg(XQ, XQn); + // Select appropriate output: normal, negated, or for divide by zero mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, negated if necessary, or from X when dividing by zero @@ -108,10 +106,6 @@ module intdivrestoring ( end assign init = (step == 0); - // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) -// flop #(1) initflop(clk, StartDivideE, startd); -// mux2 #(1) 
initmux(StartDivideE, startd, SignedDivideE, init); - // save signs of original inputs flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); From fe69513bb77bd04014846385b82b9647cea4b14f Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 20:55:37 -0400 Subject: [PATCH 11/14] Partial divider cleanup --- .../regression/wave-dos/peripheral-waves.do | 7 ++-- wally-pipelined/src/muldiv/intdivrestoring.sv | 35 +++++++++---------- wally-pipelined/src/muldiv/muldiv.sv | 12 +++---- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 9ff6e4fe2..57eb5babc 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -38,9 +38,9 @@ add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE add wave /testbench/dut/hart/mdu/DivDoneE -add wave -hex /testbench/dut/hart/mdu/genblk1/div/D +add wave -hex /testbench/dut/hart/mdu/genblk1/div/DE add wave -hex /testbench/dut/hart/mdu/genblk1/div/Din -add wave -hex /testbench/dut/hart/mdu/genblk1/div/X +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XE add wave -hex /testbench/dut/hart/mdu/genblk1/div/Win add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQin add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wshift @@ -50,7 +50,8 @@ add wave -hex /testbench/dut/hart/mdu/genblk1/div/qi add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wprime add wave -hex /testbench/dut/hart/mdu/genblk1/div/W add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQ -add wave -hex /testbench/dut/hart/mdu/genblk1/div/REM +add wave -hex /testbench/dut/hart/mdu/genblk1/div/RemM +add wave -hex /testbench/dut/hart/mdu/genblk1/div/QuotM add wave -divider add wave -hex /testbench/dut/hart/ifu/PCM diff --git 
a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index f9c8a735f..45ffbfb58 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -31,12 +31,12 @@ module intdivrestoring ( input logic StallM, FlushM, input logic SignedDivideE, input logic StartDivideE, - input logic [`XLEN-1:0] X, D, + input logic [`XLEN-1:0] XE, DE, output logic BusyE, done, - output logic [`XLEN-1:0] Q, REM + output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Dn, Xn, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, Dn, Xn, Xabs, X2, XSavedE, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; @@ -48,17 +48,18 @@ module intdivrestoring ( // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. // Saving the inputs is the most hardware-efficient way to fix the issue. - flopen #(`XLEN) dsavereg(~clk, StartDivideE, D, Dsaved); - flopen #(`XLEN) xsavereg(~clk, StartDivideE, X, Xsaved); - assign SignD = Dsaved[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? - assign SignX = Xsaved[`XLEN-1]; - assign div0 = (Dsaved == 0); // *** eventually replace with just the negedge saved D + flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); + flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); + flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); + assign SignD = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? 
+ assign SignX = XSavedE[`XLEN-1]; + assign div0 = (DSavedE == 0); // *** eventually replace with just the negedge saved D // Take absolute value for signed operations - neg #(`XLEN) negd(Dsaved, Dn); - mux2 #(`XLEN) dabsmux(Dsaved, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations - neg #(`XLEN) negx(Xsaved, Xn); - mux2 #(`XLEN) xabsmux(Xsaved, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 + neg #(`XLEN) negd(DSavedE, Dn); + mux2 #(`XLEN) dabsmux(DSavedE, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations + neg #(`XLEN) negx(XSavedE, Xn); + mux2 #(`XLEN) xabsmux(XSavedE, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 // Negate D for subtraction assign DAbsB = ~Din; @@ -71,8 +72,8 @@ module intdivrestoring ( intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - flopen #(`XLEN) wreg(clk, /*StartDivideE | */BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle - flopen #(`XLEN) xreg(clk, /*StartDivideE | */BusyE, XQnext, XQ); + flopen #(`XLEN) wreg(clk, BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, BusyE, XQnext, XQ); // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed @@ -81,8 +82,8 @@ module intdivrestoring ( neg #(`XLEN) wneg(W, Wn); neg #(`XLEN) qneg(XQ, XQn); // Select appropriate output: normal, negated, or for divide by zero - mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, negated if necessary, or from X when dividing by zero + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, QuotM); // Q taken from XQ register, negated if necessary, 
or all 1s when dividing by zero + mux3 #(`XLEN) remmux(W, Wn, XSavedE, {div0, NegW}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero // busy logic always_ff @(posedge clk) @@ -106,8 +107,6 @@ module intdivrestoring ( end assign init = (step == 0); - // save signs of original inputs - flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index be49bf057..734965195 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -52,7 +52,7 @@ module muldiv ( logic enable_q; //logic [2:0] Funct3E_Q; logic div0error; // ***unused - logic [`XLEN-1:0] X, D; + logic [`XLEN-1:0] XE, DE; //logic [`XLEN-1:0] Num0, Den0; // logic gclk; @@ -69,17 +69,17 @@ module muldiv ( // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions - assign X = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE; - assign D = W64E ? {{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE; + assign XE = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE; + assign DE = W64E ? 
{{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE; end else begin // RV32 has no W-type instructions - assign X = SrcAE; - assign D = SrcBE; + assign XE = SrcAE; + assign DE = SrcBE; end assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .StartDivideE, .X(X), .D(D), .BusyE, .done(DivDoneE), .Q(QuotM), .REM(RemM)); + .SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .done(DivDoneE), .QuotM, .RemM); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneE; // *** mabye DivDone should be M stage From 775520c05a5b388d7f1d17e7dd43957e0274d719 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 20:57:54 -0400 Subject: [PATCH 12/14] Partial divider cleanup 2 --- wally-pipelined/src/muldiv/intdivrestoring.sv | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 45ffbfb58..894de243d 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -88,13 +88,14 @@ module intdivrestoring ( // busy logic always_ff @(posedge clk) if (reset) begin - BusyE = 0; done = 0; step = 0; + BusyE = 0; done = 0; step = 0; init = 0; end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin - BusyE = 1; step = 0; + BusyE = 1; step = 0; init = 1; end end else if (BusyE & ~done) begin // pause one cycle at beginning of signed operations for absolute value + init = 0; step = step + 1; if (step[STEPBITS]) begin step = 0; @@ -105,7 +106,7 @@ module intdivrestoring ( done = 0; BusyE = 0; end - assign init = (step == 0); + 
//assign init = (step == 0); endmodule // muldiv From 67690c2ed7e7f732a17ff1ddf05e78ee5556029c Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 21:00:13 -0400 Subject: [PATCH 13/14] Partial divider cleanup 3 --- wally-pipelined/src/muldiv/intdivrestoring.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 894de243d..7b3509ea0 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -36,7 +36,7 @@ module intdivrestoring ( output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, Dn, Xn, Xabs, X2, XSavedE, Xinit, DAbsB, W1, XQ1; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, DnE, XnE, Xabs, X2, XSavedE, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; @@ -56,10 +56,10 @@ module intdivrestoring ( assign div0 = (DSavedE == 0); // *** eventually replace with just the negedge saved D // Take absolute value for signed operations - neg #(`XLEN) negd(DSavedE, Dn); - mux2 #(`XLEN) dabsmux(DSavedE, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations - neg #(`XLEN) negx(XSavedE, Xn); - mux2 #(`XLEN) xabsmux(XSavedE, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 + neg #(`XLEN) negd(DSavedE, DnE); + mux2 #(`XLEN) dabsmux(DSavedE, DnE, SignedDivideE & SignD, Din); // take absolute value for signed operations + neg #(`XLEN) negx(XSavedE, XnE); + mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 // Negate D for subtraction assign DAbsB = ~Din; From 3441991d93999fb5155ec294d0a5f2b7d20ded83 
Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 21:10:35 -0400 Subject: [PATCH 14/14] Divider mostly cleaned up --- wally-pipelined/src/muldiv/intdivrestoring.sv | 26 +++--------- .../src/muldiv/intdivrestoringstep.sv | 40 +++++++++++++++++++ 2 files changed, 45 insertions(+), 21 deletions(-) create mode 100644 wally-pipelined/src/muldiv/intdivrestoringstep.sv diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 7b3509ea0..e9221cc50 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -50,10 +50,10 @@ module intdivrestoring ( // Saving the inputs is the most hardware-efficient way to fix the issue. flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); - flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); + flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); assign SignD = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? 
assign SignX = XSavedE[`XLEN-1]; - assign div0 = (DSavedE == 0); // *** eventually replace with just the negedge saved D + assign div0 = (DSavedE == 0); // Take absolute value for signed operations neg #(`XLEN) negd(DSavedE, DnE); @@ -72,7 +72,7 @@ module intdivrestoring ( intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - flopen #(`XLEN) wreg(clk, BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) wreg(clk, BusyE, Wnext, W); flopen #(`XLEN) xreg(clk, BusyE, XQnext, XQ); // Output selection logic in Memory Stage @@ -105,24 +105,8 @@ module intdivrestoring ( end else if (done) begin done = 0; BusyE = 0; - end - //assign init = (step == 0); - + end -endmodule // muldiv - - -module intdivrestoringstep( - input logic [`XLEN-1:0] W, XQ, DAbsB, - output logic [`XLEN-1:0] WOut, XQOut); - - logic [`XLEN-1:0] WShift, WPrime; - logic qi, qib; - - assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; - assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} + 1; // subtractor, carry out determines quotient bit ***replace with add - assign qi = ~qib; - mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); -endmodule +endmodule // *** clean up internal signals \ No newline at end of file diff --git a/wally-pipelined/src/muldiv/intdivrestoringstep.sv b/wally-pipelined/src/muldiv/intdivrestoringstep.sv new file mode 100644 index 000000000..3dcf7da50 --- /dev/null +++ b/wally-pipelined/src/muldiv/intdivrestoringstep.sv @@ -0,0 +1,40 @@ +/////////////////////////////////////////// +// intdivrestoringstep.sv +// +// Written: David_Harris@hmc.edu 2 October 2021 +// Modified: +// +// Purpose: Restoring integer division using a shift register and subtractor +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module intdivrestoringstep( // one combinational restoring-division step: shift {W, XQ} left one bit, trial-subtract the divisor, restore W if the result is negative + input logic [`XLEN-1:0] W, XQ, DAbsB, // W: partial remainder; XQ: shift register that starts as the dividend and fills with quotient bits; DAbsB: bitwise complement of the divisor magnitude (intdivrestoring assigns DAbsB = ~Din) + output logic [`XLEN-1:0] WOut, XQOut); // values of W and XQ after this step + + logic [`XLEN-1:0] WShift, WPrime; // WShift: {W[`XLEN-2:0], XQ[`XLEN-1]}; WPrime: low `XLEN bits of the trial subtraction + logic qi, qib; // qi: quotient bit produced by this step; qib: its complement, taken from the top bit of the (`XLEN+1)-bit trial subtraction + + assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; // left shift by one: W's MSB is dropped, XQ's MSB moves into WShift's LSB, qi enters XQOut's LSB + assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} + 1; // subtractor, carry out determines quotient bit ***replace with add + assign qi = ~qib; // with DAbsB = ~D the sum above is WShift - D in `XLEN+1 bits, so qi = 1 when WShift >= D + mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); // presumably WOut = qi ? WPrime : WShift (keep the difference, else restore), matching how mux2 is used in abs.sv; confirm against mux2's definition +endmodule +