Reduced cycle count for DIVW/DIVUW by a factor of two

This commit is contained in:
David Harris 2021-10-03 09:42:22 -04:00
parent 648cc8ef64
commit 48e33c79a9
5 changed files with 36 additions and 32 deletions

View File

@ -67,7 +67,7 @@
// Integer Divider Configuration // Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4 // DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4 `define DIV_BITSPERCYCLE 1
// Legal number of PMP entries are 0, 16, or 64 // Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 64 `define PMP_ENTRIES 64

View File

@ -31,7 +31,7 @@ module intdivrestoring (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic StallM, FlushM, input logic StallM, FlushM,
input logic SignedDivideE, input logic SignedDivideE, W64E,
input logic StartDivideE, input logic StartDivideE,
input logic [`XLEN-1:0] XE, DE, input logic [`XLEN-1:0] XE, DE,
output logic BusyE, DivDoneM, output logic BusyE, DivDoneM,
@ -40,7 +40,7 @@ module intdivrestoring (
logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0]; logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0];
logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0]; logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0];
logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM; logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DinE, XinE, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM;
localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE);
logic [STEPBITS:0] step; logic [STEPBITS:0] step;
logic Div0E, Div0M; logic Div0E, Div0M;
@ -52,9 +52,22 @@ module intdivrestoring (
// Saving the inputs is the most hardware-efficient way to fix the issue. // Saving the inputs is the most hardware-efficient way to fix the issue.
flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE);
flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE);
assign SignDE = DSavedE[`XLEN-1];
assign SignXE = XSavedE[`XLEN-1]; // Handle sign extension for W-type instructions
assign Div0E = (DSavedE == 0); generate
if (`XLEN == 64) begin // RV64 has W-type instructions
mux2 #(`XLEN) xinmux(XSavedE, {XSavedE[31:0], 32'b0}, W64E, XinE);
mux2 #(`XLEN) dinmux(DSavedE, {{32{DSavedE[31]&SignedDivideE}}, DSavedE[31:0]}, W64E, DinE);
end else begin // RV32 has no W-type instructions
assign XinE = XSavedE;
assign DinE = DSavedE;
end
endgenerate
// Extract sign bits and check for division by zero
assign SignDE = DinE[`XLEN-1];
assign SignXE = XinE[`XLEN-1];
assign Div0E = (DinE == 0);
// pipeline registers // pipeline registers
flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM);
@ -64,10 +77,10 @@ module intdivrestoring (
flopenrc #(`XLEN) XSavedMReg(clk, reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary? flopenrc #(`XLEN) XSavedMReg(clk, reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary?
// Take absolute value for signed operations, and negate D to handle subtraction in divider stages // Take absolute value for signed operations, and negate D to handle subtraction in divider stages
neg #(`XLEN) negd(DSavedE, DnE); neg #(`XLEN) negd(DinE, DnE);
mux2 #(`XLEN) dabsmux(DnE, DSavedE, SignedDivideE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction step mux2 #(`XLEN) dabsmux(DnE, DinE, SignedDivideE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction step
neg #(`XLEN) negx(XSavedE, XnE); neg #(`XLEN) negx(XinE, XnE);
mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 mux2 #(`XLEN) xabsmux(XinE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0
// initialization multiplexers on first cycle of operation (one cycle after start is asserted) // initialization multiplexers on first cycle of operation (one cycle after start is asserted)
mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]); mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]);
@ -106,7 +119,7 @@ module intdivrestoring (
end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value
DivInitE = 0; DivInitE = 0;
step = step + 1; step = step + 1;
if (step[STEPBITS]) begin if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin // complete in half the time for W-type instructions
step = 0; step = 0;
BusyE = 0; BusyE = 0;
DivDoneM = 1; DivDoneM = 1;

View File

@ -34,11 +34,10 @@ module intdivrestoringstep(
logic [`XLEN-1:0] WShift, WPrime; logic [`XLEN-1:0] WShift, WPrime;
logic qi, qib; logic qi, qib;
assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; // shift W and X/Q left, insert quotient bit at bottom
adder #(`XLEN+1) wdsub({1'b0, WShift}, {1'b1, DAbsB}, {qib, WPrime}); adder #(`XLEN+1) wdsub({1'b0, WShift}, {1'b1, DAbsB}, {qib, WPrime}); // effective subtractor, carry out determines quotient bit
//assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB}; // effective subtractor, carry out determines quotient bit
assign qi = ~qib; assign qi = ~qib;
mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); // if quotient is zero, restore W
endmodule endmodule
/* verilator lint_on UNOPTFLAT */ /* verilator lint_on UNOPTFLAT */

View File

@ -65,20 +65,12 @@ module muldiv (
flopenrc #(`XLEN*2) ProdMReg(clk, reset, FlushM, ~StallM, ProdE, ProdM); flopenrc #(`XLEN*2) ProdMReg(clk, reset, FlushM, ~StallM, ProdE, ProdM);
// Divide // Divide
assign XE = SrcAE;
// Handle sign extension for W-type instructions assign DE = SrcBE;
if (`XLEN == 64) begin // RV64 has W-type instructions
assign XE = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE;
assign DE = W64E ? {{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE;
end else begin // RV32 has no W-type instructions
assign XE = SrcAE;
assign DE = SrcBE;
end
assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]);
//intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE);
intdivrestoring div(.clk, .reset, .StallM, .FlushM, intdivrestoring div(.clk, .reset, .StallM, .FlushM,
.SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .DivDoneM, .QuotM, .RemM); .SignedDivideE, .W64E, .StartDivideE, .XE, .DE, .BusyE, .DivDoneM, .QuotM, .RemM);
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing // Start a divide when a new division instruction is received and the divider isn't already busy or finishing
assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM;

View File

@ -161,6 +161,10 @@ string tests32f[] = '{
}; };
string tests64m[] = '{ string tests64m[] = '{
"rv64m/I-REMUW-01", "3000",
"rv64m/I-REMW-01", "3000",
"rv64m/I-DIVUW-01", "3000",
"rv64m/I-DIVW-01", "3000",
"rv64m/I-MUL-01", "3000", "rv64m/I-MUL-01", "3000",
"rv64m/I-MULH-01", "3000", "rv64m/I-MULH-01", "3000",
"rv64m/I-MULHSU-01", "3000", "rv64m/I-MULHSU-01", "3000",
@ -168,12 +172,8 @@ string tests32f[] = '{
"rv64m/I-MULW-01", "3000", "rv64m/I-MULW-01", "3000",
"rv64m/I-DIV-01", "3000", "rv64m/I-DIV-01", "3000",
"rv64m/I-DIVU-01", "3000", "rv64m/I-DIVU-01", "3000",
"rv64m/I-DIVUW-01", "3000",
"rv64m/I-DIVW-01", "3000",
"rv64m/I-REM-01", "3000", "rv64m/I-REM-01", "3000",
"rv64m/I-REMU-01", "3000", "rv64m/I-REMU-01", "3000"
"rv64m/I-REMUW-01", "3000",
"rv64m/I-REMW-01", "3000"
}; };
string tests64ic[] = '{ string tests64ic[] = '{
@ -536,11 +536,11 @@ string tests32f[] = '{
tests = {tests64p,tests64i, tests64periph}; tests = {tests64p,tests64i, tests64periph};
if (`C_SUPPORTED) tests = {tests, tests64ic}; if (`C_SUPPORTED) tests = {tests, tests64ic};
else tests = {tests, tests64iNOc}; else tests = {tests, tests64iNOc};
if (`M_SUPPORTED) tests = {tests, tests64m};
if (`F_SUPPORTED) tests = {tests64f, tests}; if (`F_SUPPORTED) tests = {tests64f, tests};
if (`D_SUPPORTED) tests = {tests64d, tests}; if (`D_SUPPORTED) tests = {tests64d, tests};
if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; if (`MEM_VIRTMEM) tests = {tests64mmu, tests};
if (`A_SUPPORTED) tests = {tests64a, tests}; if (`A_SUPPORTED) tests = {tests64a, tests};
if (`M_SUPPORTED) tests = {tests64m, tests};
end end
//tests = {tests64a, tests}; //tests = {tests64a, tests};
end else begin // RV32 end else begin // RV32