Use FPU divider for integer division when F is supported

This commit is contained in:
David Harris 2022-12-14 17:03:13 -08:00
parent 4a0e4aed99
commit 643a2e7cf9
9 changed files with 53 additions and 38 deletions

View File

@ -52,7 +52,8 @@ module fdivsqrt(
output logic FDivBusyE, IFDivStartE, FDivDoneE,
// output logic DivDone,
output logic [`NE+1:0] QeM,
output logic [`DIVb:0] QmM
output logic [`DIVb:0] QmM,
output logic [`XLEN-1:0] FPIntDivResultM
// output logic [`XLEN-1:0] RemM,
);
@ -88,5 +89,5 @@ module fdivsqrt(
.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun,
.SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE,
.n, .ALTBM, .m, .BZero, .As,
.QmM, .WZero, .DivSM);
.QmM, .WZero, .DivSM, .FPIntDivResultM);
endmodule

View File

@ -43,7 +43,8 @@ module fdivsqrtpostproc(
input logic [`DIVBLEN:0] n, m,
output logic [`DIVb:0] QmM,
output logic WZero,
output logic DivSM
output logic DivSM,
output logic [`XLEN-1:0] FPIntDivResultM
);
logic [`DIVb+3:0] W, Sum, RemDM;
@ -53,7 +54,7 @@ module fdivsqrtpostproc(
logic [`DIVBLEN:0] NormShiftM;
logic [`DIVb:0] IntQuotM, NormQuotM;
logic [`DIVb+3:0] IntRemM, NormRemM;
logic [`DIVb+3:0] PreResultM, ResultM;
logic [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
// check for early termination on an exact result. If the result is not exact, the sticky should be set
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0);
@ -136,8 +137,9 @@ module fdivsqrtpostproc(
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
assign ResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
assign PreFPIntDivResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit
assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
endmodule

View File

@ -55,7 +55,8 @@ module fpu (
output logic FCvtIntW, // select FCvtIntRes (to IEU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
output logic [`XLEN-1:0] FPIntDivResultW
);
// FPU specifics:
@ -152,6 +153,7 @@ module fpu (
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // One value, with NaN boxing if needed (NOTE(review): substituted operand not visible here - confirm)
logic StallUnpackedM;
logic [`XLEN-1:0] FPIntDivResultM;
// DECODE STAGE
@ -267,7 +269,7 @@ module fpu (
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
.StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM /*, .DivDone(DivDoneM) */);
.QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */);
//
// compare
@ -387,7 +389,8 @@ module fpu (
// M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FPIntDivResultM, FPIntDivResultW);
// BEGIN WRITEBACK STAGE

View File

@ -65,7 +65,7 @@ module controller(
output logic FWriteIntM,
// Writeback stage control signals
input logic StallW, FlushW,
output logic RegWriteW, // for datapath and Hazard Unit
output logic RegWriteW, DivW, // for datapath and Hazard Unit
output logic [2:0] ResultSrcW,
// Stall during CSRs
output logic CSRWriteFencePendingDEM,
@ -109,6 +109,7 @@ module controller(
logic IllegalERegAdrD;
logic [1:0] AtomicE;
logic FencePendingD, FencePendingE, FencePendingM;
logic DivE, DivM;
// Extract fields
@ -222,16 +223,17 @@ module controller(
assign MemReadE = MemRWE[1];
assign SCE = (ResultSrcE == 3'b100);
assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers
assign DivE = MDUE & Funct3E[2]; // Division operation
// Memory stage pipeline control register
flopenrc #(19) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM});
flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE, DivE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM, DivM});
// Writeback stage pipeline control register
flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW,
{RegWriteM, ResultSrcM},
{RegWriteW, ResultSrcW});
flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW,
{RegWriteM, ResultSrcM, DivM},
{RegWriteW, ResultSrcW, DivW});
// Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages
assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM;

View File

@ -57,14 +57,15 @@ module datapath (
output logic [`XLEN-1:0] WriteDataM,
// Writeback stage signals
input logic StallW, FlushW,
(* mark_debug = "true" *) input logic RegWriteW,
(* mark_debug = "true" *) input logic RegWriteW, DivW,
input logic SquashSCW,
input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW,
input logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
// Hazard Unit signals
input logic [`XLEN-1:0] FPIntDivResultW,
// Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E,
output logic [4:0] RdE, RdM, RdW
);
@ -85,7 +86,7 @@ module datapath (
// Writeback stage signals
logic [`XLEN-1:0] SCResultW;
logic [`XLEN-1:0] ResultW;
logic [`XLEN-1:0] IFResultW, IFCvtResultW;
logic [`XLEN-1:0] IFResultW, IFCvtResultW, MulDivResultW;
// Decode stage
assign Rs1D = InstrD[19:15];
@ -125,10 +126,12 @@ module datapath (
if (`F_SUPPORTED) begin:fpmux
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
mux2 #(`XLEN) divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW);
end else begin:fpmux
assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW;
assign MulDivResultW = MDUResultW;
end
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
// handle Store Conditional result if atomic extension supported
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};

View File

@ -58,6 +58,7 @@ module ieu (
output logic InvalidateICacheM, FlushDCacheM,
// Writeback stage
input logic [`XLEN-1:0] FPIntDivResultW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW,
@ -83,6 +84,7 @@ module ieu (
logic SCE;
logic [4:0] RdE;
logic FWriteIntM;
logic DivW;
// forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E;
@ -99,15 +101,15 @@ module ieu (
.Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
.StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
datapath dp(
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE,
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
.StallW, .FlushW, .RegWriteW, .DivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .MDUResultW, .FPIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,

View File

@ -59,10 +59,17 @@ module muldiv (
// Divide
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
assign DivE = MDUE & Funct3E[2];
assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE,
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
// When F extensions are supported, use the FPU divider instead
if (`F_SUPPORTED) begin
assign QuotM = 0;
assign RemM = 0;
assign DivBusyE = 0;
end else begin
assign DivE = MDUE & Funct3E[2];
assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE,
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
end
// Result multiplexer
always_comb

View File

@ -99,6 +99,7 @@ module wallypipelinedcore (
logic FpLoadStoreM;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
logic [`XLEN-1:0] FPIntDivResultW;
// memory management unit signals
logic ITLBWriteF;
@ -228,7 +229,7 @@ module wallypipelinedcore (
.RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
// Writeback stage
.CSRReadValW, .MDUResultW,
.CSRReadValW, .MDUResultW, .FPIntDivResultW,
.RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
.InstrValidM,
.FCvtIntResW,
@ -405,7 +406,8 @@ module wallypipelinedcore (
.FCvtIntW, // fpu result selection
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit)
.SetFflagsM, // FPU flags (to privileged unit)
.FPIntDivResultW
); // floating point unit
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0;

View File

@ -8,8 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
current_dir = $(shell pwd)
#XLEN ?= 64
all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64
#all: root fsd_fld_tempfix wally32
all: root arch32 wally32 wally32e arch64 wally64
root:
mkdir -p $(work_dir)
@ -20,14 +19,8 @@ root:
sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini
sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini
fsd_fld_tempfix:
# this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests
# https://github.com/riscv-non-isa/riscv-arch-test/issues/266
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
arch32:
riscof --verbose debug run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed"
arch64: