Use FPU divider for integer division when F is supported

This commit is contained in:
David Harris 2022-12-14 17:03:13 -08:00
parent 09dcb56217
commit 5f637ef4a7
9 changed files with 53 additions and 38 deletions

View File

@ -52,7 +52,8 @@ module fdivsqrt(
output logic FDivBusyE, IFDivStartE, FDivDoneE, output logic FDivBusyE, IFDivStartE, FDivDoneE,
// output logic DivDone, // output logic DivDone,
output logic [`NE+1:0] QeM, output logic [`NE+1:0] QeM,
output logic [`DIVb:0] QmM output logic [`DIVb:0] QmM,
output logic [`XLEN-1:0] FPIntDivResultM
// output logic [`XLEN-1:0] RemM, // output logic [`XLEN-1:0] RemM,
); );
@ -88,5 +89,5 @@ module fdivsqrt(
.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun,
.SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE, .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE,
.n, .ALTBM, .m, .BZero, .As, .n, .ALTBM, .m, .BZero, .As,
.QmM, .WZero, .DivSM); .QmM, .WZero, .DivSM, .FPIntDivResultM);
endmodule endmodule

View File

@ -43,7 +43,8 @@ module fdivsqrtpostproc(
input logic [`DIVBLEN:0] n, m, input logic [`DIVBLEN:0] n, m,
output logic [`DIVb:0] QmM, output logic [`DIVb:0] QmM,
output logic WZero, output logic WZero,
output logic DivSM output logic DivSM,
output logic [`XLEN-1:0] FPIntDivResultM
); );
logic [`DIVb+3:0] W, Sum, RemDM; logic [`DIVb+3:0] W, Sum, RemDM;
@ -53,7 +54,7 @@ module fdivsqrtpostproc(
logic [`DIVBLEN:0] NormShiftM; logic [`DIVBLEN:0] NormShiftM;
logic [`DIVb:0] IntQuotM, NormQuotM; logic [`DIVb:0] IntQuotM, NormQuotM;
logic [`DIVb+3:0] IntRemM, NormRemM; logic [`DIVb+3:0] IntRemM, NormRemM;
logic [`DIVb+3:0] PreResultM, ResultM; logic [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
// check for early termination on an exact result. If the result is not exact, the sticky should be set // check for early termination on an exact result. If the result is not exact, the sticky should be set
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0); aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0);
@ -136,7 +137,8 @@ module fdivsqrtpostproc(
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
assign ResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)}; assign PreFPIntDivResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit
assign QmM = SqrtM ? (PreQmM << 1) : PreQmM; assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;

View File

@ -55,7 +55,8 @@ module fpu (
output logic FCvtIntW, // select FCvtIntRes (to IEU) output logic FCvtIntW, // select FCvtIntRes (to IEU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit) output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM // FPU flags (to privileged unit) output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
output logic [`XLEN-1:0] FPIntDivResultW
); );
// FPU specifics: // FPU specifics:
@ -152,6 +153,7 @@ module fpu (
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
logic StallUnpackedM; logic StallUnpackedM;
logic [`XLEN-1:0] FPIntDivResultM;
// DECODE STAGE // DECODE STAGE
@ -267,7 +269,7 @@ module fpu (
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
.StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, .StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM /*, .DivDone(DivDoneM) */); .QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */);
// //
// compare // compare
@ -387,7 +389,8 @@ module fpu (
// M/W pipe registers // M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FPIntDivResultM, FPIntDivResultW);
// BEGIN WRITEBACK STAGE // BEGIN WRITEBACK STAGE

View File

@ -65,7 +65,7 @@ module controller(
output logic FWriteIntM, output logic FWriteIntM,
// Writeback stage control signals // Writeback stage control signals
input logic StallW, FlushW, input logic StallW, FlushW,
output logic RegWriteW, // for datapath and Hazard Unit output logic RegWriteW, DivW, // for datapath and Hazard Unit
output logic [2:0] ResultSrcW, output logic [2:0] ResultSrcW,
// Stall during CSRs // Stall during CSRs
output logic CSRWriteFencePendingDEM, output logic CSRWriteFencePendingDEM,
@ -109,6 +109,7 @@ module controller(
logic IllegalERegAdrD; logic IllegalERegAdrD;
logic [1:0] AtomicE; logic [1:0] AtomicE;
logic FencePendingD, FencePendingE, FencePendingM; logic FencePendingD, FencePendingE, FencePendingM;
logic DivE, DivM;
// Extract fields // Extract fields
@ -222,16 +223,17 @@ module controller(
assign MemReadE = MemRWE[1]; assign MemReadE = MemRWE[1];
assign SCE = (ResultSrcE == 3'b100); assign SCE = (ResultSrcE == 3'b100);
assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers
assign DivE = MDUE & Funct3E[2]; // Division operation
// Memory stage pipeline control register // Memory stage pipeline control register
flopenrc #(19) controlregM(clk, reset, FlushM, ~StallM, flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE}, {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE, DivE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM}); {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM, DivM});
// Writeback stage pipeline control register // Writeback stage pipeline control register
flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW, flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW,
{RegWriteM, ResultSrcM}, {RegWriteM, ResultSrcM, DivM},
{RegWriteW, ResultSrcW}); {RegWriteW, ResultSrcW, DivW});
// Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages // Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages
assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM; assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM;

View File

@ -57,13 +57,14 @@ module datapath (
output logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] WriteDataM,
// Writeback stage signals // Writeback stage signals
input logic StallW, FlushW, input logic StallW, FlushW,
(* mark_debug = "true" *) input logic RegWriteW, (* mark_debug = "true" *) input logic RegWriteW, DivW,
input logic SquashSCW, input logic SquashSCW,
input logic [2:0] ResultSrcW, input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW, input logic [`XLEN-1:0] FCvtIntResW,
input logic [`XLEN-1:0] ReadDataW, input logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW, // input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW, input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
input logic [`XLEN-1:0] FPIntDivResultW,
// Hazard Unit signals // Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E,
output logic [4:0] RdE, RdM, RdW output logic [4:0] RdE, RdM, RdW
@ -85,7 +86,7 @@ module datapath (
// Writeback stage signals // Writeback stage signals
logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] SCResultW;
logic [`XLEN-1:0] ResultW; logic [`XLEN-1:0] ResultW;
logic [`XLEN-1:0] IFResultW, IFCvtResultW; logic [`XLEN-1:0] IFResultW, IFCvtResultW, MulDivResultW;
// Decode stage // Decode stage
assign Rs1D = InstrD[19:15]; assign Rs1D = InstrD[19:15];
@ -125,10 +126,12 @@ module datapath (
if (`F_SUPPORTED) begin:fpmux if (`F_SUPPORTED) begin:fpmux
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW); mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
mux2 #(`XLEN) divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW);
end else begin:fpmux end else begin:fpmux
assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW; assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW;
assign MulDivResultW = MDUResultW;
end end
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
// handle Store Conditional result if atomic extension supported // handle Store Conditional result if atomic extension supported
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};

View File

@ -58,6 +58,7 @@ module ieu (
output logic InvalidateICacheM, FlushDCacheM, output logic InvalidateICacheM, FlushDCacheM,
// Writeback stage // Writeback stage
input logic [`XLEN-1:0] FPIntDivResultW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW, input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
input logic [`XLEN-1:0] FCvtIntResW, input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW, output logic [4:0] RdW,
@ -83,6 +84,7 @@ module ieu (
logic SCE; logic SCE;
logic [4:0] RdE; logic [4:0] RdE;
logic FWriteIntM; logic FWriteIntM;
logic DivW;
// forwarding signals // forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E;
@ -99,15 +101,15 @@ module ieu (
.Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD); .StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
datapath dp( datapath dp(
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE,
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW, .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW, .StallW, .FlushW, .RegWriteW, .DivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW); .CSRReadValW, .MDUResultW, .FPIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw( forward fw(
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,

View File

@ -59,10 +59,17 @@ module muldiv (
// Divide // Divide
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing // Start a divide when a new division instruction is received and the divider isn't already busy or finishing
// When F extensions are supported, use the FPU divider instead
if (`F_SUPPORTED) begin
assign QuotM = 0;
assign RemM = 0;
assign DivBusyE = 0;
end else begin
assign DivE = MDUE & Funct3E[2]; assign DivE = MDUE & Funct3E[2];
assign DivSignedE = ~Funct3E[0]; assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE, intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE,
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
end
// Result multiplexer // Result multiplexer
always_comb always_comb

View File

@ -99,6 +99,7 @@ module wallypipelinedcore (
logic FpLoadStoreM; logic FpLoadStoreM;
logic [1:0] FResSelW; logic [1:0] FResSelW;
logic [4:0] SetFflagsM; logic [4:0] SetFflagsM;
logic [`XLEN-1:0] FPIntDivResultW;
// memory management unit signals // memory management unit signals
logic ITLBWriteF; logic ITLBWriteF;
@ -228,7 +229,7 @@ module wallypipelinedcore (
.RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
// Writeback stage // Writeback stage
.CSRReadValW, .MDUResultW, .CSRReadValW, .MDUResultW, .FPIntDivResultW,
.RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
.InstrValidM, .InstrValidM,
.FCvtIntResW, .FCvtIntResW,
@ -405,7 +406,8 @@ module wallypipelinedcore (
.FCvtIntW, // fpu result selection .FCvtIntW, // fpu result selection
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrM, // Is the instruction an illegal fpu instruction .IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit) .SetFflagsM, // FPU flags (to privileged unit)
.FPIntDivResultW
); // floating point unit ); // floating point unit
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0; assign FStallD = 0;

View File

@ -8,8 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
current_dir = $(shell pwd) current_dir = $(shell pwd)
#XLEN ?= 64 #XLEN ?= 64
all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 all: root arch32 wally32 wally32e arch64 wally64
#all: root fsd_fld_tempfix wally32
root: root:
mkdir -p $(work_dir) mkdir -p $(work_dir)
@ -20,14 +19,8 @@ root:
sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini
sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini
fsd_fld_tempfix:
# this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests
# https://github.com/riscv-non-isa/riscv-arch-test/issues/266
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
arch32: arch32:
riscof --verbose debug run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed" rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed"
arch64: arch64: