Merge pull request #474 from davidharrishmc/dev

FP and synthesis cleanup
This commit is contained in:
Rose Thompson 2023-11-14 12:03:01 -08:00 committed by GitHub
commit bf51948616
9 changed files with 35 additions and 20 deletions

3
.gitmodules vendored
View File

@ -8,9 +8,6 @@
[submodule "addins/imperas-riscv-tests"] [submodule "addins/imperas-riscv-tests"]
path = addins/imperas-riscv-tests path = addins/imperas-riscv-tests
url = https://github.com/riscv-ovpsim/imperas-riscv-tests url = https://github.com/riscv-ovpsim/imperas-riscv-tests
[submodule "addins/riscv-tests"]
path = addins/riscv-tests
url = https://github.com/riscv-software-src/riscv-tests
[submodule "addins/riscv-dv"] [submodule "addins/riscv-dv"]
path = addins/riscv-dv path = addins/riscv-dv
url = https://github.com/google/riscv-dv url = https://github.com/google/riscv-dv

@ -1 +1 @@
Subproject commit 1480febc3ace5f471baeee4b1ae0d8fea16e4762 Subproject commit 4c5eb87983f51ca7fcf7855306877b3d1c3aabf1

@ -1 +1 @@
Subproject commit 197179fdc9dfeeca821e848f373c897a3fdae86c Subproject commit 2c5675d7a58e98d47bef3a6cf5a8373397b0d0be

@ -1 +0,0 @@
Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7

View File

@ -40,7 +40,7 @@ localparam ZIFENCEI_SUPPORTED = 1;
localparam COUNTERS = 12'd32; localparam COUNTERS = 12'd32;
localparam ZICNTR_SUPPORTED = 1; localparam ZICNTR_SUPPORTED = 1;
localparam ZIHPM_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 0; localparam ZFH_SUPPORTED = 1;
localparam SSTC_SUPPORTED = 1; localparam SSTC_SUPPORTED = 1;
localparam ZICBOM_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1;
localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1;

View File

@ -76,7 +76,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE; if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
else ResultBitsE = FPResultBitsE; else ResultBitsE = FPResultBitsE;
assign CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk)
end end
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */

View File

@ -86,9 +86,9 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
////////////////////////// //////////////////////////
// If the result is not exact, the sticky should be set // If the result is not exact, the sticky should be set
assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide assign DivStickyM = ~WZeroM & ~SpecialCaseM;
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed. // Determine if sticky bit is negative
assign Sum = WC + WS; assign Sum = WC + WS;
assign NegStickyM = Sum[P.DIVb+3]; assign NegStickyM = Sum[P.DIVb+3];
mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit

View File

@ -26,7 +26,7 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
module hazard ( module hazard import cvw::*; #(parameter cvw_t P) (
// Detect hazards // Detect hazards
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,
input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
@ -46,9 +46,28 @@ module hazard (
logic WFIStallM, WFIInterruptedM; logic WFIStallM, WFIInterruptedM;
logic ValidWfiM, ValidTrapM, ValidRetM, ValidCSRWriteFenceM, ValidCSRRdStallD;
logic ValidFPUStallD, ValidFCvtIntStallD, ValidFDivBusyE, ValidMDUStallD, ValidDivBusyE;
// Gate Stall/Flush sources with supported features
// This is not logically necessary because the original signals are already 0 when the feature is unsupported
// However, synthesis does not propagate the constant 0 across modules
// By gating these signals, synthesis eliminates unnecessary stall/flush logic, saving about 10% cycle time for rv32e
// These lines of code gating with a compile-time constant generate no hardware.
// Each Valid* signal is its same-named source ANDed with the parameter of the feature that produces it.
// Sources gated by P.ZICSR_SUPPORTED
assign ValidWfiM = wfiM & P.ZICSR_SUPPORTED;
assign ValidTrapM = TrapM & P.ZICSR_SUPPORTED;
assign ValidRetM = RetM & P.ZICSR_SUPPORTED;
assign ValidCSRWriteFenceM = CSRWriteFenceM & P.ZICSR_SUPPORTED;
assign ValidCSRRdStallD = CSRRdStallD & P.ZICSR_SUPPORTED;
// Sources gated by P.F_SUPPORTED
// The decode-stage FPU stalls must come from FPUStallD / FCvtIntStallD (not RetM),
// otherwise StallDCause never sees the real FPU stall requests.
assign ValidFPUStallD = FPUStallD & P.F_SUPPORTED;
assign ValidFCvtIntStallD = FCvtIntStallD & P.F_SUPPORTED;
assign ValidFDivBusyE = FDivBusyE & P.F_SUPPORTED;
// Sources gated by P.M_SUPPORTED
assign ValidMDUStallD = MDUStallD & P.M_SUPPORTED;
assign ValidDivBusyE = DivBusyE & P.M_SUPPORTED;
// WFI logic // WFI logic
assign WFIStallM = wfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout assign WFIStallM = ValidWfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout
assign WFIInterruptedM = wfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled. assign WFIInterruptedM = ValidWfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled.
// stalls and flushes // stalls and flushes
// loads: stall for one cycle if the subsequent instruction depends on the load // loads: stall for one cycle if the subsequent instruction depends on the load
@ -70,10 +89,10 @@ module hazard (
// Branch misprediction is found in the Execute stage and must flush the next two instructions. // Branch misprediction is found in the Execute stage and must flush the next two instructions.
// However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divide must still complete // However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divide must still complete
// When a WFI is interrupted and causes a trap, it flushes the rest of the pipeline but not the W stage, because the WFI needs to commit // When a WFI is interrupted and causes a trap, it flushes the rest of the pipeline but not the W stage, because the WFI needs to commit
assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; assign FlushDCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM | BPWrongE;
assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); assign FlushECause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM |(BPWrongE & ~(ValidDivBusyE | ValidFDivBusyE));
assign FlushMCause = TrapM | RetM | CSRWriteFenceM; assign FlushMCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM;
assign FlushWCause = TrapM & ~WFIInterruptedM; assign FlushWCause = ValidTrapM & ~WFIInterruptedM;
// Stall causes // Stall causes
// Most data dependency stalls are identified in the decode stage // Most data dependency stalls are identified in the decode stage
@ -84,8 +103,8 @@ module hazard (
// The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions // The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions
// A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation // A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation
assign StallFCause = '0; assign StallFCause = '0;
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause; assign StallDCause = (LoadStallD | StoreStallD | ValidMDUStallD | ValidCSRRdStallD | ValidFCvtIntStallD | ValidFPUStallD) & ~FlushDCause;
assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause; assign StallECause = (ValidDivBusyE | ValidFDivBusyE) & ~FlushECause;
assign StallMCause = WFIStallM & ~FlushMCause; assign StallMCause = WFIStallM & ~FlushMCause;
// Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1. // Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1.
// assign StallWCause = ((IFUStallF & ~FlushDCause) | LSUStallM) & ~FlushWCause; // assign StallWCause = ((IFUStallF & ~FlushDCause) | LSUStallM) & ~FlushWCause;

View File

@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
end end
// global stall and flush control // global stall and flush control
hazard hzu( hazard #(P) hzu(
.BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM,
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
.LSUStallM, .IFUStallF, .LSUStallM, .IFUStallF,