From 8eed89616cc0b3dc160ca11bb23566625ceb339c Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 23 Jun 2021 16:42:40 -0400 Subject: [PATCH 01/20] fpu clean-up --- wally-pipelined/src/fpu/fpu.sv | 229 ++++++++---------- .../testbench/testbench-imperas.sv | 2 +- 2 files changed, 100 insertions(+), 131 deletions(-) diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index fc38b2f69..e886c66e3 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -1,7 +1,7 @@ /////////////////////////////////////////// // -// Written: -// Modified: +// Written: Katherine Parry, Bret Mathis +// Modified: 6/23/2021 // // Purpose: FPU // @@ -25,23 +25,23 @@ `include "wally-config.vh" module fpu ( - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic reset, input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [31:0] InstrD, + input logic [`XLEN-1:0] ReadDataW, // Read data from memory + input logic RegWriteD, // register write enable from ieu input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - input logic [`XLEN-1:0] ReadDataW, // Read data from memory - input logic RegWriteD, // register write enable from ieu - output logic [4:0] SetFflagsM, // FPU flags output logic [1:0] FMemRWM, // Read/write enable for memory {read, write} output logic FStallD, // Stall the decode stage if Div/Sqrt instruction output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic [`XLEN-1:0] FWriteDataM, // Data to be written to memory output logic FDivBusyE, // Is the divison/sqrt unit busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result // control logic signal 
instantiation @@ -58,10 +58,10 @@ module fpu ( logic FInput2UsedD; // Is input 2 used logic FInput3UsedD; // Is input 3 used logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which operation to do in each component logic SelLoadInputE, SelLoadInputM; // Select which adress to load when single precision - // regfile signals //*** KEP lint warning - changed `XLEN-1 to 63 + // regfile signals logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining logic [63:0] FWDM; // Write data for FP register logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -147,26 +147,6 @@ module fpu ( logic [63:0] FPUResult64W, FPUResult64E; logic [4:0] FPUFlagsW; - // pipeline control logic - logic PipeEnableDE; - logic PipeEnableEM; - logic PipeEnableMW; - logic PipeClearDE; - logic PipeClearEM; - logic PipeClearMW; - - // temporarily assign pipe clear and enable signals - // to never flush & always be running - localparam PipeClear = 1'b0; - localparam PipeEnable = 1'b1; - always_comb begin - PipeEnableDE = ~StallE; - PipeEnableEM = ~StallM; - PipeEnableMW = ~StallW; - PipeClearDE = FlushE; - PipeClearEM = FlushM; - PipeClearMW = FlushW; - end //DECODE STAGE @@ -185,29 +165,18 @@ module fpu ( //***************** // fpregfile D/E pipe registers //***************** - flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E); + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); //***************** // 
other D/E pipe registers //***************** - flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE); - flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); - flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); - flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); - flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); - flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE); - flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE); - flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E); - flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E); - flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E); - flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E); - flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); - flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E); - flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); - flopenrc #(1) DEReg18(clk, reset, PipeClearDE, PipeEnableDE, InstrD[15], SelLoadInputE); - + flopenrc #(64) DEReg14(clk, reset, FlushE, ~StallE, FPUResult64W, FPUResult64E); + flopenrc #(28) CtrlRegE(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FDivStartD, FForwardInput1D, FForwardInput2D, FForwardInput3D, FWriteIntD, FOutputInput2D, FMemRWD, InstrD[15]}, + {FWriteEnE, FResultSelE, FrmE, FmtE, RdE, FOpCtrlE, FDivStartE, FForwardInput1E, FForwardInput2E, FForwardInput3E, FWriteIntE, FOutputInput2E, FMemRWE, SelLoadInputE}); + //EXECUTION STAGE // input muxs for forwarding @@ -253,91 +222,91 @@ module fpu ( //***************** //fpregfile D/E pipe registers 
//***************** - flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M); - flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M); - flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M); + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FInput1E, FInput1M); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FInput2E, FInput2M); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FInput3E, FInput3M); //***************** // fma E/M pipe registers //***************** - flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, ProdExpE, ProdExpM); - flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, AddendStickyE, AddendStickyM); - flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, KillProdE, KillProdM); - flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, XZeroE, XZeroM); - flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, YZeroE, YZeroM); - flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, ZZeroE, ZZeroM); - flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, XInfE, XInfM); - flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, YInfE, YInfM); - flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, ZInfE, ZInfM); - flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, XNaNE, XNaNM); - flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM); - flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM); + flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, 
ProdExpM); + flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM); + flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM); + flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM); + flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM); + flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM); + flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM); + flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM); + flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM); + flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM); + flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM); + flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM); //***************** // fpadd E/M pipe registers //***************** - flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); - flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); - flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); - flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); - flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); - flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); - flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); - flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); - flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); - flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); - 
flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); - flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); - flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); - flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); - flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); - flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); - flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); - flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); - flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); + flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); + flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM); + flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM); + flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM); + flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM); + flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM); + flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM); + flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM); 
+ flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM); + flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM); + flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM); + flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM); + flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM); + flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM); + flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM); + flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM); + flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM); + flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM); //***************** // fpcmp E/M pipe registers //***************** - flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); - flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); - flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); - flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); - flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); - flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); + flopenrc #(8) EMRegCmp1(clk, reset, FlushM, ~StallM, WE, WM); + flopenrc #(8) EMRegCmp2(clk, reset, FlushM, ~StallM, XE, XM); + flopenrc #(1) EMRegcmp3(clk, reset, FlushM, ~StallM, ANaNE, ANaNM); + flopenrc #(1) EMRegCmp4(clk, reset, 
FlushM, ~StallM, BNaNE, BNaNM); + flopenrc #(1) EMRegCmp5(clk, reset, FlushM, ~StallM, AzeroE, AzeroM); + flopenrc #(1) EMRegCmp6(clk, reset, FlushM, ~StallM, BzeroE, BzeroM); // put this in for the event we want to delay fsgn - will otherwise bypass //***************** // fpsgn E/M pipe registers //***************** - flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM); - flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); + flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM); + flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM); //***************** // other E/M pipe registers //***************** - flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM); - flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); - flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); - flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); - flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM); - flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM); - flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); - flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); - flopenrc #(1) EMReg9(clk, reset, PipeClearEM, PipeEnableEM, SelLoadInputE, SelLoadInputM); + flopenrc #(1) EMReg1(clk, reset, FlushM, ~StallM, FWriteEnE, FWriteEnM); + flopenrc #(3) EMReg2(clk, reset, FlushM, ~StallM, FResultSelE, FResultSelM); + flopenrc #(3) EMReg3(clk, reset, FlushM, ~StallM, FrmE, FrmM); + flopenrc #(1) EMReg4(clk, reset, FlushM, ~StallM, FmtE, FmtM); + flopenrc #(5) EMReg5(clk, reset, FlushM, ~StallM, RdE, RdM); + flopenrc #(4) EMReg6(clk, reset, FlushM, ~StallM, FOpCtrlE, FOpCtrlM); + flopenrc #(1) EMReg7(clk, reset, FlushM, ~StallM, FWriteIntE, FWriteIntM); + flopenrc #(2) EMReg8(clk, reset, 
FlushM, ~StallM, FMemRWE, FMemRWM); + flopenrc #(1) EMReg9(clk, reset, FlushM, ~StallM, SelLoadInputE, SelLoadInputM); //***************** // fpuclassify E/M pipe registers //***************** - flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM); + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM); //BEGIN MEMORY STAGE @@ -366,56 +335,56 @@ module fpu ( //***************** //fpregfile M/W pipe registers //***************** - flopenrc #(64) MWFpReg1(clk, reset, PipeClearMW, PipeEnableMW, FInput1M, FInput1W); + flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, FInput1M, FInput1W); //***************** // fma M/W pipe registers //***************** - flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); - flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW); //***************** // fpdiv M/W pipe registers //***************** - flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW); - flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW); + flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW); //***************** // fpadd M/W pipe registers //***************** - flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); - flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW); + flopenrc 
#(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW); //***************** // fpcmp M/W pipe registers //***************** - flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); - flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); - flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW); + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW); + flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW); //***************** // fpsgn M/W pipe registers //***************** - flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW); - flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW); + flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW); + flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW); //***************** // other M/W pipe registers //***************** - flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW); - flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); - flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); - flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); - flopenrc #(64) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, AlignedSrcAM, SrcAW); - // flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW); - flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); - flopenrc #(4) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FOpCtrlM, FOpCtrlW); + flopenrc #(1) MWReg1(clk, reset, FlushW, ~StallW, FWriteEnM, FWriteEnW); + flopenrc #(3) MWReg2(clk, reset, FlushW, ~StallW, FResultSelM, FResultSelW); + flopenrc #(1) MWReg3(clk, reset, FlushW, 
~StallW, FmtM, FmtW); + flopenrc #(5) MWReg4(clk, reset, FlushW, ~StallW, RdM, RdW); + flopenrc #(64) MWReg5(clk, reset, FlushW, ~StallW, AlignedSrcAM, SrcAW); + // flopenrc #(64) MWReg6(clk, reset, FlushW, ~StallW, FLoadStoreResultM, FLoadStoreResultW); + flopenrc #(1) MWReg7(clk, reset, FlushW, ~StallW, FWriteIntM, FWriteIntW); + flopenrc #(4) MWReg6(clk, reset, FlushW, ~StallW, FOpCtrlM, FOpCtrlW); //***************** // fpuclassify M/W pipe registers //***************** - flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW); + flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 1bbe6124b..2b052dcdf 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -554,7 +554,7 @@ string tests32f[] = '{ if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; else tests = {tests, tests32iNOc}; if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; - // if (`F_SUPPORTED) tests = {tests32f, tests}; + if (`F_SUPPORTED) tests = {tests32f, tests}; if (`A_SUPPORTED) tests = {tests, tests32a}; if (`MEM_VIRTMEM) tests = {tests, tests32mmu}; end From be962cb1ffcb04c2f4cb5e0bb16fb777b39fe514 Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 24 Jun 2021 01:42:35 -0400 Subject: [PATCH 02/20] overhauled linux testbench and spoofed MTTIME interrupt --- .../regression/regression-wally.py | 2 +- .../regression/wally-buildroot-batch.do | 1 - wally-pipelined/regression/wally-buildroot.do | 1 - .../regression/wave-dos/linux-waves.do | 3 +- wally-pipelined/testbench/testbench-linux.sv | 912 +++++++++--------- 5 files changed, 484 insertions(+), 435 deletions(-) diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py index eac221cd6..fcd6d4be1 100755 --- 
a/wally-pipelined/regression/regression-wally.py +++ b/wally-pipelined/regression/regression-wally.py @@ -31,7 +31,7 @@ configs = [ TestCase( name="buildroot", cmd="vsim -do wally-buildroot-batch.do -c > {}", - grepstr="# loaded 2000000 instructions" + grepstr="# loaded 2500000 instructions" ), TestCase( name="rv32ic", diff --git a/wally-pipelined/regression/wally-buildroot-batch.do b/wally-pipelined/regression/wally-buildroot-batch.do index c16655e1e..6eea258e6 100644 --- a/wally-pipelined/regression/wally-buildroot-batch.do +++ b/wally-pipelined/regression/wally-buildroot-batch.do @@ -36,5 +36,4 @@ vsim workopt -suppress 8852,12070 run -all run -all -exec ./slack-notifier/slack-notifier.py quit diff --git a/wally-pipelined/regression/wally-buildroot.do b/wally-pipelined/regression/wally-buildroot.do index 452ba54d4..c2312f75e 100644 --- a/wally-pipelined/regression/wally-buildroot.do +++ b/wally-pipelined/regression/wally-buildroot.do @@ -39,5 +39,4 @@ vsim workopt -suppress 8852,12070 run -all do ./wave-dos/linux-waves.do run -all -exec ./slack-notifier/slack-notifier.py ##quit diff --git a/wally-pipelined/regression/wave-dos/linux-waves.do b/wally-pipelined/regression/wave-dos/linux-waves.do index b7dfd8c5a..b37276441 100644 --- a/wally-pipelined/regression/wave-dos/linux-waves.do +++ b/wally-pipelined/regression/wave-dos/linux-waves.do @@ -19,12 +19,13 @@ add wave /testbench/dut/hart/FlushW add wave -divider F add wave -hex /testbench/dut/hart/ifu/PCF add wave -divider D -add wave -hex /testbench/pcExpected +add wave -hex /testbench/PCDexpected add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/PCtextD add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/InstrD add wave -hex /testbench/dut/hart/ieu/c/InstrValidD +add wave -hex /testbench/PCDwrong add wave -divider E add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/PCtextE diff --git a/wally-pipelined/testbench/testbench-linux.sv 
b/wally-pipelined/testbench/testbench-linux.sv index df8fad8cd..15e0e3634 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -26,13 +26,15 @@ `include "wally-config.vh" module testbench(); - logic clk, reset; - logic [31:0] GPIOPinsIn; - logic [31:0] GPIOPinsOut, GPIOPinsEn; - - // instantiate device to be tested - logic [31:0] CheckInstrD; + + parameter waveOnICount = 2514000; // # of instructions at which to turn on waves in graphical sim + + /////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////// DUT ///////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + logic clk, reset; + logic [`AHBW-1:0] HRDATA; logic [31:0] HADDR; logic [`AHBW-1:0] HWDATA; @@ -45,155 +47,97 @@ module testbench(); logic HCLK, HRESETn; logic [`AHBW-1:0] HRDATAEXT; logic HREADYEXT, HRESPEXT; - logic UARTSout; - - logic ignoreRFwrite; - - parameter waveOnICount = 2060000; // # of instructions at which to turn on waves in graphical sim + logic [31:0] GPIOPinsIn; + logic [31:0] GPIOPinsOut, GPIOPinsEn; + logic UARTSin, UARTSout; assign GPIOPinsIn = 0; assign UARTSin = 1; - // instantiate processor and memories wallypipelinedsoc dut(.*); - /** - * Walk the page table stored in dtim according to sv39 logic and translate a - * virtual address to a physical address. - * - * See section 4.3.2 of the RISC-V Privileged specification for a full - * explanation of the below algorithm. 
- */ - function logic [`XLEN-1:0] adrTranslator( - input logic [`XLEN-1:0] adrIn); - begin - logic SvMode, PTE_R, PTE_X; - logic [`XLEN-1:0] SATP, PTE; - logic [55:0] BaseAdr, PAdr; - logic [8:0] VPN [2:0]; - logic [11:0] Offset; + /////////////////////////////////////////////////////////////////////////////// + //////////////////////// Signals & Shared Macros ////////////////////////// + //////////////////////// AKA stuff that comes first /////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Sorry if these have gotten decontextualized. + // Verilog expects them to be defined before they are used. - int i; + // ------------------- + // Signal Declarations + // ------------------- + // Testbench Core + integer instrs; + integer warningCount = 0; + string trashString; // should never be read from + logic [31:0] InstrMask; + logic forcedInstr; + logic [63:0] lastPCD; + logic PCDwrong; + // PC, Instr Checking + logic [`XLEN-1:0] PCW; + logic [63:0] lastInstrDExpected, lastPC, lastPC2; + integer data_file_PCF, scan_file_PCF; + integer data_file_PCD, scan_file_PCD; + integer data_file_PCM, scan_file_PCM; + integer data_file_PCW, scan_file_PCW; + string PCtextF, PCtextF2; + string PCtextD, PCtextD2; + string PCtextE; + string PCtextM; + string PCtextW; + logic [31:0] InstrFExpected, InstrDExpected, InstrMExpected, InstrWExpected; + logic [63:0] PCFexpected, PCDexpected, PCMexpected, PCWexpected; + // RegFile Write Checking + logic ignoreRFwrite; + logic [63:0] regExpected; + integer regNumExpected; + integer data_file_rf, scan_file_rf; + // Bus Unit Read/Write Checking + logic [63:0] readMask; + logic [`XLEN-1:0] readAdrExpected, readAdrTranslated; + logic [`XLEN-1:0] writeDataExpected, writeAdrExpected, writeAdrTranslated; + integer data_file_memR, scan_file_memR; + integer data_file_memW, scan_file_memW; + // CSR Checking + integer totalCSR = 0; + logic [99:0] StartCSRexpected[63:0]; + string 
StartCSRname[99:0]; + integer data_file_csr, scan_file_csr; + + // ----------- + // Error Macro + // ----------- + `define ERROR \ + #10; \ + $display("processed %0d instructions with %0d warnings", instrs, warningCount); \ + $stop; - // Grab the SATP register from privileged unit - SATP = dut.hart.priv.csr.SATP_REGW; + // ---------------- + // PC Updater Macro + // ---------------- + `define SCAN_PC(DATAFILE,SCANFILE,PCTEXT,PCTEXT2,CHECKINSTR,PCEXPECTED) \ + SCANFILE = $fscanf(DATAFILE, "%s\n", PCTEXT); \ + PCTEXT2 = ""; \ + while (PCTEXT2 != "***") begin \ + PCTEXT = {PCTEXT, " ", PCTEXT2}; \ + SCANFILE = $fscanf(DATAFILE, "%s\n", PCTEXT2); \ + end \ + SCANFILE = $fscanf(DATAFILE, "%x\n", CHECKINSTR); \ + SCANFILE = $fscanf(DATAFILE, "%x\n", PCEXPECTED); - // Split the virtual address into page number segments and offset - VPN[2] = adrIn[38:30]; - VPN[1] = adrIn[29:21]; - VPN[0] = adrIn[20:12]; - Offset = adrIn[11:0]; - - // We do not support sv48; only sv39 - SvMode = SATP[63]; - - // Only perform translation if translation is on and the processor is not - // in machine mode - if (SvMode && (dut.hart.priv.PrivilegeModeW != `M_MODE)) begin - BaseAdr = SATP[43:0] << 12; - - for (i = 2; i >= 0; i--) begin - PAdr = BaseAdr + (VPN[i] << 3); - - // dtim.RAM is 64-bit addressed. PAdr specifies a byte. We right shift - // by 3 (the PTE size) to get the requested 64-bit PTE. - PTE = dut.uncore.dtim.RAM[PAdr >> 3]; - PTE_R = PTE[1]; - PTE_X = PTE[3]; - if (PTE_R || PTE_X) begin - // Leaf page found - break; - end else begin - // Go to next level of table - BaseAdr = PTE[53:10] << 12; - end - end - - // Determine which parts of the PTE page number to use based on the - // level of the page table we reached. 
- if (i == 2) begin - // Gigapage - assign adrTranslator = {8'b0, PTE[53:28], VPN[1], VPN[0], Offset}; - end else if (i == 1) begin - // Megapage - assign adrTranslator = {8'b0, PTE[53:19], VPN[0], Offset}; - end else begin - // Kilopage - assign adrTranslator = {8'b0, PTE[53:10], Offset}; - end - end else begin - // Direct translation if address translation is not on - assign adrTranslator = adrIn; - end - end - endfunction - - // initialize test + /////////////////////////////////////////////////////////////////////////////// + //////////////////////////////// Testbench Core /////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- initial begin - ignoreRFwrite <= 0; + instrs = 0; + PCDwrong = 0; reset <= 1; # 22; reset <= 0; end - - // read pc trace file - integer data_file_PC, scan_file_PC; - initial begin - data_file_PC = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); - if (data_file_PC == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - integer data_file_PCW, scan_file_PCW; - initial begin - data_file_PCW = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); - if (data_file_PCW == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read register trace file - integer data_file_rf, scan_file_rf; - initial begin - data_file_rf = $fopen({`LINUX_TEST_VECTORS,"parsedRegs.txt"}, "r"); - if (data_file_rf == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read CSR trace file - integer data_file_csr, scan_file_csr; - initial begin - data_file_csr = $fopen({`LINUX_TEST_VECTORS,"parsedCSRs.txt"}, "r"); - if (data_file_csr == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read memreads trace file - integer data_file_memR, scan_file_memR; - initial begin - data_file_memR = $fopen({`LINUX_TEST_VECTORS,"parsedMemRead.txt"}, "r"); - if (data_file_memR == 
0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read memwrite trace file - integer data_file_memW, scan_file_memW; - initial begin - data_file_memW = $fopen({`LINUX_TEST_VECTORS,"parsedMemWrite.txt"}, "r"); - if (data_file_memW == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - // initial loading of memories initial begin $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootdtim.RAM, 'h1000 >> 3); @@ -201,49 +145,247 @@ module testbench(); $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.memory); end - - integer warningCount = 0; - integer instrs; - - //logic[63:0] adrTranslation[4:0]; - //string translationType[4:0] = {"rf", "writeAdr", "PCW", "PC", "readAdr"}; - //initial begin - // for(int i=0; i<5; i++) begin - // adrTranslation[i] = 64'b0; - // end - //end - - //function logic equal(logic[63:0] adr, logic[63:0] adrExpected, integer func); - // if (adr[11:0] !== adrExpected[11:0]) begin - // equal = 1'b0; - // end else begin - // equal = 1'b1; - // if ((adr+adrTranslation[func]) !== adrExpected) begin - // adrTranslation[func] = adrExpected - adr; - // $display("warning: probably new address translation %x for %s at instr %0d", adrTranslation[func], translationType[func], instrs); - // warningCount += 1; - // end - // end - //endfunction - - // pretty sure this isn't necessary anymore, but keeping this for now since its easier - function logic equal(logic[63:0] adr, logic[63:0] adrExpected, integer func); - equal = adr === adrExpected; - endfunction - - - `define ERROR \ - #10; \ - $display("processed %0d instructions with %0d warnings", instrs, warningCount); \ - $stop; - - logic [63:0] pcExpected; - logic [63:0] regExpected; - integer regNumExpected; - logic [`XLEN-1:0] PCW; + // ------- + // Running + // ------- + always + begin + clk <= 1; # 5; clk <= 0; # 5; + end + + // 
------------------------------------- + // Special warnings for important faults + // ------------------------------------- + always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin + if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin + $display("!!!!!! illegal instruction !!!!!!!!!!"); + $display("(as a reminder, MCAUSE and MEPC are set by this)"); + $display("at %0t ps, PCM %x, instr %0d, HADDR %x", $time, dut.hart.ifu.PCM, instrs, HADDR); + `ERROR + end + if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 5 && instrs != 0) begin + $display("!!!!!! illegal (physical) memory access !!!!!!!!!!"); + $display("(as a reminder, MCAUSE and MEPC are set by this)"); + $display("at %0t ps, PCM %x, instr %0d, HADDR %x", $time, dut.hart.ifu.PCM, instrs, HADDR); + `ERROR + end + end + + // ----------------------- + // RegFile Write Hijacking + // ----------------------- + always @(PCW or dut.hart.ieu.InstrValidW) begin + if(dut.hart.ieu.InstrValidW && PCW != 0) begin + // Hack to compensate for how Wally's MTIME may diverge from QEMU's MTIME (and that is okay) + if (PCtextW.substr(0,5) == "rdtime") begin + ignoreRFwrite <= 1; + scan_file_rf = $fscanf(data_file_rf, "%d\n", regNumExpected); + scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected); + force dut.hart.ieu.dp.regf.wd3 = regExpected; + // Hack to compensate for QEMU's incorrect MSTATUS + end else if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,16) == "mstatus") begin + force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'ha00000000; + end else + release dut.hart.ieu.dp.regf.wd3; + end + end + + // ---------------- + // Big Chunky Block + // ---------------- + always @(reset or dut.hart.ifu.InstrRawD or dut.hart.ifu.PCD) begin// or negedge dut.hart.ifu.StallE) begin // Why do we care about StallE? Everything seems to run fine without it. + if(~HWRITE) begin // *** Should this need to consider HWRITE? 
+ #2; + // If PCD/InstrD aren't garbage + if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0) begin // && ~dut.hart.ifu.StallE) begin + // If Wally's PCD has updated + if (dut.hart.ifu.PCD !== lastPCD) begin + lastInstrDExpected = InstrDExpected; + lastPC <= dut.hart.ifu.PCD; + lastPC2 <= lastPC; + // If PCD isn't going to be flushed + if (~PCDwrong || lastPC == PCDexpected) begin + + // Stop if we've reached the end + if($feof(data_file_PCF)) begin + $display("no more PC data to read... CONGRATULATIONS!!!"); + `ERROR + end + + // Increment PC + `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); + + // NOP out certain instructions + if(dut.hart.ifu.PCD===PCDexpected) begin + if((dut.hart.ifu.PCD == 32'h80001dc6) || // for now, NOP out any stores to PLIC + (dut.hart.ifu.PCD == 32'h80001de0) || + (dut.hart.ifu.PCD == 32'h80001de2)) begin + $display("warning: NOPing out %s at PCD=%0x, instr %0d, time %0t", PCtextD, dut.hart.ifu.PCD, instrs, $time); + force InstrDExpected = 32'b0010011; + force dut.hart.ifu.InstrRawD = 32'b0010011; + while (clk != 0) #1; + while (clk != 1) #1; + release dut.hart.ifu.InstrRawD; + release InstrDExpected; + warningCount += 1; + forcedInstr = 1; + end else begin + forcedInstr = 0; + end + end + + // Increment instruction count + if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || + (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || + (instrs <= 100000 && instrs % 10000 == 0) || (instrs % 100000 == 0)) begin + $display("loaded %0d instructions", instrs); + end + instrs += 1; + + // Stop before bugs so "do" file can turn on waves + if (instrs == waveOnICount) begin + $display("turning on waves at %0d instructions", instrs); + $stop; + end + + // Check if PCD is going to be flushed due to a branch or jump + if (`BPRED_ENABLED) begin + 
PCDwrong = dut.hart.ifu.bpred.bpred.BPPredWrongE; + end else begin + casex (lastInstrDExpected[31:0]) + 32'b00000000001000000000000001110011, // URET + 32'b00010000001000000000000001110011, // SRET + 32'b00110000001000000000000001110011, // MRET + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B + 32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ + 32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ + 32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J + PCDwrong = 1; + 32'bXXXXXXXXXXXXXXXX1001000000000010, // C.EBREAK: + 32'bXXXXXXXXXXXXXXXXX000XXXXX1110011: // Something that's not CSRR* + PCDwrong = 0; // tbh don't really know what should happen here + 32'b000110000000XXXXXXXXXXXXX1110011, // CSR* SATP, * + 32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR + 32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL + PCDwrong = 1; + default: + PCDwrong = 0; + endcase + end + + // Check PCD, InstrD + if (~PCDwrong && ~(dut.hart.ifu.PCD === PCDexpected)) begin + $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected); + `ERROR + end + InstrMask = InstrDExpected[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; + if ((~forcedInstr) && (~PCDwrong) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & InstrDExpected))) begin + $display("%0t ps, PCD %x, instr %0d: InstrD %x %s does not equal InstrDExpected %x %s", $time, dut.hart.ifu.PCD, instrs, dut.hart.ifu.InstrRawD, InstrDName, InstrDExpected, PCtextD); + `ERROR + end + + // Repeated instruction means QEMU had an interrupt which we need to spoof + if (PCFexpected == PCDexpected) begin + $display("Note at %0t ps, PCM %x %s, instr %0d: spoofing an interrupt", $time, dut.hart.ifu.PCM, PCtextM, instrs); + // Increment file pointers past the repeated instruction. 
+ `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); + scan_file_memR = $fscanf(data_file_memR, "%x\n", readAdrExpected); + scan_file_memR = $fscanf(data_file_memR, "%x\n", HRDATA); + // Next force a timer interrupt (*** this may later need generalizing) + force dut.uncore.genblk1.clint.MTIME = dut.uncore.genblk1.clint.MTIMECMP + 1; + while (clk != 0) #1; + while (clk != 1) #1; + release dut.uncore.genblk1.clint.MTIME; + end + end + end + lastPCD = dut.hart.ifu.PCD; + end + end + end + + /////////////////////////////////////////////////////////////////////////////// + ///////////////////////////// PC,Instr Checking /////////////////////////////// + /////////////////////// (outside of Big Chunky Block) ///////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- + initial begin + data_file_PCF = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + data_file_PCD = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + data_file_PCM = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + data_file_PCW = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + if (data_file_PCW == 0) begin + $display("file couldn't be opened"); + $stop; + end + // This makes sure PCF is one instr ahead of PCD + `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + // This makes sure PCM is one instr ahead of PCW + `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); + end + + // ------------------- + // Additional Hardware + // ------------------- flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW); + // PCF stuff isn't actually checked + // it only exists for helping detecting duplicate instructions in PCD + // which are the result of interrupts hitting 
QEMU + // PCD checking already happens in "Big Chunky Block" + // PCM stuff isn't actually checked + // it only exists for helping detecting duplicate instructions in PCW + // which are the result of interrupts hitting QEMU + // ------------ + // PCW Checking + // ------------ + always @(PCW or dut.hart.ieu.InstrValidW) begin + if(dut.hart.ieu.InstrValidW && PCW != 0) begin + if($feof(data_file_PCW)) begin + $display("no more PC data to read"); + `ERROR + end + `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); + `SCAN_PC(data_file_PCW, scan_file_PCW, trashString, trashString, InstrWExpected, PCWexpected); + // If repeated instr + if (PCMexpected == PCWexpected) begin + // Increment file pointers past the repeated instruction. + `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); + `SCAN_PC(data_file_PCW, scan_file_PCW, trashString, trashString, InstrWExpected, PCWexpected); + end + if(~(PCW === PCWexpected)) begin + $display("%0t ps, instr %0d: PCW does not equal PCW expected: %x, %x", $time, instrs, PCW, PCWexpected); + `ERROR + end + end + end + + + /////////////////////////////////////////////////////////////////////////////// + /////////////////////////// RegFile Write Checking //////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- + initial begin + data_file_rf = $fopen({`LINUX_TEST_VECTORS,"parsedRegs.txt"}, "r"); + if (data_file_rf == 0) begin + $display("file couldn't be opened"); + $stop; + end + end + initial + ignoreRFwrite <= 0; + // -------- + // Checking + // -------- genvar i; generate for(i=1; i<32; i++) begin @@ -251,33 +393,32 @@ module testbench(); if ($time == 0) begin scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected); if (dut.hart.ieu.dp.regf.rf[i] != regExpected) begin - $display("%0t ps, instr %0d: rf[%0d] does not equal rf expected: 
%x, %x", $time, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); + $display("%0t ps, PCW %x, instr %0d: rf[%0d] does not equal rf expected: %x, %x", $time, PCW, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); `ERROR end end else begin - if (ignoreRFwrite) + if (ignoreRFwrite) // this allows other testbench elements to force WriteData to take on the next regExpected ignoreRFwrite <= 0; else begin scan_file_rf = $fscanf(data_file_rf, "%d\n", regNumExpected); scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected); end if (i != regNumExpected) begin - $display("%0t ps, instr %0d: wrong register changed: %0d, %0d expected to switch to %x from %x", $time, instrs, i, regNumExpected, regExpected, dut.hart.ieu.dp.regf.rf[regNumExpected]); + $display("%0t ps, PCW %x %s, instr %0d: wrong register changed: %0d, %0d expected to switch to %x from %x", $time, PCW, PCtextW, instrs, i, regNumExpected, regExpected, dut.hart.ieu.dp.regf.rf[regNumExpected]); `ERROR end - if (~equal(dut.hart.ieu.dp.regf.rf[i],regExpected, 0)) begin - $display("%0t ps, instr %0d: rf[%0d] does not equal rf expected: %x, %x", $time, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); + if (~(dut.hart.ieu.dp.regf.rf[i] === regExpected)) begin + $display("%0t ps, PCW %x %s, instr %0d: rf[%0d] does not equal rf expected: %x, %x", $time, PCW, PCtextW, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); `ERROR end - //if (dut.hart.ieu.dp.regf.rf[i] !== regExpected) begin - // force dut.hart.ieu.dp.regf.rf[i] = regExpected; - // release dut.hart.ieu.dp.regf.rf[i]; - //end end end end endgenerate + /////////////////////////////////////////////////////////////////////////////// + //////////////////////// Bus Unit Read/Write Checking ///////////////////////// + /////////////////////////////////////////////////////////////////////////////// // RAM and bootram are addressed in 64-bit blocks - this logic handles R/W // including subwords. 
Brief explanation on signals: // @@ -289,17 +430,33 @@ module testbench(); // In the linux boot, the processor spends the first ~5 instructions in // bootram, before jr jumps to main RAM - logic [63:0] readMask; + // -------------- + // Initialization + // -------------- + initial begin + data_file_memR = $fopen({`LINUX_TEST_VECTORS,"parsedMemRead.txt"}, "r"); + if (data_file_memR == 0) begin + $display("file couldn't be opened"); + $stop; + end + end + initial begin + data_file_memW = $fopen({`LINUX_TEST_VECTORS,"parsedMemWrite.txt"}, "r"); + if (data_file_memW == 0) begin + $display("file couldn't be opened"); + $stop; + end + end + + // ------------ + // Read Checker + // ------------ assign readMask = ((1 << (8*(1 << HSIZE))) - 1) << 8 * HADDR[2:0]; - - logic [`XLEN-1:0] readAdrExpected, readAdrTranslated; - always @(dut.HRDATA) begin #2; if (dut.hart.MemRWM[1] && (dut.hart.ebu.CaptureDataM) && dut.HRDATA !== {64{1'bx}}) begin - //$display("%0t", $time); if($feof(data_file_memR)) begin $display("no more memR data to read"); `ERROR @@ -307,31 +464,29 @@ module testbench(); scan_file_memR = $fscanf(data_file_memR, "%x\n", readAdrExpected); scan_file_memR = $fscanf(data_file_memR, "%x\n", HRDATA); assign readAdrTranslated = adrTranslator(readAdrExpected); - if (~equal(HADDR,readAdrTranslated,4)) begin - $display("%0t ps, instr %0d: HADDR does not equal readAdrExpected: %x, %x", $time, instrs, HADDR, readAdrTranslated); + if (~(HADDR === readAdrTranslated)) begin + $display("%0t ps, PCM %x %s, instr %0d: HADDR does not equal readAdrExpected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HADDR, readAdrTranslated); `ERROR end if ((readMask & HRDATA) !== (readMask & dut.HRDATA)) begin if (HADDR inside `LINUX_FIX_READ) begin - //$display("warning %0t ps, instr %0d, adr %0d: forcing HRDATA to expected: %x, %x", $time, instrs, HADDR, HRDATA, dut.HRDATA); + if (HADDR != 'h10000005) // Suppress the warning for UART LSR so we can read UART output + $display("warning 
%0t ps, PCM %x %s, instr %0d, adr %0d: forcing HRDATA to expected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HADDR, HRDATA, dut.HRDATA); force dut.uncore.HRDATA = HRDATA; #9; release dut.uncore.HRDATA; warningCount += 1; end else begin - $display("%0t ps, instr %0d: ExpectedHRDATA does not equal dut.HRDATA: %x, %x from address %x, %x", $time, instrs, HRDATA, dut.HRDATA, HADDR, HSIZE); + $display("%0t ps, PCM %x %s, instr %0d: ExpectedHRDATA does not equal dut.HRDATA: %x, %x from address %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HRDATA, dut.HRDATA, HADDR, HSIZE); `ERROR end end - //end else if(dut.hart.MemRWM[1]) begin - // $display("%x, %x, %x, %t", HADDR, dut.PCF, dut.HRDATA, $time); - end - end - logic [`XLEN-1:0] writeDataExpected, writeAdrExpected, writeAdrTranslated; - + // ------------- + // Write Checker + // ------------- // this might need to change //always @(HWDATA or HADDR or HSIZE or HWRITE) begin always @(negedge HWRITE) begin @@ -346,20 +501,28 @@ module testbench(); assign writeAdrTranslated = adrTranslator(writeAdrExpected); if (writeDataExpected != HWDATA && ~dut.uncore.HSELPLICD) begin - $display("%0t ps, instr %0d: HWDATA does not equal writeDataExpected: %x, %x", $time, instrs, HWDATA, writeDataExpected); + $display("%0t ps, PCM %x %s, instr %0d: HWDATA does not equal writeDataExpected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HWDATA, writeDataExpected); `ERROR end - if (~equal(writeAdrTranslated,HADDR,1) && ~dut.uncore.HSELPLICD) begin - $display("%0t ps, instr %0d: HADDR does not equal writeAdrExpected: %x, %x", $time, instrs, HADDR, writeAdrTranslated); + if (~(writeAdrTranslated === HADDR) && ~dut.uncore.HSELPLICD) begin + $display("%0t ps, PCM %x %s, instr %0d: HADDR does not equal writeAdrExpected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HADDR, writeAdrTranslated); `ERROR end end end - integer totalCSR = 0; - logic [99:0] StartCSRexpected[63:0]; - string StartCSRname[99:0]; + 
/////////////////////////////////////////////////////////////////////////////// + //////////////////////////////// CSR Checking ///////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- initial begin + data_file_csr = $fopen({`LINUX_TEST_VECTORS,"parsedCSRs.txt"}, "r"); + if (data_file_csr == 0) begin + $display("file couldn't be opened"); + $stop; + end while(1) begin scan_file_csr = $fscanf(data_file_csr, "%s\n", StartCSRname[totalCSR]); if(StartCSRname[totalCSR] == "---") begin @@ -370,22 +533,10 @@ module testbench(); end end - always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin - if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin - $display("!!!!!! illegal instruction !!!!!!!!!!"); - $display("(as a reminder, MCAUSE and MEPC are set by this)"); - $display("at %0t ps, instr %0d, HADDR %x", $time, instrs, HADDR); - `ERROR - end - if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 5 && instrs != 0) begin - $display("!!!!!! 
illegal (physical) memory access !!!!!!!!!!"); - $display("(as a reminder, MCAUSE and MEPC are set by this)"); - $display("at %0t ps, instr %0d, HADDR %x", $time, instrs, HADDR); - `ERROR - end - end - - string MSTATUSstring = "MSTATUS"; + // -------------- + // Checker Macros + // -------------- + string MSTATUSstring = "MSTATUS"; //string variables seem to compare more reliably than string literals string SEPCstring = "SEPC"; string SCAUSEstring = "SCAUSE"; string SSTATUSstring = "SSTATUS"; @@ -394,7 +545,6 @@ module testbench(); string CSR; \ string ``CSR``name = `"CSR`"; \ string expected``CSR``name; \ - //CSR checking \ always @(``PATH``.``CSR``_REGW) begin \ if ($time > 1 && (`BUILDROOT != 1 || ``CSR``name != SSTATUSstring)) begin \ if (``CSR``name == SEPCstring) begin #1; end \ @@ -403,16 +553,16 @@ module testbench(); scan_file_csr = $fscanf(data_file_csr, "%s\n", expected``CSR``name); \ scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ if(expected``CSR``name.icompare(``CSR``name)) begin \ - $display("%0t ps, instr %0d: %s changed, expected %s", $time, instrs, `"CSR`", expected``CSR``name); \ + $display("%0t ps, PCM %x %s, instr %0d: %s changed, expected %s", $time, dut.hart.ifu.PCM, PCtextM, instrs, `"CSR`", expected``CSR``name); \ end \ if (``CSR``name == MSTATUSstring) begin \ if (``PATH``.``CSR``_REGW != ((``expected``CSR) | 64'ha00000000)) begin \ - $display("%0t ps, instr %0d: %s does not equal %s expected: %x, %x", $time, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, (``expected``CSR) | 64'ha00000000); \ + $display("%0t ps, PCM %x %s, instr %0d: %s (should be MSTATUS) does not equal %s expected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, (``expected``CSR) | 64'ha00000000); \ `ERROR \ end \ end else \ if (``PATH``.``CSR``_REGW != ``expected``CSR[$bits(``PATH``.``CSR``_REGW)-1:0]) begin \ - $display("%0t ps, instr %0d: %s does not equal %s expected: %x, 
%x", $time, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, ``expected``CSR); \ + $display("%0t ps, PCM %x %s, instr %0d: %s does not equal %s expected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, ``expected``CSR); \ `ERROR \ end \ end else begin \ @@ -420,7 +570,7 @@ module testbench(); for(integer j=0; j= 0; i--) begin + PAdr = BaseAdr + (VPN[i] << 3); + // dtim.RAM is 64-bit addressed. PAdr specifies a byte. We right shift + // by 3 (the PTE size) to get the requested 64-bit PTE. + PTE = dut.uncore.dtim.RAM[PAdr >> 3]; + PTE_R = PTE[1]; + PTE_X = PTE[3]; + if (PTE_R || PTE_X) begin + // Leaf page found + break; + end else begin + // Go to next level of table + BaseAdr = PTE[53:10] << 12; + end + end + // Determine which parts of the PTE page number to use based on the + // level of the page table we reached. + if (i == 2) begin + // Gigapage + assign adrTranslator = {8'b0, PTE[53:28], VPN[1], VPN[0], Offset}; + end else if (i == 1) begin + // Megapage + assign adrTranslator = {8'b0, PTE[53:19], VPN[0], Offset}; + end else begin + // Kilopage + assign adrTranslator = {8'b0, PTE[53:10], Offset}; + end + end else begin + // Direct translation if address translation is not on + assign adrTranslator = adrIn; + end + end + endfunction endmodule + + module instrTrackerTB( input logic clk, reset, input logic [31:0] InstrF,InstrD,InstrE,InstrM,InstrW, - output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); - - // stage Instr to Writeback for visualization - //flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); - + output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); instrNameDecTB fdec(InstrF, InstrFName); instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName); From cee468b21aedfa9f4c1ec49c7e6fe4437d55f10c Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 24 Jun 2021 01:54:46 -0400 Subject: [PATCH 03/20] whoops meant to 
remove notifications from busybear, not buildroot --- wally-pipelined/regression/wally-buildroot-batch.do | 1 + wally-pipelined/regression/wally-buildroot.do | 1 + wally-pipelined/regression/wally-busybear-batch.do | 1 - wally-pipelined/regression/wally-busybear.do | 1 - 4 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/regression/wally-buildroot-batch.do b/wally-pipelined/regression/wally-buildroot-batch.do index 6eea258e6..c16655e1e 100644 --- a/wally-pipelined/regression/wally-buildroot-batch.do +++ b/wally-pipelined/regression/wally-buildroot-batch.do @@ -36,4 +36,5 @@ vsim workopt -suppress 8852,12070 run -all run -all +exec ./slack-notifier/slack-notifier.py quit diff --git a/wally-pipelined/regression/wally-buildroot.do b/wally-pipelined/regression/wally-buildroot.do index c2312f75e..452ba54d4 100644 --- a/wally-pipelined/regression/wally-buildroot.do +++ b/wally-pipelined/regression/wally-buildroot.do @@ -39,4 +39,5 @@ vsim workopt -suppress 8852,12070 run -all do ./wave-dos/linux-waves.do run -all +exec ./slack-notifier/slack-notifier.py ##quit diff --git a/wally-pipelined/regression/wally-busybear-batch.do b/wally-pipelined/regression/wally-busybear-batch.do index e819d7804..a4a80eb74 100644 --- a/wally-pipelined/regression/wally-busybear-batch.do +++ b/wally-pipelined/regression/wally-busybear-batch.do @@ -36,5 +36,4 @@ vopt work_busybear.testbench -o workopt_busybear vsim workopt_busybear -suppress 8852,12070 run -all -exec ./slack-notifier/slack-notifier.py quit diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index 8d6af28bc..11876dded 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -40,5 +40,4 @@ do ./wave-dos/linux-waves.do #-- Run the Simulation run -all -exec ./slack-notifier/slack-notifier.py ##quit From 53d545cdfe1393de95152cbae842e11321190445 Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 24 Jun 
2021 02:00:01 -0400 Subject: [PATCH 04/20] regression can overcome the fact that buildroots UART prints stuff --- wally-pipelined/regression/regression-wally.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py index fcd6d4be1..b1ded5e7a 100755 --- a/wally-pipelined/regression/regression-wally.py +++ b/wally-pipelined/regression/regression-wally.py @@ -26,12 +26,12 @@ configs = [ TestCase( name="busybear", cmd="vsim -do wally-busybear-batch.do -c > {}", - grepstr="# loaded 100000 instructions" + grepstr="loaded 100000 instructions" ), TestCase( name="buildroot", cmd="vsim -do wally-buildroot-batch.do -c > {}", - grepstr="# loaded 2500000 instructions" + grepstr="loaded 2500000 instructions" ), TestCase( name="rv32ic", From 2d9c91096b4f32372e6bdb827771763052cd2ded Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 24 Jun 2021 08:35:00 -0400 Subject: [PATCH 05/20] make linux testgen be nohup-friendly and make parser account for lr/sc memory accesses --- .gitignore | 1 + wally-pipelined/linux-testgen/logAllBuildroot.sh | 2 +- wally-pipelined/linux-testgen/parse_gdb_output.py | 11 ++++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c6cac56fd..fe21942d0 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ testsBP/*/*/*.elf* testsBP/*/OBJ/* testsBP/*/*.a wally-pipelined/linux-testgen/linux-testvectors/* +wally-pipelined/linux-testgen/nohup* !wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py !wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh wally-pipelined/regression/slack-notifier/slack-webhook-url.txt diff --git a/wally-pipelined/linux-testgen/logAllBuildroot.sh b/wally-pipelined/linux-testgen/logAllBuildroot.sh index df8b506a8..d045ee98c 100755 --- a/wally-pipelined/linux-testgen/logAllBuildroot.sh +++ b/wally-pipelined/linux-testgen/logAllBuildroot.sh @@ -24,4 +24,4 @@ # 
=========== Just Do the Thing ========== # Uncomment this version for the whole thing # - Logs info needed by buildroot testbench -(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | pv -l | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog +(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog diff --git a/wally-pipelined/linux-testgen/parse_gdb_output.py b/wally-pipelined/linux-testgen/parse_gdb_output.py index 739a97e31..7e48fa637 100755 --- a/wally-pipelined/linux-testgen/parse_gdb_output.py +++ b/wally-pipelined/linux-testgen/parse_gdb_output.py @@ -44,7 +44,7 @@ try: instrs += 1 storeAMO = '' if instrs % 10000 == 0: - print(instrs) + print(instrs,flush=True) # Instr in human assembly wPC.write('{} ***\n'.format(' '.join(l.split(':')[1].split()[0:2]))) if '\tld' in l or '\tlw' in l or '\tlh' in l or '\tlb' in l: @@ -63,6 +63,15 @@ try: storeLoc = readLoc storeReg = l.split()[-1].split(',')[1] storeAMO = l.split()[-2] + if '\tlr' in l: + currentRead = l.split()[-1].split(',')[0] + readOffset = "0" + readLoc = l.split()[-1].split('(')[1][:-1] + readType = 
"0" # *** I don't see that readType or lastReadType are ever used; we can probably get rid of them + if '\tsc' in l: + storeOffset = "0" + storeLoc = l.split()[-1].split('(')[1][:-1] + storeReg = l.split()[-1].split(',')[1] if '\tsd' in l or '\tsw' in l or '\tsh' in l or '\tsb' in l: s = l.split('#')[0].split()[-1] storeReg = s.split(',')[0] From 86e369df5284d9357a390d8b2ac9fe91f8152a3e Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 24 Jun 2021 11:20:21 -0400 Subject: [PATCH 06/20] fixed forwarding --- wally-pipelined/src/ebu/ahblite.sv | 4 ++-- wally-pipelined/src/ieu/controller.sv | 3 ++- wally-pipelined/src/ieu/datapath.sv | 20 +++++++++++++------ wally-pipelined/src/ieu/forward.sv | 2 +- wally-pipelined/src/ieu/ieu.sv | 6 ++++-- wally-pipelined/src/lsu/lsu.sv | 2 +- wally-pipelined/src/muldiv/muldiv.sv | 4 ++-- wally-pipelined/src/privileged/csr.sv | 4 ++-- wally-pipelined/src/privileged/privileged.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 9 ++++----- 10 files changed, 33 insertions(+), 23 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index c59dfa9b5..9ace1077b 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -62,7 +62,7 @@ module ahblite ( // Signals to PMA checker (metadata of proposed access) output logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // Return from bus - output logic [`XLEN-1:0] ReadDataW, + output logic [`XLEN-1:0] ReadDataM, ReadDataW, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, input logic HREADY, HRESP, @@ -87,7 +87,7 @@ module ahblite ( logic GrantData; logic [31:0] AccessAddress; logic [2:0] AccessSize, PTESize, ISize; - logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedData, ReadDataWnext, WriteData; + logic [`AHBW-1:0] HRDATAMasked, CapturedData, ReadDataWnext, WriteData; logic IReady, DReady; logic CaptureDataM,CapturedDataAvailable; diff --git a/wally-pipelined/src/ieu/controller.sv 
b/wally-pipelined/src/ieu/controller.sv index b27541d42..09ded48ba 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -53,6 +53,7 @@ module controller( output logic [1:0] AtomicM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit + output logic [2:0] ResultSrcM, output logic InstrValidM, // Writeback stage control signals input logic StallW, FlushW, @@ -72,7 +73,7 @@ module controller( // pipelined control signals logic RegWriteE; - logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; + logic [2:0] ResultSrcD, ResultSrcE; logic [1:0] MemRWD, MemRWE; logic JumpD; logic BranchD, BranchE; diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 13db65a37..848ed89a5 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -45,6 +45,9 @@ module datapath ( // Memory stage signals input logic StallM, FlushM, input logic [`XLEN-1:0] FWriteDataM, + input logic SquashSCM, + input logic [2:0] ResultSrcM, + input logic [`XLEN-1:0] CSRReadValM, ReadDataM, MulDivResultM, output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals @@ -54,7 +57,6 @@ module datapath ( input logic RegWriteW, input logic SquashSCW, input logic [2:0] ResultSrcW, - // input logic [`XLEN-1:0] PCLinkW, input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, // Hazard Unit signals output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, @@ -76,7 +78,9 @@ module datapath ( logic [`XLEN-1:0] WriteDataE; logic [`XLEN-1:0] TargetBaseE; // Memory stage signals + logic [`XLEN-1:0] SCResultM; logic [`XLEN-1:0] ALUResultM; + logic [`XLEN-1:0] ResultM; // Writeback stage signals logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] ALUResultW; @@ -102,8 +106,8 @@ module datapath ( flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux4 #(`XLEN) faemux(RD1E, WriteDataW, 
ALUResultM, FWriteDataM, ForwardAE, PreSrcAE); - mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, FWriteDataM, ForwardBE, WriteDataE); + mux4 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, FWriteDataM, ForwardAE, PreSrcAE); + mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, FWriteDataM, ForwardBE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE); @@ -118,6 +122,7 @@ module datapath ( assign MemAdrM = ALUResultM; flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); + mux5 #(`XLEN) resultmuxM(ALUResultM, ReadDataM, CSRReadValM, MulDivResultM, SCResultM, ResultSrcM, ResultM); // Writeback stage pipeline register and logic flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); @@ -125,13 +130,16 @@ module datapath ( // handle Store Conditional result if atomic extension supported generate - if (`A_SUPPORTED) + if (`A_SUPPORTED) begin + assign SCResultM = SquashSCM ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; assign SCResultW = SquashSCW ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; - else + end else begin + assign SCResultM = 0; assign SCResultW = 0; + end endgenerate - mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmuxW(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); /* -----\/----- EXCLUDED -----\/----- // This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage. // *** need to look at how the decoder is coded to fix. 
diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index cdc6d2700..6729ed424 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -43,7 +43,7 @@ module forward( if (Rs1E != 5'b0) if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; - else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; + else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; if (Rs2E != 5'b0) if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 0bd9d598f..73c619f62 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -43,9 +43,10 @@ module ieu ( // Memory stage interface input logic DataMisalignedM, input logic DataAccessFaultM, - input logic SquashSCW, input logic FWriteIntM, input logic [`XLEN-1:0] FWriteDataM, + input logic SquashSCM, + input logic [`XLEN-1:0] CSRReadValM, ReadDataM, MulDivResultM, output logic [1:0] MemRWM, output logic [1:0] AtomicM, output logic [`XLEN-1:0] MemAdrM, WriteDataM, @@ -55,6 +56,7 @@ module ieu ( input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, input logic FWriteIntW, input logic [`XLEN-1:0] FPUResultW, + input logic SquashSCW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidM, InstrValidW, // hazards @@ -72,7 +74,7 @@ module ieu ( logic [2:0] FlagsE; logic [4:0] ALUControlE; logic ALUSrcAE, ALUSrcBE; - logic [2:0] ResultSrcW; + logic [2:0] ResultSrcM, ResultSrcW; logic TargetSrcE; // forwarding signals diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index ffa79adfe..f44340a45 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -45,6 +45,7 @@ module lsu ( output logic [1:0] AtomicMaskedM, output logic DataMisalignedM, output logic CommittedM, + output logic SquashSCM, // Writeback Stage input logic MemAckW, input logic [`XLEN-1:0] 
ReadDataW, @@ -81,7 +82,6 @@ module lsu ( ); - logic SquashSCM; logic DTLBPageFaultM; logic MemAccessM; logic [1:0] CurrState, NextState; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index e10b0c55d..691b3b5ae 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -34,7 +34,7 @@ module muldiv ( input logic [2:0] Funct3E, input logic MulDivE, W64E, // Writeback stage - output logic [`XLEN-1:0] MulDivResultW, + output logic [`XLEN-1:0] MulDivResultM, MulDivResultW, // Divide Done output logic DivDoneE, output logic DivBusyE, @@ -44,7 +44,7 @@ module muldiv ( generate if (`M_SUPPORTED) begin - logic [`XLEN-1:0] MulDivResultE, MulDivResultM; + logic [`XLEN-1:0] MulDivResultE; logic [`XLEN-1:0] PrelimResultE; logic [`XLEN-1:0] QuotE, RemE; logic [`XLEN*2-1:0] ProdE; diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 213bcde33..e88cb561d 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -65,12 +65,12 @@ module csr #(parameter input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, // output logic [11:0] MIP_REGW, SIP_REGW, UIP_REGW, MIE_REGW, SIE_REGW, UIE_REGW, - output logic [`XLEN-1:0] CSRReadValW, + output logic [`XLEN-1:0] CSRReadValM, CSRReadValW, output logic IllegalCSRAccessM ); localparam NOP = 32'h13; - logic [`XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRNReadValM, CSRCReadValM, CSRReadValM; + logic [`XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRNReadValM, CSRCReadValM; logic [`XLEN-1:0] CSRSrcM, CSRRWM, CSRRSM, CSRRCM, CSRWriteValM; logic [`XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, USTATUS_REGW; diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 1275cd4b8..ab794a4ad 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -34,7 +34,7 @@ module 
privileged ( input logic [`XLEN-1:0] SrcAM, input logic [`XLEN-1:0] PCF,PCD,PCE,PCM, input logic [31:0] InstrD, InstrE, InstrM, InstrW, - output logic [`XLEN-1:0] CSRReadValW, + output logic [`XLEN-1:0] CSRReadValM, CSRReadValW, output logic [`XLEN-1:0] PrivilegedNextPCM, output logic RetM, TrapM, NonBusTrapM, output logic ITLBFlushF, DTLBFlushM, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index b32770b9a..1fd1408a4 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -71,7 +71,7 @@ module wallypipelinedhart ( logic [31:0] InstrD, InstrE, InstrM, InstrW; logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW; logic [`XLEN-1:0] PCTargetE; - logic [`XLEN-1:0] CSRReadValW, MulDivResultW; + logic [`XLEN-1:0] CSRReadValM, MulDivResultM, CSRReadValW, MulDivResultW; logic [`XLEN-1:0] PrivilegedNextPCM; logic [1:0] MemRWM; logic InstrValidM, InstrValidW; @@ -96,7 +96,7 @@ module wallypipelinedhart ( logic [1:0] FMemRWM; logic RegWriteD; logic [`XLEN-1:0] FWriteDataM; - logic SquashSCW; + logic SquashSCM, SquashSCW; logic FStallD; logic FWriteIntE, FWriteIntW, FWriteIntM; logic FDivBusyE; @@ -136,7 +136,7 @@ module wallypipelinedhart ( logic [2:0] Funct3M; logic [`XLEN-1:0] MemAdrM, WriteDataM; logic [`PA_BITS-1:0] MemPAdrM; - logic [`XLEN-1:0] ReadDataW; + logic [`XLEN-1:0] ReadDataM, ReadDataW; logic [`PA_BITS-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrRData; logic InstrReadF; @@ -153,8 +153,7 @@ module wallypipelinedhart ( logic[`XLEN-1:0] WriteDatatmpM; logic [4:0] InstrClassM; - - + ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller From 2155a4e485fd9b50a52a3f937fcf583b7543e1ee Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 24 Jun 2021 17:39:37 -0400 Subject: [PATCH 07/20] Revert "fixed forwarding" This 
reverts commit 86e369df5284d9357a390d8b2ac9fe91f8152a3e. --- wally-pipelined/src/ebu/ahblite.sv | 4 ++-- wally-pipelined/src/ieu/controller.sv | 3 +-- wally-pipelined/src/ieu/datapath.sv | 20 ++++++------------- wally-pipelined/src/ieu/forward.sv | 2 +- wally-pipelined/src/ieu/ieu.sv | 6 ++---- wally-pipelined/src/lsu/lsu.sv | 2 +- wally-pipelined/src/muldiv/muldiv.sv | 4 ++-- wally-pipelined/src/privileged/csr.sv | 4 ++-- wally-pipelined/src/privileged/privileged.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 9 +++++---- 10 files changed, 23 insertions(+), 33 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 9ace1077b..c59dfa9b5 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -62,7 +62,7 @@ module ahblite ( // Signals to PMA checker (metadata of proposed access) output logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // Return from bus - output logic [`XLEN-1:0] ReadDataM, ReadDataW, + output logic [`XLEN-1:0] ReadDataW, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, input logic HREADY, HRESP, @@ -87,7 +87,7 @@ module ahblite ( logic GrantData; logic [31:0] AccessAddress; logic [2:0] AccessSize, PTESize, ISize; - logic [`AHBW-1:0] HRDATAMasked, CapturedData, ReadDataWnext, WriteData; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedData, ReadDataWnext, WriteData; logic IReady, DReady; logic CaptureDataM,CapturedDataAvailable; diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 09ded48ba..b27541d42 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -53,7 +53,6 @@ module controller( output logic [1:0] AtomicM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit - output logic [2:0] ResultSrcM, output logic InstrValidM, // Writeback stage control signals input logic StallW, FlushW, @@ -73,7 +72,7 @@ module controller( // 
pipelined control signals logic RegWriteE; - logic [2:0] ResultSrcD, ResultSrcE; + logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; logic [1:0] MemRWD, MemRWE; logic JumpD; logic BranchD, BranchE; diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 848ed89a5..13db65a37 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -45,9 +45,6 @@ module datapath ( // Memory stage signals input logic StallM, FlushM, input logic [`XLEN-1:0] FWriteDataM, - input logic SquashSCM, - input logic [2:0] ResultSrcM, - input logic [`XLEN-1:0] CSRReadValM, ReadDataM, MulDivResultM, output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals @@ -57,6 +54,7 @@ module datapath ( input logic RegWriteW, input logic SquashSCW, input logic [2:0] ResultSrcW, + // input logic [`XLEN-1:0] PCLinkW, input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, // Hazard Unit signals output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, @@ -78,9 +76,7 @@ module datapath ( logic [`XLEN-1:0] WriteDataE; logic [`XLEN-1:0] TargetBaseE; // Memory stage signals - logic [`XLEN-1:0] SCResultM; logic [`XLEN-1:0] ALUResultM; - logic [`XLEN-1:0] ResultM; // Writeback stage signals logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] ALUResultW; @@ -106,8 +102,8 @@ module datapath ( flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux4 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, FWriteDataM, ForwardAE, PreSrcAE); - mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, FWriteDataM, ForwardBE, WriteDataE); + mux4 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, FWriteDataM, ForwardAE, PreSrcAE); + mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, FWriteDataM, ForwardBE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, 
ALUSrcBE, SrcBE); @@ -122,7 +118,6 @@ module datapath ( assign MemAdrM = ALUResultM; flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); - mux5 #(`XLEN) resultmuxM(ALUResultM, ReadDataM, CSRReadValM, MulDivResultM, SCResultM, ResultSrcM, ResultM); // Writeback stage pipeline register and logic flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); @@ -130,16 +125,13 @@ module datapath ( // handle Store Conditional result if atomic extension supported generate - if (`A_SUPPORTED) begin - assign SCResultM = SquashSCM ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; + if (`A_SUPPORTED) assign SCResultW = SquashSCW ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; - end else begin - assign SCResultM = 0; + else assign SCResultW = 0; - end endgenerate - mux5 #(`XLEN) resultmuxW(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); /* -----\/----- EXCLUDED -----\/----- // This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage. // *** need to look at how the decoder is coded to fix. 
diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 6729ed424..cdc6d2700 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -43,7 +43,7 @@ module forward( if (Rs1E != 5'b0) if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; - else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; + else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; if (Rs2E != 5'b0) if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 73c619f62..0bd9d598f 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -43,10 +43,9 @@ module ieu ( // Memory stage interface input logic DataMisalignedM, input logic DataAccessFaultM, + input logic SquashSCW, input logic FWriteIntM, input logic [`XLEN-1:0] FWriteDataM, - input logic SquashSCM, - input logic [`XLEN-1:0] CSRReadValM, ReadDataM, MulDivResultM, output logic [1:0] MemRWM, output logic [1:0] AtomicM, output logic [`XLEN-1:0] MemAdrM, WriteDataM, @@ -56,7 +55,6 @@ module ieu ( input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, input logic FWriteIntW, input logic [`XLEN-1:0] FPUResultW, - input logic SquashSCW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidM, InstrValidW, // hazards @@ -74,7 +72,7 @@ module ieu ( logic [2:0] FlagsE; logic [4:0] ALUControlE; logic ALUSrcAE, ALUSrcBE; - logic [2:0] ResultSrcM, ResultSrcW; + logic [2:0] ResultSrcW; logic TargetSrcE; // forwarding signals diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index f44340a45..ffa79adfe 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -45,7 +45,6 @@ module lsu ( output logic [1:0] AtomicMaskedM, output logic DataMisalignedM, output logic CommittedM, - output logic SquashSCM, // Writeback Stage input logic MemAckW, input logic [`XLEN-1:0] 
ReadDataW, @@ -82,6 +81,7 @@ module lsu ( ); + logic SquashSCM; logic DTLBPageFaultM; logic MemAccessM; logic [1:0] CurrState, NextState; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 691b3b5ae..e10b0c55d 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -34,7 +34,7 @@ module muldiv ( input logic [2:0] Funct3E, input logic MulDivE, W64E, // Writeback stage - output logic [`XLEN-1:0] MulDivResultM, MulDivResultW, + output logic [`XLEN-1:0] MulDivResultW, // Divide Done output logic DivDoneE, output logic DivBusyE, @@ -44,7 +44,7 @@ module muldiv ( generate if (`M_SUPPORTED) begin - logic [`XLEN-1:0] MulDivResultE; + logic [`XLEN-1:0] MulDivResultE, MulDivResultM; logic [`XLEN-1:0] PrelimResultE; logic [`XLEN-1:0] QuotE, RemE; logic [`XLEN*2-1:0] ProdE; diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index e88cb561d..213bcde33 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -65,12 +65,12 @@ module csr #(parameter input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, // output logic [11:0] MIP_REGW, SIP_REGW, UIP_REGW, MIE_REGW, SIE_REGW, UIE_REGW, - output logic [`XLEN-1:0] CSRReadValM, CSRReadValW, + output logic [`XLEN-1:0] CSRReadValW, output logic IllegalCSRAccessM ); localparam NOP = 32'h13; - logic [`XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRNReadValM, CSRCReadValM; + logic [`XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRNReadValM, CSRCReadValM, CSRReadValM; logic [`XLEN-1:0] CSRSrcM, CSRRWM, CSRRSM, CSRRCM, CSRWriteValM; logic [`XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, USTATUS_REGW; diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index ab794a4ad..1275cd4b8 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -34,7 +34,7 @@ module 
privileged ( input logic [`XLEN-1:0] SrcAM, input logic [`XLEN-1:0] PCF,PCD,PCE,PCM, input logic [31:0] InstrD, InstrE, InstrM, InstrW, - output logic [`XLEN-1:0] CSRReadValM, CSRReadValW, + output logic [`XLEN-1:0] CSRReadValW, output logic [`XLEN-1:0] PrivilegedNextPCM, output logic RetM, TrapM, NonBusTrapM, output logic ITLBFlushF, DTLBFlushM, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 1fd1408a4..b32770b9a 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -71,7 +71,7 @@ module wallypipelinedhart ( logic [31:0] InstrD, InstrE, InstrM, InstrW; logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW; logic [`XLEN-1:0] PCTargetE; - logic [`XLEN-1:0] CSRReadValM, MulDivResultM, CSRReadValW, MulDivResultW; + logic [`XLEN-1:0] CSRReadValW, MulDivResultW; logic [`XLEN-1:0] PrivilegedNextPCM; logic [1:0] MemRWM; logic InstrValidM, InstrValidW; @@ -96,7 +96,7 @@ module wallypipelinedhart ( logic [1:0] FMemRWM; logic RegWriteD; logic [`XLEN-1:0] FWriteDataM; - logic SquashSCM, SquashSCW; + logic SquashSCW; logic FStallD; logic FWriteIntE, FWriteIntW, FWriteIntM; logic FDivBusyE; @@ -136,7 +136,7 @@ module wallypipelinedhart ( logic [2:0] Funct3M; logic [`XLEN-1:0] MemAdrM, WriteDataM; logic [`PA_BITS-1:0] MemPAdrM; - logic [`XLEN-1:0] ReadDataM, ReadDataW; + logic [`XLEN-1:0] ReadDataW; logic [`PA_BITS-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrRData; logic InstrReadF; @@ -153,7 +153,8 @@ module wallypipelinedhart ( logic[`XLEN-1:0] WriteDatatmpM; logic [4:0] InstrClassM; - + + ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller From 7e3483b28373c1686c9e51c346d8cb2ab73ef92b Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 24 Jun 2021 18:39:18 -0400 Subject: [PATCH 08/20] FPU forwarding reworked 
pt.1 --- wally-pipelined/src/fpu/fctrl.sv | 20 +- wally-pipelined/src/fpu/fpu.sv | 139 +++--- wally-pipelined/src/fpu/fpuaddcvt1.sv | 14 +- wally-pipelined/src/fpu/fpuclassify.sv | 16 +- wally-pipelined/src/fpu/fpucmp1.sv | 269 ++++++++++- wally-pipelined/src/fpu/fpucmp2.sv | 422 +++++++++--------- wally-pipelined/src/fpu/fpuhazard.sv | 60 ++- wally-pipelined/src/fpu/fsgn.sv | 16 +- wally-pipelined/src/hazard/hazard.sv | 4 +- wally-pipelined/src/ieu/controller.sv | 8 +- wally-pipelined/src/ieu/datapath.sv | 28 +- wally-pipelined/src/ieu/forward.sv | 8 +- wally-pipelined/src/ieu/ieu.sv | 9 +- .../src/wally/wallypipelinedhart.sv | 31 +- .../testbench/testbench-imperas.sv | 2 +- 15 files changed, 653 insertions(+), 393 deletions(-) diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index 5749d0db7..a9fcb564e 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -6,6 +6,7 @@ module fctrl ( input logic [2:0] Funct3D, input logic [2:0] FRM_REGW, output logic IllegalFPUInstrD, + output logic IsFPD, output logic FWriteEnD, output logic FDivStartD, output logic [2:0] FResultSelD, @@ -27,20 +28,19 @@ module fctrl ( //write is enabled for all fp instruciton op codes //sans fp load - logic isFP, isFPLD; always_comb begin //case statement is easier to modify //in case of errors case(OpD) //fp instructions sans load - 7'b1010011 : isFP = 1'b1; - 7'b1000011 : isFP = 1'b1; - 7'b1000111 : isFP = 1'b1; - 7'b1001011 : isFP = 1'b1; - 7'b1001111 : isFP = 1'b1; - 7'b0100111 : isFP = 1'b1; - 7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111 - default : isFP = 1'b0; + 7'b1010011 : IsFPD = 1'b1; + 7'b1000011 : IsFPD = 1'b1; + 7'b1000111 : IsFPD = 1'b1; + 7'b1001011 : IsFPD = 1'b1; + 7'b1001111 : IsFPD = 1'b1; + 7'b0100111 : IsFPD = 1'b1; + 7'b0000111 : IsFPD = 1'b1;// KEP change 7'b1010011 to 7'b0000111 + default : IsFPD = 1'b0; endcase end @@ -218,5 +218,5 @@ module fctrl ( // is add/cvt and is to int or is 
classify or is cmp and not max/min or is output ReadData1 and is mv assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]); // if not writting to int reg and not a store function and not move - assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP; + assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & IsFPD; endmodule diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index e886c66e3..7f93d33a7 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -30,15 +30,15 @@ module fpu ( input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [31:0] InstrD, input logic [`XLEN-1:0] ReadDataW, // Read data from memory - input logic RegWriteD, // register write enable from ieu input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - output logic [1:0] FMemRWM, // Read/write enable for memory {read, write} + output logic IsFPD, IsFPE, // Read/write enable for memory {read, write} output logic FStallD, // Stall the decode stage if Div/Sqrt instruction output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable - output logic [`XLEN-1:0] FWriteDataM, // Data to be written to memory + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`XLEN-1:0] FIntResM, output logic FDivBusyE, // Is the divison/sqrt unit busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM, // FPU flags @@ -51,24 +51,27 @@ module fpu ( logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction - logic 
[1:0] FMemRWD, FMemRWE; // Read and write enable for memory - logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal - logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal - logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal - logic FInput2UsedD; // Is input 2 used - logic FInput3UsedD; // Is input 3 used + logic [1:0] FMemRWD; // Read and write enable for memory + logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal + logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal + logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal + logic SrcYUsedD; // Is input 2 used + logic SrcZUsedD; // Is input 3 used logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component logic SelLoadInputE, SelLoadInputM; // Select which adress to load when single precision + logic FInput2UsedD, FInput3UsedD; + logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining logic [63:0] FWDM; // Write data for FP register logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FInput1E, FInput1M, FInput1W, FInput1tmpE; // Input 1 to the various units (after forwarding) - logic [63:0] FInput2E, FInput2M; // Input 2 to the various units (after forwarding) - logic [63:0] FInput3E, FInput3M; // Input 3 to the various units (after forwarding) + logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] SrcXMAligned; + logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding) + logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after 
forwarding) logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions // div/sqrt signals @@ -123,19 +126,13 @@ module fpu ( logic [4:0] FAddFlagsM, FAddFlagsW; // cmp signals - logic [7:0] WE, WM; - logic [7:0] XE, XM; - logic ANaNE, ANaNM; - logic BNaNE, BNaNM; - logic AzeroE, AzeroM; - logic BzeroE, BzeroM; - logic CmpInvalidM, CmpInvalidW; - logic [1:0] CmpFCCM, CmpFCCW; - logic [63:0] FCmpResultM, FCmpResultW; + logic CmpInvalidE, CmpInvalidM, CmpInvalidW; + logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW; // fsgn signals logic [63:0] SgnResultE, SgnResultM, SgnResultW; logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + logic [63:0] FResM; // instantiation of W stage regfile signals logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; @@ -150,8 +147,6 @@ module fpu ( //DECODE STAGE - // Hazard unit for FPU - fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); // top-level controller for FPU fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); @@ -172,22 +167,45 @@ module fpu ( //***************** // other D/E pipe registers //***************** - flopenrc #(64) DEReg14(clk, reset, FlushE, ~StallE, FPUResult64W, FPUResult64E); - flopenrc #(28) CtrlRegE(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FDivStartD, FForwardInput1D, FForwardInput2D, FForwardInput3D, FWriteIntD, FOutputInput2D, FMemRWD, InstrD[15]}, - {FWriteEnE, FResultSelE, FrmE, FmtE, RdE, FOpCtrlE, FDivStartE, FForwardInput1E, FForwardInput2E, FForwardInput3E, FWriteIntE, FOutputInput2E, FMemRWE, SelLoadInputE}); - + // flopenrc #(64) DEReg14(clk, reset, FlushE, ~StallE, FPUResult64W, FPUResult64E); + // flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FWriteEnD, FWriteEnE); + // flopenrc #(3) CtrlRegE2(clk, reset, FlushE, ~StallE, FResultSelD, FResultSelE); + // flopenrc #(3) CtrlRegE3(clk, reset, 
FlushE, ~StallE, FrmD, FrmE); + // flopenrc #(1) CtrlRegE4(clk, reset, FlushE, ~StallE, FmtD, FmtE); + // flopenrc #(5) CtrlRegE5(clk, reset, FlushE, ~StallE, InstrD[11:7], RdE); + // flopenrc #(4) CtrlRegE6(clk, reset, FlushE, ~StallE, FOpCtrlD, FOpCtrlE); + flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + // flopenrc #(1) CtrlRegE8(clk, reset, FlushE, ~StallE, FWriteIntD, FWriteIntE); + // flopenrc #(1) CtrlRegE9(clk, reset, FlushE, ~StallE, FOutputInput2D, FOutputInput2E); + // flopenrc #(2) CtrlRegE10(clk, reset, FlushE, ~StallE, FMemRWD, FMemRWE); + // flopenrc #(1) CtrlRegE11(clk, reset, FlushE, ~StallE, InstrD[15], SelLoadInputE); + flopenrc #(20) CtrlRegE(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD, InstrD[15], IsFPD}, + {FWriteEnE, FResultSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE, SelLoadInputE, IsFPE}); + //EXECUTION STAGE // input muxs for forwarding - mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); - mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, FForwardInput1E, FInput1tmpE); - mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); - mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); - mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E); + // single vs double for SRCAM + // mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); + // //input 1 forwarding mux + // mux4 #(64) SrcXEmux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, ForwardXE, SrcXtmpE); + // mux3 #(64) SrcYEmux(FRD2E, FPUResult64W, FPUResult64E, ForwardYE, SrcYE); + // mux2 #(64) SrcZEmux(FRD3E, FPUResult64E, ForwardZE, SrcZE); + // mux2 #(64) FOutputInput2mux(SrcXtmpE, SrcYE, 
FOutputInput2E, SrcXE); + + // Hazard unit for FPU + fpuhazard hazard(.*); + + mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); + mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + // first of two-stage instance of floating-point fused multiply-add unit - fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*); + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*); // first and only instance of floating-point divider logic fpdivClk; @@ -198,10 +216,10 @@ module fpu ( .ECLK(fpdivClk)); // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(FInput1E), .q(DivInput1E), + flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(FInput2E), .q(DivInput2E), + flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); @@ -211,20 +229,21 @@ module fpu ( fpuaddcvt1 fpadd1 (.*); // first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]); + fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE); // first and only instance of floating-point sign converter fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); // first and only instance of floating-point classify unit fpuclassify fpuclass (.*); + assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; //***************** //fpregfile D/E pipe registers //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FInput1E, FInput1M); - flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FInput2E, FInput2M); - flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FInput3E, FInput3M); + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); //***************** // fma E/M pipe registers @@ -276,12 +295,15 @@ module fpu ( //***************** // fpcmp E/M pipe registers //***************** - flopenrc #(8) EMRegCmp1(clk, reset, FlushM, ~StallM, WE, WM); - flopenrc #(8) EMRegCmp2(clk, reset, FlushM, ~StallM, XE, XM); - flopenrc #(1) EMRegcmp3(clk, reset, FlushM, ~StallM, ANaNE, ANaNM); - flopenrc #(1) EMRegCmp4(clk, reset, FlushM, ~StallM, BNaNE, BNaNM); - flopenrc #(1) EMRegCmp5(clk, reset, FlushM, ~StallM, AzeroE, AzeroM); - flopenrc #(1) EMRegCmp6(clk, reset, FlushM, ~StallM, BzeroE, BzeroM); + // flopenrc #(8) EMRegCmp1(clk, reset, FlushM, ~StallM, WE, WM); + // flopenrc #(8) EMRegCmp2(clk, reset, FlushM, ~StallM, XE, XM); + // flopenrc #(1) EMRegcmp3(clk, reset, FlushM, ~StallM, ANaNE, ANaNM); + // flopenrc #(1) EMRegCmp4(clk, reset, FlushM, ~StallM, BNaNE, BNaNM); + // flopenrc #(1) EMRegCmp5(clk, reset, FlushM, ~StallM, AzeroE, AzeroM); + // flopenrc #(1) EMRegCmp6(clk, reset, FlushM, ~StallM, BzeroE, BzeroM); + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); + // flopenrc #(2) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpFCCE, CmpFCCM); + flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM); // put this in for the event we want to delay fsgn - will otherwise bypass //***************** @@ -300,7 +322,7 @@ module fpu ( flopenrc #(5) EMReg5(clk, reset, FlushM, ~StallM, RdE, RdM); flopenrc #(4) 
EMReg6(clk, reset, FlushM, ~StallM, FOpCtrlE, FOpCtrlM); flopenrc #(1) EMReg7(clk, reset, FlushM, ~StallM, FWriteIntE, FWriteIntM); - flopenrc #(2) EMReg8(clk, reset, FlushM, ~StallM, FMemRWE, FMemRWM); + // flopenrc #(2) EMReg8(clk, reset, FlushM, ~StallM, FMemRWE, FMemRWM); flopenrc #(1) EMReg9(clk, reset, FlushM, ~StallM, SelLoadInputE, SelLoadInputM); //***************** @@ -310,32 +332,35 @@ module fpu ( //BEGIN MEMORY STAGE - assign FWriteDataM = FmtM ? FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]}; + mux2 #(64) FResMux(AlignedSrcAM, SgnResultM, FResultSelM == 3'b011, FResM); + assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(SrcXMAligned, FCmpResultM[`XLEN-1:0], ClassResultM[`XLEN-1:0], {FResultSelM == 3'b101, FResultSelM == 3'b001}, FIntResM); + //adjecent adress values are sent to the FPU, select the correct one // -imm is 80000 most of the time vs the error one which is 00000 // mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); + // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); - fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*); + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); // second instance of two-stage floating-point add/cvt unit fpuaddcvt2 fpadd2 (.*); // second instance of two-stage floating-point comparator - fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), - .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*); + // fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), + // .Bzero(BzeroM), .w(WM), .x(XM), 
.Sel({1'b0, FmtM}), .op1(SrcXM), .op2(SrcYM), .*); // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - //***************** //fpregfile M/W pipe registers //***************** - flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, FInput1M, FInput1W); + flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW); + flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW); //***************** // fma M/W pipe registers @@ -360,7 +385,7 @@ module fpu ( // fpcmp M/W pipe registers //***************** flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW); - flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); + // flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW); //***************** @@ -396,10 +421,10 @@ module fpu ( // mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); + // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); //***RV32D needs to give two bus transactions mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW); - mux2 #(64) FLoadStoreResultMux(FLoadResultW, FInput1W, |FOpCtrlW[2:1], FLoadStoreResultW); + mux2 #(64) FLoadStoreResultMux(FLoadResultW, SrcYW, |FOpCtrlW[2:1], FLoadStoreResultW); diff --git a/wally-pipelined/src/fpu/fpuaddcvt1.sv b/wally-pipelined/src/fpu/fpuaddcvt1.sv index febd47d1b..8f045dcdb 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt1.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv @@ -27,10 +27,10 @@ // -module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, 
AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE); +module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE); - input logic [63:0] FInput1E; // 1st input operand (A) - input logic [63:0] FInput2E; // 2nd input operand (B) + input logic [63:0] SrcXE; // 1st input operand (A) + input logic [63:0] SrcYE; // 2nd input operand (B) input logic [3:0] FOpCtrlE; // Function opcode input logic FmtE; // Result Precision (1 for double, 0 for single) @@ -81,12 +81,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, // and the sign of the first operand is set appropratiately based on // if the operation is absolute value or negation. - convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P); + convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P); // Test for exceptions and return the "Invalid Operation" and // "Denormalized" Input Flags. The "AddSelInvE" is used in // the third pipeline stage to select the result. Also, AddOp1NormE - // and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized. + // and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized. // sub is one if the effective operation is subtaction. 
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, @@ -159,8 +159,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, // Place either the sign-extened 32-bit value or the original 64-bit value // into IntValue (to be used for integer to floating point conversion) - assign IntValue [31:0] = FInput1E[31:0]; - assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32]; + assign IntValue [31:0] = SrcXE[31:0]; + assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32]; // If doing an integer to floating point conversion, mantissaA3 is set to // IntVal and the prenomalized exponent is set to 1084. Otherwise, diff --git a/wally-pipelined/src/fpu/fpuclassify.sv b/wally-pipelined/src/fpu/fpuclassify.sv index 1000bdf42..b320b2f07 100644 --- a/wally-pipelined/src/fpu/fpuclassify.sv +++ b/wally-pipelined/src/fpu/fpuclassify.sv @@ -1,7 +1,8 @@ + `include "wally-config.vh" module fpuclassify ( - input logic [63:0] FInput1E, + input logic [63:0] SrcXE, input logic FmtE, // 0-single 1-double output logic [63:0] ClassResultE ); @@ -13,9 +14,9 @@ module fpuclassify ( logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan; // single and double precision layouts - assign single = FInput1E[63:32]; - assign double = FInput1E; - assign sign = FInput1E[63]; + assign single = SrcXE[63:32]; + assign double = SrcXE; + assign sign = SrcXE[63]; // basic calculations for readabillity assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23]; @@ -43,10 +44,7 @@ module fpuclassify ( // bit 7 - +infinity // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResultE = FmtE ? 
{{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity} : - {{22{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {32{1'b0}}}; - + assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, + ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity}; endmodule diff --git a/wally-pipelined/src/fpu/fpucmp1.sv b/wally-pipelined/src/fpu/fpucmp1.sv index 1cf267f22..3a8245e63 100755 --- a/wally-pipelined/src/fpu/fpucmp1.sv +++ b/wally-pipelined/src/fpu/fpucmp1.sv @@ -1,3 +1,4 @@ + // // File name : fpcomp.v // Title : Floating-Point Comparator @@ -17,9 +18,9 @@ // and correct for sign bits // // This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal Sel that indicates the type of +// signals, and a 2-bit signal FOpCtrlE that indicates the type of // operands being compared as indicated below. 
-// Sel Description +// FOpCtrlE Description // 00 double precision numbers // 01 single precision numbers // 10 half precision numbers @@ -37,24 +38,41 @@ // It also produces an invalid operation flag, which is one // if either of the input operands is a signaling NaN per 754 -module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec - - input logic [63:0] op1; - input logic [63:0] op2; - input logic [1:0] Sel; +`include "wally-config.vh" +module fpucmp1 ( + input logic [63:0] op1, + input logic [63:0] op2, + input logic [2:0] FOpCtrlE, + input logic FmtE, - output logic [7:0] w, x; - output logic ANaN, BNaN; - output logic Azero, Bzero; + + output logic Invalid, // Invalid Operation + // output logic [1:0] FCC, // Condition Codes + output logic [63:0] FCmpResultE); + // Perform magnitude comparison between the 63 least signficant bits + // of the input operands. Only LT and EQ are returned, since GT can + // be determined from these values. + logic [1:0] FCC; // Condition Codes + logic [7:0] w, x; + logic ANaN, BNaN; + logic Azero, Bzero; + logic LT; // magnitude op1 < magnitude op2 + logic EQ; // magnitude op1 = magnitude op2 + + magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); + + // Determine final values based on output of magnitude comparison, + // sign bits, and special case testing. + exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE); // Perform magnitude comparison between the 63 least signficant bits // of the input operands. Only LT and EQ are returned, since GT can // be determined from these values. - magcompare64b_1 magcomp2 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); + magcompare64b_2 magcomp2 (LT, EQ, w, x); // Determine final values based on output of magnitude comparison, // sign bits, and special case testing. 
- exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, Sel); + exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*); endmodule // fpcomp @@ -178,9 +196,9 @@ module magcompare64b_1 (w, x, A, B); endmodule // magcompare64b // This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of // operands being compared as indicated below. -// Sel Description +// FOpCtrlE Description // 00 double precision numbers // 01 single precision numbers // 10 half precision numbers @@ -196,11 +214,11 @@ endmodule // magcompare64b // It also produces a invalid operation flag, which is one // if either of the input operands is a signaling NaN. -module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); +module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE); input logic [63:0] A; input logic [63:0] B; - input logic [1:0] Sel; + input logic [2:0] FOpCtrlE; logic dp, sp, hp; @@ -209,9 +227,9 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); output logic Azero; output logic Bzero; - assign dp = !Sel[1]&!Sel[0]; - assign sp = !Sel[1]&Sel[0]; - assign hp = Sel[1]&!Sel[0]; + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; // Test if A or B is NaN. assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & @@ -232,3 +250,216 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); assign Bzero = (B[62:0] == 63'h0); endmodule // exception_cmp +// +// File name : fpcomp.v +// Title : Floating-Point Comparator +// project : FPU +// Library : fpcomp +// Author(s) : James E. 
Stine +// Purpose : definition of main unit to floating-point comparator +// notes : +// +// Copyright Oklahoma State University +// +// Floating Point Comparator (Algorithm) +// +// 1.) Performs sign-extension if the inputs are 32-bit integers. +// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// 3.) Check for special cases (+0=-0, unordered, and infinite values) +// and correct for sign bits +// +// This module takes 64-bits inputs op1 and op2, VSS, and VDD +// signals, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. +// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 (unused) +// +// The comparator produces a 2-bit signal FCC, which +// indicates the result of the comparison: +// +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN per 754 + + +/*module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule*/ // magcompare2b + +// 2-bit magnitude comparator +// This module compares two 2-bit values A and B. LT is '1' if A < B +// and GT is '1'if A > B. LT and GT are both '0' if A = B. 
However, +// this version actually incorporates don't cares into the equation to +// simplify the optimization + +// module magcompare2c (LT, GT, A, B); + +// input logic [1:0] A; +// input logic [1:0] B; + +// output logic LT; +// output logic GT; + +// assign LT = B[1] | (!A[1]&B[0]); +// assign GT = A[1] | (!B[1]&A[0]); + +// endmodule // magcompare2b + +// This module compares two 64-bit values A and B. LT is '1' if A < B +// and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// This structure was modified so +// that it only does a strict magnitdude comparison, and only +// returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// of 63 2-bit magnitude comparators, followed by one OR gates. +// +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. +// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare64b_2 (LT, EQ, w, x); + + input logic [7:0] w; + input logic [7:0] x; + logic [3:0] y; + logic [3:0] z; + logic [1:0] a; + logic [1:0] b; + logic GT; + + output logic LT; + output logic EQ; + + magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); + magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); + magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); + magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); + + magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); + magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); + + magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); + + assign EQ = ~(LT | GT); + +endmodule // magcompare64b + +// This module takes 64-bits inputs A and B, two magnitude comparison +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. 
+// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 bfloat precision numbers +// +// The comparator produces a 2-bit signal fcc, which +// indicates the result of the comparison as follows: +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// It also produces a invalid operation flag, which is one +// if either of the input operands is a signaling NaN. + +module exception_cmp_2 ( + input logic [63:0] A, + input logic [63:0] B, + input logic FmtE, + input logic LT_mag, + input logic EQ_mag, + input logic [2:0] FOpCtrlE, + + output logic invalid, + output logic [1:0] fcc, + output logic [63:0] FCmpResultE, + + input logic Azero, + input logic Bzero, + input logic ANaN, + input logic BNaN); + + logic dp; + logic sp; + logic hp; + logic ASNaN; + logic BSNaN; + logic UO; + logic GT; + logic LT; + logic EQ; + logic [62:0] sixtythreezeros = 63'h0; + + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; + + // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating + // point comparison is being performed. + assign UO = (ANaN | BNaN); + + // Test if A or B is a signaling NaN. + assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); + assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); + + // If either A or B is a signaling NaN the "Invalid Operation" + // exception flag is set to one; otherwise it is zero. + assign invalid = (ASNaN | BSNaN); + + // A and B are equal if (their magnitudes are equal) AND ((their signs are + // equal) or (their magnitudes are zero AND they are floating point + // numbers)). Also, A and B are not equal if they are unordered. 
+ assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); + + // A is less than B if (A is negative and B is posiive) OR + // (A and B are positive and the magnitude of A is less than + // the magnitude of B) or (A and B are negative integers and + // the magnitude of A is less than the magnitude of B) or + // (A and B are negative floating point numbers and + // the magnitude of A is greater than the magnitude of B). + // Also, A is not less than B if A and B are equal or unordered. + assign LT = ((~LT_mag & A[63] & B[63]) | + (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; + + // A is greater than B when LT, EQ, and UO are are false. + assign GT = ~(LT | EQ | UO); + + // Note: it may be possible to optimize the setting of fcc + // a little more, but it is probably not worth the effort. + + // Set the bits of fcc based on LT, GT, EQ, and UO + assign fcc[0] = LT | UO; + assign fcc[1] = GT | UO; + + always_comb begin + case (FOpCtrlE[2:0]) + 3'b111: FCmpResultE = LT ? A : B;//min + 3'b101: FCmpResultE = GT ? A : B;//max + 3'b010: FCmpResultE = {63'b0, EQ};//equal + 3'b001: FCmpResultE = {63'b0, LT};//less than + 3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal + default: FCmpResultE = 64'b0; + endcase + end + +endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpucmp2.sv b/wally-pipelined/src/fpu/fpucmp2.sv index 42a780ac1..ee14afb94 100755 --- a/wally-pipelined/src/fpu/fpucmp2.sv +++ b/wally-pipelined/src/fpu/fpucmp2.sv @@ -1,243 +1,243 @@ -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) 
Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal Sel that indicates the type of -// operands being compared as indicated below. -// Sel Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 +// // +// // File name : fpcomp.v +// // Title : Floating-Point Comparator +// // project : FPU +// // Library : fpcomp +// // Author(s) : James E. Stine +// // Purpose : definition of main unit to floating-point comparator +// // notes : +// // +// // Copyright Oklahoma State University +// // +// // Floating Point Comparator (Algorithm) +// // +// // 1.) Performs sign-extension if the inputs are 32-bit integers. +// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// // 3.) Check for special cases (+0=-0, unordered, and infinite values) +// // and correct for sign bits +// // +// // This module takes 64-bits inputs op1 and op2, VSS, and VDD +// // signals, and a 2-bit signal Sel that indicates the type of +// // operands being compared as indicated below. 
+// // Sel Description +// // 00 double precision numbers +// // 01 single precision numbers +// // 10 half precision numbers +// // 11 (unused) +// // +// // The comparator produces a 2-bit signal FCC, which +// // indicates the result of the comparison: +// // +// // fcc decscription +// // 00 A = B +// // 01 A < B +// // 10 A > B +// // 11 A and B are unordered (i.e., A or B is NaN) +// // +// // It also produces an invalid operation flag, which is one +// // if either of the input operands is a signaling NaN per 754 -module fpucmp2 ( - input logic [63:0] op1, - input logic [63:0] op2, - input logic [1:0] Sel, - input logic [7:0] w, x, - input logic ANaN, BNaN, - input logic Azero, Bzero, - input logic [3:0] FOpCtrlM, - input logic FmtM, +// module fpucmp2 ( +// input logic [63:0] op1, +// input logic [63:0] op2, +// input logic [1:0] Sel, +// input logic [7:0] w, x, +// input logic ANaN, BNaN, +// input logic Azero, Bzero, +// input logic [3:0] FOpCtrlM, +// input logic FmtM, - output logic Invalid, // Invalid Operation - output logic [1:0] FCC, // Condition Codes - output logic [63:0] FCmpResultM); +// output logic Invalid, // Invalid Operation +// output logic [1:0] FCC, // Condition Codes +// output logic [63:0] FCmpResultM); - logic LT; // magnitude op1 < magnitude op2 - logic EQ; // magnitude op1 = magnitude op2 +// logic LT; // magnitude op1 < magnitude op2 +// logic EQ; // magnitude op1 = magnitude op2 - // Perform magnitude comparison between the 63 least signficant bits - // of the input operands. Only LT and EQ are returned, since GT can - // be determined from these values. - magcompare64b_2 magcomp2 (LT, EQ, w, x); +// // Perform magnitude comparison between the 63 least signficant bits +// // of the input operands. Only LT and EQ are returned, since GT can +// // be determined from these values. +// magcompare64b_2 magcomp2 (LT, EQ, w, x); - // Determine final values based on output of magnitude comparison, - // sign bits, and special case testing. 
- exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); +// // Determine final values based on output of magnitude comparison, +// // sign bits, and special case testing. +// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); -endmodule // fpcomp +// endmodule // fpcomp -/*module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule*/ // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -// module magcompare2c (LT, GT, A, B); +// /*module magcompare2b (LT, GT, A, B); // input logic [1:0] A; // input logic [1:0] B; -// output logic LT; -// output logic GT; +// output logic LT; +// output logic GT; -// assign LT = B[1] | (!A[1]&B[0]); -// assign GT = A[1] | (!B[1]&A[0]); +// // Determine if A < B using a minimized sum-of-products expression +// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; +// // Determine if A > B using a minimized sum-of-products expression +// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; -// endmodule // magcompare2b +// endmodule*/ // magcompare2b -// This module compares two 64-bit values A and B. LT is '1' if A < B -// and EQ is '1'if A = B. LT and GT are both '0' if A > B. 
-// This structure was modified so -// that it only does a strict magnitdude comparison, and only -// returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// of 63 2-bit magnitude comparators, followed by one OR gates. -// -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. -// doi: 10.1109/ISCAS.2005.1464531 +// // 2-bit magnitude comparator +// // This module compares two 2-bit values A and B. LT is '1' if A < B +// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However, +// // this version actually incorporates don't cares into the equation to +// // simplify the optimization -module magcompare64b_2 (LT, EQ, w, x); +// // module magcompare2c (LT, GT, A, B); - input logic [7:0] w; - input logic [7:0] x; - logic [3:0] y; - logic [3:0] z; - logic [1:0] a; - logic [1:0] b; - logic GT; +// // input logic [1:0] A; +// // input logic [1:0] B; - output logic LT; - output logic EQ; +// // output logic LT; +// // output logic GT; + +// // assign LT = B[1] | (!A[1]&B[0]); +// // assign GT = A[1] | (!B[1]&A[0]); + +// // endmodule // magcompare2b + +// // This module compares two 64-bit values A and B. LT is '1' if A < B +// // and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// // This structure was modified so +// // that it only does a strict magnitdude comparison, and only +// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// // of 63 2-bit magnitude comparators, followed by one OR gates. +// // +// // J. E. Stine and M. J. Schulte, "A combined two's complement and +// // floating-point comparator," 2005 IEEE International Symposium on +// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// // doi: 10.1109/ISCAS.2005.1464531 + +// module magcompare64b_2 (LT, EQ, w, x); + +// input logic [7:0] w; +// input logic [7:0] x; +// logic [3:0] y; +// logic [3:0] z; +// logic [1:0] a; +// logic [1:0] b; +// logic GT; - magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); - magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); - magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); - magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); +// output logic LT; +// output logic EQ; - magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); - magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); +// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); +// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); +// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); +// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - - assign EQ = ~(LT | GT); - -endmodule // magcompare64b - -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of -// operands being compared as indicated below. -// Sel Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. 
- -module exception_cmp_2 ( - input logic [63:0] A, - input logic [63:0] B, - input logic FmtM, - input logic LT_mag, - input logic EQ_mag, - input logic [1:0] Sel, - input logic [3:0] FOpCtrlM, +// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); +// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - output logic invalid, - output logic [1:0] fcc, - output logic [63:0] FCmpResultM, +// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - input logic Azero, - input logic Bzero, - input logic ANaN, - input logic BNaN); +// assign EQ = ~(LT | GT); + +// endmodule // magcompare64b + +// // This module takes 64-bits inputs A and B, two magnitude comparison +// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of +// // operands being compared as indicated below. +// // Sel Description +// // 00 double precision numbers +// // 01 single precision numbers +// // 10 half precision numbers +// // 11 bfloat precision numbers +// // +// // The comparator produces a 2-bit signal fcc, which +// // indicates the result of the comparison as follows: +// // fcc decscription +// // 00 A = B +// // 01 A < B +// // 10 A > B +// // 11 A and B are unordered (i.e., A or B is NaN) +// // It also produces a invalid operation flag, which is one +// // if either of the input operands is a signaling NaN. + +// module exception_cmp_2 ( +// input logic [63:0] A, +// input logic [63:0] B, +// input logic FmtM, +// input logic LT_mag, +// input logic EQ_mag, +// input logic [1:0] Sel, +// input logic [3:0] FOpCtrlM, - logic dp; - logic sp; - logic hp; - logic ASNaN; - logic BSNaN; - logic UO; - logic GT; - logic LT; - logic EQ; - logic [62:0] sixtythreezeros = 63'h0; +// output logic invalid, +// output logic [1:0] fcc, +// output logic [63:0] FCmpResultM, - assign dp = !Sel[1]&!Sel[0]; - assign sp = !Sel[1]&Sel[0]; - assign hp = Sel[1]&!Sel[0]; - - // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating - // point comparison is being performed. 
- assign UO = (ANaN | BNaN); - - // Test if A or B is a signaling NaN. - assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); - assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - - // If either A or B is a signaling NaN the "Invalid Operation" - // exception flag is set to one; otherwise it is zero. - assign invalid = (ASNaN | BSNaN); - - // A and B are equal if (their magnitudes are equal) AND ((their signs are - // equal) or (their magnitudes are zero AND they are floating point - // numbers)). Also, A and B are not equal if they are unordered. - assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); +// input logic Azero, +// input logic Bzero, +// input logic ANaN, +// input logic BNaN); - // A is less than B if (A is negative and B is posiive) OR - // (A and B are positive and the magnitude of A is less than - // the magnitude of B) or (A and B are negative integers and - // the magnitude of A is less than the magnitude of B) or - // (A and B are negative floating point numbers and - // the magnitude of A is greater than the magnitude of B). - // Also, A is not less than B if A and B are equal or unordered. - assign LT = ((~LT_mag & A[63] & B[63]) | - (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; +// logic dp; +// logic sp; +// logic hp; +// logic ASNaN; +// logic BSNaN; +// logic UO; +// logic GT; +// logic LT; +// logic EQ; +// logic [62:0] sixtythreezeros = 63'h0; + +// assign dp = !Sel[1]&!Sel[0]; +// assign sp = !Sel[1]&Sel[0]; +// assign hp = Sel[1]&!Sel[0]; + +// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating +// // point comparison is being performed. +// assign UO = (ANaN | BNaN); + +// // Test if A or B is a signaling NaN. +// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); +// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); + +// // If either A or B is a signaling NaN the "Invalid Operation" +// // exception flag is set to one; otherwise it is zero. 
+// assign invalid = (ASNaN | BSNaN); + +// // A and B are equal if (their magnitudes are equal) AND ((their signs are +// // equal) or (their magnitudes are zero AND they are floating point +// // numbers)). Also, A and B are not equal if they are unordered. +// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - // A is greater than B when LT, EQ, and UO are are false. - assign GT = ~(LT | EQ | UO); +// // A is less than B if (A is negative and B is posiive) OR +// // (A and B are positive and the magnitude of A is less than +// // the magnitude of B) or (A and B are negative integers and +// // the magnitude of A is less than the magnitude of B) or +// // (A and B are negative floating point numbers and +// // the magnitude of A is greater than the magnitude of B). +// // Also, A is not less than B if A and B are equal or unordered. +// assign LT = ((~LT_mag & A[63] & B[63]) | +// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; + +// // A is greater than B when LT, EQ, and UO are are false. +// assign GT = ~(LT | EQ | UO); - // Note: it may be possible to optimize the setting of fcc - // a little more, but it is probably not worth the effort. +// // Note: it may be possible to optimize the setting of fcc +// // a little more, but it is probably not worth the effort. - // Set the bits of fcc based on LT, GT, EQ, and UO - assign fcc[0] = LT | UO; - assign fcc[1] = GT | UO; +// // Set the bits of fcc based on LT, GT, EQ, and UO +// assign fcc[0] = LT | UO; +// assign fcc[1] = GT | UO; - always_comb begin - case (FOpCtrlM[2:0]) - 3'b111: FCmpResultM = LT ? A : B;//min - 3'b101: FCmpResultM = GT ? A : B;//max - 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal - 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than - 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal - default: FCmpResultM = 64'b0; - endcase - end +// always_comb begin +// case (FOpCtrlM[2:0]) +// 3'b111: FCmpResultM = LT ? 
A : B;//min +// 3'b101: FCmpResultM = GT ? A : B;//max +// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal +// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than +// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal +// default: FCmpResultM = 64'b0; +// endcase +// end -endmodule // exception_cmp +// endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv index 959ef4763..03667d84f 100644 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ b/wally-pipelined/src/fpu/fpuhazard.sv @@ -26,47 +26,41 @@ `include "wally-config.vh" module fpuhazard( - input logic [4:0] Adr1, Adr2, Adr3, - input logic FWriteEnE, FWriteEnM, FWriteEnW, - input logic [4:0] RdE, RdM, RdW, - input logic FDivBusyE, - input logic RegWriteD, - input logic [2:0] FResultSelD, FResultSelE, - input logic IllegalFPUInstrD, - input logic FInput2UsedD, FInput3UsedD, - // Stall outputs - output logic FStallD, - output logic [1:0] FForwardInput1D, FForwardInput2D, - output logic FForwardInput3D + input logic [4:0] Adr1E, Adr2E, Adr3E, + input logic FWriteEnM, FWriteEnW, + input logic [4:0] RdM, RdW, + input logic [2:0] FResultSelM, + output logic FStallD, + output logic [1:0] ForwardXE, ForwardYE, ForwardZE ); always_comb begin // set ReadData as default - FForwardInput1D = 2'b00; - FForwardInput2D = 2'b00; - FForwardInput3D = 1'b0; - FStallD = FDivBusyE; - if (~IllegalFPUInstrD) begin -// if taking a value from int register - if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD))) - if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM - else FStallD = 1'b1; // otherwise stall - else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW - else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE + ForwardXE = 2'b00; // choose FRD1E + ForwardYE = 2'b00; // choose FRD2E + ForwardZE = 
2'b00; // choose FRD3E + FStallD = 0; + + if ((Adr1E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardXE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W - if(FInput2UsedD) - if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1; - else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW - else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE + if ((Adr2E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardYE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W - - if(FInput3UsedD) - if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1; - else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1; - else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE - end + + if ((Adr3E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardZE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 2850af86e..62d0e7d7c 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,8 +1,8 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E); +module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); - input [63:0] FInput1E, FInput2E; + input [63:0] SrcXE, SrcYE; input [1:0] SgnOpCodeE; output [63:0] SgnResultE; output [4:0] SgnFlagsE; @@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, 
FInput2E); //op code designation: // - //00 - fsgnj - directly copy over sign value of FInput2E - //01 - fsgnjn - negate sign value of FInput2E - //10 - fsgnjx - XOR sign values of FInput1E & FInput2E + //00 - fsgnj - directly copy over sign value of SrcYE + //01 - fsgnjn - negate sign value of SrcYE + //10 - fsgnjx - XOR sign values of SrcXE & SrcYE // - assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]); - assign SgnResultE[62:0] = FInput1E[62:0]; + assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); + assign SgnResultE[62:0] = SrcXE[62:0]; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will //set the invalid flag high. - assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52]; + assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52]; //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 016d8e1ad..356574d0f 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -32,7 +32,7 @@ module hazard( input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, input logic DataStall, ICacheStallF, - input logic FPUStallD, + input logic FPUStallD, FStallD, input logic DivBusyE,FDivBusyE, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, @@ -56,7 +56,7 @@ module hazard( // If any stages are stalled, the first stage that isn't stalled must flush. 
assign StallFCause = CSRWritePendingDEM && ~(TrapM || RetM || BPPredWrongE); - assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous + assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD || FStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = DivBusyE || FDivBusyE; assign StallMCause = 0; assign StallWCause = DataStall || ICacheStallF; diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 09ded48ba..ab25401e7 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -45,15 +45,16 @@ module controller( output logic MemReadE, CSRReadE, // for Hazard Unit output logic [2:0] Funct3E, output logic MulDivE, W64E, - output logic JumpE, + output logic JumpE, + output logic [1:0] MemRWE, // Memory stage control signals input logic StallM, FlushM, output logic [1:0] MemRWM, output logic CSRReadM, CSRWriteM, PrivilegedM, output logic [1:0] AtomicM, output logic [2:0] Funct3M, - output logic RegWriteM, // for Hazard Unit output logic [2:0] ResultSrcM, + output logic RegWriteM, // for Hazard Unit output logic InstrValidM, // Writeback stage control signals input logic StallW, FlushW, @@ -74,7 +75,7 @@ module controller( // pipelined control signals logic RegWriteE; logic [2:0] ResultSrcD, ResultSrcE; - logic [1:0] MemRWD, MemRWE; + logic [1:0] MemRWD; logic JumpD; logic BranchD, BranchE; logic [1:0] ALUOpD; @@ -141,6 +142,7 @@ module controller( ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide else ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction + //7'b1010011: ControlsD = `CTRLW'b0_000_00_00_101_0_00_0_0_0_0_0_0_00_1; // FP 7'b1100011: ControlsD = 
`CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq 7'b1100111: ControlsD = `CTRLW'b1_000_00_00_000_0_00_1_1_0_0_0_0_00_0; // jalr 7'b1101111: ControlsD = `CTRLW'b1_011_00_00_000_0_00_1_0_0_0_0_0_00_0; // jal diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 848ed89a5..635c12f24 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -37,6 +37,9 @@ module datapath ( input logic ALUSrcAE, ALUSrcBE, input logic TargetSrcE, input logic JumpE, + input logic IsFPE, + input logic [1:0] MemRWE, + input logic [`XLEN-1:0] FWriteDataE, input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, output logic [2:0] FlagsE, @@ -44,16 +47,16 @@ module datapath ( output logic [`XLEN-1:0] SrcAE, SrcBE, // Memory stage signals input logic StallM, FlushM, - input logic [`XLEN-1:0] FWriteDataM, input logic SquashSCM, + input logic FWriteIntM, input logic [2:0] ResultSrcM, + input logic [`XLEN-1:0] FIntResM, input logic [`XLEN-1:0] CSRReadValM, ReadDataM, MulDivResultM, output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals input logic StallW, FlushW, input logic FWriteIntW, - input logic [`XLEN-1:0] FPUResultW, input logic RegWriteW, input logic SquashSCW, input logic [2:0] ResultSrcW, @@ -72,7 +75,7 @@ module datapath ( logic [`XLEN-1:0] RD1E, RD2E; logic [`XLEN-1:0] ExtImmE; - logic [`XLEN-1:0] PreSrcAE, SrcAE2, SrcBE2; + logic [`XLEN-1:0] PreSrcAE, PreSrcBE, SrcAE2, SrcBE2; logic [`XLEN-1:0] ALUResultE; logic [`XLEN-1:0] WriteDataE; @@ -92,8 +95,7 @@ module datapath ( assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - //Mux for writting floating point - mux2 #(`XLEN) writedatamux(ResultW, FPUResultW, FWriteIntW, WriteDataW); + //Mux for writting floating point regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); extend ext(.InstrD(InstrD[31:7]), .*); @@ -106,11 +108,12 @@ module datapath ( flopenrc 
#(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux4 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, FWriteDataM, ForwardAE, PreSrcAE); - mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, FWriteDataM, ForwardBE, WriteDataE); + mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, PreSrcAE); + mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, PreSrcBE); + mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, IsFPE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); - mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE); + mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE); mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux. alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE); mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE); @@ -122,10 +125,11 @@ module datapath ( assign MemAdrM = ALUResultM; flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); - mux5 #(`XLEN) resultmuxM(ALUResultM, ReadDataM, CSRReadValM, MulDivResultM, SCResultM, ResultSrcM, ResultM); + //mux6 #(`XLEN) resultmuxM(ALUResultM, ReadDataM, CSRReadValM, MulDivResultM, SCResultM, FIntResM, ResultSrcM, ResultM); //Wasn't doing anything + mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM); //Wasn't doing anything // Writeback stage pipeline register and logic - flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); + flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW); flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); // handle Store Conditional result if atomic extension supported @@ -139,11 +143,11 @@ module datapath ( end endgenerate - mux5 #(`XLEN) resultmuxW(ALUResultW, ReadDataW, CSRReadValW, 
MulDivResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW); /* -----\/----- EXCLUDED -----\/----- // This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage. // *** need to look at how the decoder is coded to fix. - mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW); + mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, WriteDataW); >>>>>>> bp -----/\----- EXCLUDED -----/\----- */ diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 6729ed424..259d41f24 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -41,14 +41,12 @@ module forward( ForwardAE = 2'b00; ForwardBE = 2'b00; if (Rs1E != 5'b0) - if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; + if ((Rs1E == RdM) & (RegWriteM|FWriteIntM)) ForwardAE = 2'b10; else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; - else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; if (Rs2E != 5'b0) - if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; + if ((Rs2E == RdM) & (RegWriteM|FWriteIntM)) ForwardBE = 2'b10; else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01; - else if ((Rs2E == RdM) & FWriteIntM) ForwardBE = 2'b11; end // Stall on dependent operations that finish in Mem Stage and can't bypass in time @@ -57,4 +55,4 @@ module forward( assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); -endmodule +endmodule \ No newline at end of file diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 73c619f62..b9198b0a6 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -35,7 +35,10 @@ module ieu ( // Execute Stage interface input logic 
[`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, - input logic FWriteIntE, + input logic FWriteIntE, + input logic IsFPE, + //input logic [1:0] FMemRWE, + input logic [`XLEN-1:0] FWriteDataE, output logic [`XLEN-1:0] PCTargetE, output logic MulDivE, W64E, output logic [2:0] Funct3E, @@ -44,8 +47,8 @@ module ieu ( input logic DataMisalignedM, input logic DataAccessFaultM, input logic FWriteIntM, - input logic [`XLEN-1:0] FWriteDataM, input logic SquashSCM, + input logic [`XLEN-1:0] FIntResM, input logic [`XLEN-1:0] CSRReadValM, ReadDataM, MulDivResultM, output logic [1:0] MemRWM, output logic [1:0] AtomicM, @@ -55,7 +58,6 @@ module ieu ( // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, input logic FWriteIntW, - input logic [`XLEN-1:0] FPUResultW, input logic SquashSCW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidM, InstrValidW, @@ -83,6 +85,7 @@ module ieu ( logic RegWriteM, RegWriteW; logic MemReadE, CSRReadE; logic JumpE; + logic [1:0] MemRWE; controller c(.*); datapath dp(.*); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 1fd1408a4..fb7d288dc 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -86,21 +86,26 @@ module wallypipelinedhart ( logic PCSrcE; logic CSRWritePendingDEM; - logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD; + logic LoadStallD, MulDivStallD, CSRRdStallD; logic DivDoneE; logic DivBusyE; logic DivDoneW; - logic [4:0] SetFflagsM; - logic [2:0] FRM_REGW; - logic FloatRegWriteW; - logic [1:0] FMemRWM; logic RegWriteD; - logic [`XLEN-1:0] FWriteDataM; logic SquashSCM, SquashSCW; - logic FStallD; - logic FWriteIntE, FWriteIntW, FWriteIntM; - logic FDivBusyE; - logic IllegalFPUInstrD, IllegalFPUInstrE; + + // floating point unit signals + logic [2:0] FRM_REGW; + logic [1:0] FMemRWM, FMemRWE; + logic FStallD; + logic FWriteIntE, FWriteIntM, FWriteIntW; + logic 
[`XLEN-1:0] FWriteDataE; + logic [`XLEN-1:0] FIntResM; + logic FDivBusyE; + logic IsFPD, IsFPE; + logic IllegalFPUInstrD, IllegalFPUInstrE; + logic FloatRegWriteW; + logic FPUStallD; + logic [4:0] SetFflagsM; logic [`XLEN-1:0] FPUResultW; // memory management unit signals @@ -159,13 +164,13 @@ module wallypipelinedhart ( ieu ieu(.*); // integer execution unit: integer register file, datapath and controller - mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - lsu lsu(.MemRWM(MemRWM|FMemRWM), .WriteDataM(WriteDatatmpM),.*); // data cache unit + // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); + lsu lsu(.*); // data cache unit ahblite ebu( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later - .WriteDataM(WriteDatatmpM), + .WriteDataM(WriteDataM), .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .Funct7M(InstrM[31:25]), .*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 2b052dcdf..11b8e5620 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -539,8 +539,8 @@ string tests32f[] = '{ if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; - if (`D_SUPPORTED) tests = {tests64d, tests}; if (`F_SUPPORTED) tests = {tests64f, tests}; + if (`D_SUPPORTED) tests = {tests64d, tests}; end //tests = {tests64a, tests}; end else begin // RV32 From 192171826b2ca2a6b08692e7e6ae5f00857aeff9 Mon Sep 17 00:00:00 2001 From: bbracker Date: Fri, 25 Jun 2021 07:18:38 -0400 Subject: [PATCH 09/20] changed SC M-to-E fowarding to W-to-E forwarding to improve critical path --- wally-pipelined/src/ieu/controller.sv | 6 ++++-- wally-pipelined/src/ieu/forward.sv | 15 ++++++++------- wally-pipelined/src/ieu/ieu.sv | 3 ++- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git 
a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index b27541d42..3654437fd 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -49,7 +49,8 @@ module controller( // Memory stage control signals input logic StallM, FlushM, output logic [1:0] MemRWM, - output logic CSRReadM, CSRWriteM, PrivilegedM, + output logic CSRReadM, CSRWriteM, PrivilegedM, + output logic SCE, output logic [1:0] AtomicM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit @@ -202,7 +203,8 @@ module controller( assign PCSrcE = JumpE | BranchE & BranchTakenE; - assign MemReadE = MemRWE[1]; + assign MemReadE = MemRWE[1]; + assign SCE = (ResultSrcE == 3'b100); // Memory stage pipeline control register flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM, diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index cdc6d2700..07c4daaf0 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -28,13 +28,14 @@ module forward( // Detect hazards input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, - input logic MemReadE, MulDivE, CSRReadE, - input logic RegWriteM, RegWriteW, - input logic DivDoneE, DivBusyE, - input logic FWriteIntE, FWriteIntM, FWriteIntW, + input logic MemReadE, MulDivE, CSRReadE, + input logic RegWriteM, RegWriteW, + input logic DivDoneE, DivBusyE, + input logic FWriteIntE, FWriteIntM, FWriteIntW, + input logic SCE, // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, - output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD + output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD ); always_comb begin @@ -43,7 +44,7 @@ module forward( if (Rs1E != 5'b0) if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; - else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; + else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; if (Rs2E != 
5'b0) if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; @@ -53,7 +54,7 @@ module forward( // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); - assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE)); + assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE)); assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 0bd9d598f..62dc371b9 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -73,7 +73,8 @@ module ieu ( logic [4:0] ALUControlE; logic ALUSrcAE, ALUSrcBE; logic [2:0] ResultSrcW; - logic TargetSrcE; + logic TargetSrcE; + logic SCE; // forwarding signals logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW; From 5b47da21bac8e3de9a508920724b4007430c06d0 Mon Sep 17 00:00:00 2001 From: bbracker Date: Fri, 25 Jun 2021 08:15:19 -0400 Subject: [PATCH 10/20] made testbench-linux's PCDwrong be FlushD --- wally-pipelined/testbench/testbench-linux.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 15e0e3634..b87174b9b 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -57,7 +57,7 @@ module testbench(); wallypipelinedsoc dut(.*); /////////////////////////////////////////////////////////////////////////////// - //////////////////////// Signals & Shared Macros ////////////////////////// + //////////////////////// Signals & Shared Macros /////////////////////////// //////////////////////// AKA stuff that comes first /////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Sorry if these 
have gotten decontextualized. @@ -252,7 +252,7 @@ module testbench(); // Check if PCD is going to be flushed due to a branch or jump if (`BPRED_ENABLED) begin - PCDwrong = dut.hart.ifu.bpred.bpred.BPPredWrongE; + PCDwrong = dut.hart.hzu.FlushD; //Old version: dut.hart.ifu.bpred.bpred.BPPredWrongE; <-- This old version failed to account for MRET. end else begin casex (lastInstrDExpected[31:0]) 32'b00000000001000000000000001110011, // URET From 13cf7c0934077100e04a962a9bcf0738457dc3c4 Mon Sep 17 00:00:00 2001 From: bbracker Date: Fri, 25 Jun 2021 09:28:52 -0400 Subject: [PATCH 11/20] linux testbench now ignores HWRITE glitches caused by flush glitches --- wally-pipelined/regression/wave-dos/linux-waves.do | 1 + wally-pipelined/testbench/testbench-linux.sv | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/linux-waves.do b/wally-pipelined/regression/wave-dos/linux-waves.do index b37276441..63623891c 100644 --- a/wally-pipelined/regression/wave-dos/linux-waves.do +++ b/wally-pipelined/regression/wave-dos/linux-waves.do @@ -4,6 +4,7 @@ view wave add wave -divider add wave /testbench/clk add wave /testbench/reset +add wave -dec /testbench/instrs add wave -divider Stalls_and_Flushes add wave /testbench/dut/hart/StallF diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index b87174b9b..6676d1a7c 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,7 +27,7 @@ module testbench(); - parameter waveOnICount = 2514000; // # of instructions at which to turn on waves in graphical sim + parameter waveOnICount = 2657000; // # of instructions at which to turn on waves in graphical sim /////////////////////////////////////////////////////////////////////////////// @@ -491,7 +491,7 @@ module testbench(); //always @(HWDATA or HADDR or HSIZE or HWRITE) begin always @(negedge HWRITE) begin //#1; - if ($time != 0) begin 
+ if (($time != 0) && ~dut.hart.hzu.FlushM) begin if($feof(data_file_memW)) begin $display("no more memW data to read"); `ERROR From 2ab29c74f263c784a640ae67c68ce165aa99186e Mon Sep 17 00:00:00 2001 From: Abe Date: Fri, 25 Jun 2021 16:27:23 -0400 Subject: [PATCH 12/20] Fixed Coremark Score output printing. Also made it so that the loop that sets the iteration count increments iterations by 1 instead by increasing it by a factor of 10 each time (which was overkill for the timing that's needed to exit the loop) --- riscv-coremark/coremark/core_main.c | 45 +++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/riscv-coremark/coremark/core_main.c b/riscv-coremark/coremark/core_main.c index edd1ac467..a2c3ac679 100644 --- a/riscv-coremark/coremark/core_main.c +++ b/riscv-coremark/coremark/core_main.c @@ -211,26 +211,53 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]); } } - + + /*int foreverLoop = 1; + secs_ret timing = 0; + int timingInt; + ee_printf("\nENTERING FOREVER WHILE LOOP\n"); + while(foreverLoop == 1) + { + start_time(); + //filler + stop_time(); + timing += time_in_secs(get_time()); + timingInt = (int)timing; + ee_printf("Timing is %d\n", timingInt); + }/* + /* automatically determine number of iterations if not set */ if (results[0].iterations==0) { secs_ret secs_passed=0; ee_u32 divisor; results[0].iterations=1; + int iterationInc = 0; + ee_printf("\n\nENTERING ITERATION WHILE LOOP\n"); while (secs_passed < (secs_ret)1) { - results[0].iterations*=10; + if(iterationInc != 0) + { + results[0].iterations++; + } + ee_printf("iterations is %d\n", results[0].iterations); start_time(); iterate(&results[0]); stop_time(); - secs_passed=time_in_secs(get_time()); + secs_passed = time_in_secs(get_time()); + int secs_passed_int = (int)secs_passed; + ee_printf("secs passed is %d\n", secs_passed_int); + iterationInc++; } + ee_printf("LEAVING ITERATION WHILE 
LOOP!\n\n"); /* now we know it executes for at least 1 sec, set actual run time at about 10 secs */ divisor=(ee_u32)secs_passed; + ee_printf("divisor is %lu\n", divisor); if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */ divisor=1; results[0].iterations*=1+10/divisor; + ee_printf("iterations is %d\n", results[0].iterations); } /* perform actual benchmark */ + ee_printf("Starting benchmark\n"); start_time(); #if (MULTITHREAD>1) if (default_num_contexts>MULTITHREAD) { @@ -249,7 +276,8 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { #endif stop_time(); total_time=get_time(); - ee_printf("ending benchmark"); + ee_printf("total time is %u\n", total_time); + ee_printf("ending benchmark\n"); /* get a function of the input to report */ seedcrc=crc16(results[0].seed1,seedcrc); seedcrc=crc16(results[0].seed2,seedcrc); @@ -340,12 +368,17 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { for (i=0 ; i Date: Fri, 25 Jun 2021 16:42:03 -0400 Subject: [PATCH 13/20] Updated timing functions to read from MTIME register, TICKS_PER_SEC set to 10000 so timer reads millisecs --- riscv-coremark/riscv64-baremetal/core_portme.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/riscv-coremark/riscv64-baremetal/core_portme.c b/riscv-coremark/riscv64-baremetal/core_portme.c index 8f17cb8bd..dab428306 100755 --- a/riscv-coremark/riscv64-baremetal/core_portme.c +++ b/riscv-coremark/riscv64-baremetal/core_portme.c @@ -114,9 +114,10 @@ void portable_free(void *p) { #define read_csr(reg) ({ unsigned long __tmp; \ asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ __tmp; }) - #define GETMYTIME(_t) (*_t=read_csr(cycle)) + #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8) #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 + // Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms) + #define 
TIMER_RES_DIVIDER 10000 #define SAMPLE_TIME_IMPLEMENTATION 1 #endif #define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) @@ -132,7 +133,9 @@ static CORETIMETYPE start_time_val, stop_time_val; or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. */ void start_time(void) { - GETMYTIME(&start_time_val ); + GETMYTIME(start_time_val); + ee_printf("Timer started\n"); + ee_printf(" MTIME: %u\n", start_time_val); #if CALLGRIND_RUN CALLGRIND_START_INSTRUMENTATION #endif @@ -153,7 +156,9 @@ void stop_time(void) { #if MICA asm volatile("int3");/*1 */ #endif - GETMYTIME(&stop_time_val ); + GETMYTIME(stop_time_val); + ee_printf("Timer stopped\n"); + ee_printf(" MTIME: %u\n", stop_time_val); } /* Function: get_time Return an abstract "ticks" number that signifies time on the system. @@ -166,6 +171,7 @@ void stop_time(void) { */ CORE_TICKS get_time(void) { CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); + ee_printf(" Elapsed MTIME: %u\n", elapsed); return elapsed; } /* Function: time_in_secs @@ -176,13 +182,15 @@ CORE_TICKS get_time(void) { */ secs_ret time_in_secs(CORE_TICKS ticks) { secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; + int retvalint = (int)retval; + ee_printf(" RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint); return retval; } #else #error "Please implement timing functionality in core_portme.c" #endif /* SAMPLE_TIME_IMPLEMENTATION */ -ee_u32 default_num_contexts=MULTITHREAD; +ee_u32 default_num_contexts = MULTITHREAD; /* Function: portable_init Target specific initialization code From 74833dc68c20aeed361330bbacdcda6a701a76b0 Mon Sep 17 00:00:00 2001 From: bbracker Date: Sat, 26 Jun 2021 07:18:26 -0400 Subject: [PATCH 14/20] split intermediate GDB output file into smaller files for better debug experience --- .gitignore | 1 + wally-pipelined/linux-testgen/logAllBuildroot.sh | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore 
index fe21942d0..82c20503b 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ testsBP/*/OBJ/* testsBP/*/*.a wally-pipelined/linux-testgen/linux-testvectors/* wally-pipelined/linux-testgen/nohup* +wally-pipelined/linux-testgen/x* !wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py !wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh wally-pipelined/regression/slack-notifier/slack-webhook-url.txt diff --git a/wally-pipelined/linux-testgen/logAllBuildroot.sh b/wally-pipelined/linux-testgen/logAllBuildroot.sh index d045ee98c..073fc5a9d 100755 --- a/wally-pipelined/linux-testgen/logAllBuildroot.sh +++ b/wally-pipelined/linux-testgen/logAllBuildroot.sh @@ -1,3 +1,9 @@ +# Oftentimes this script runs so long you'll go to sleep. +# But you don't want the script to die when your computer goes to sleep. +# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh") +# You can run "tail -f nohup.out" to see what would've +# outputted to the terminal if you didn't use nohup + # =========== Debug the Process ========== # Uncomment this version for GDB/QEMU debugging # - Opens up GDB interactively @@ -15,6 +21,12 @@ # - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/" +# Uncomment this version in case you just want to have qemu_in_gdb_format.txt around +# It is often helpful for general debugging +(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & 
riscv64-unknown-elf-gdb -x gdbinit_qemulog + +# Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection +#split -d -l 5600000 qemu_in_gdb_format.txt --verbose # Uncomment this version for parse_gdb_output.py debugging # - Uses qemu_in_gdb_format.txt @@ -24,4 +36,4 @@ # =========== Just Do the Thing ========== # Uncomment this version for the whole thing # - Logs info needed by buildroot testbench -(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog +#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog From 17afd9e5e8b7e65a868324d8ab4934be9c30fa17 Mon Sep 17 00:00:00 2001 From: bbracker Date: Sat, 26 Jun 2021 07:19:51 -0400 Subject: [PATCH 15/20] temporarily disable PMP checking for EBU accesses. 
--- wally-pipelined/src/wally/wallypipelinedhart.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index e8064bcc7..a77c3ab01 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -171,6 +171,7 @@ module wallypipelinedhart ( ahblite ebu( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later + .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking .WriteDataM(WriteDataM), .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .Funct7M(InstrM[31:25]), From 751e606fb729a947170809c7b7aa5caed7c0728b Mon Sep 17 00:00:00 2001 From: bbracker Date: Sat, 26 Jun 2021 08:30:58 -0400 Subject: [PATCH 16/20] trying out Noah and Kaveh's proposed hack for which CSRs to update for QEMU MMU bug --- wally-pipelined/linux-testgen/logAllBuildroot.sh | 4 ++-- wally-pipelined/linux-testgen/parse_qemu.py | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/wally-pipelined/linux-testgen/logAllBuildroot.sh b/wally-pipelined/linux-testgen/logAllBuildroot.sh index 073fc5a9d..740fa8c4b 100755 --- a/wally-pipelined/linux-testgen/logAllBuildroot.sh +++ b/wally-pipelined/linux-testgen/logAllBuildroot.sh @@ -23,7 +23,7 @@ #cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/" # Uncomment this version in case you just want to have qemu_in_gdb_format.txt around # It is often helpful for general debugging -(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | 
./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog +#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog # Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection #split -d -l 5600000 qemu_in_gdb_format.txt --verbose @@ -36,4 +36,4 @@ # =========== Just Do the Thing ========== # Uncomment this version for the whole thing # - Logs info needed by buildroot testbench -#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog +(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog diff --git 
a/wally-pipelined/linux-testgen/parse_qemu.py b/wally-pipelined/linux-testgen/parse_qemu.py index c7f31fb22..ac5d95f0b 100755 --- a/wally-pipelined/linux-testgen/parse_qemu.py +++ b/wally-pipelined/linux-testgen/parse_qemu.py @@ -40,13 +40,12 @@ def parseCSRs(l): val = int(l.split()[1],16) if inPageFault: # Not sure if these CSRs should be updated or not during page fault. - #if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"): - # # We do update some CSRs - # CSRs[csr] = val - #else: - # # Others we preserve until changed later - # pageFaultCSRs[csr] = val - pageFaultCSRs[csr] = val + if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"): + # We do update some CSRs + CSRs[csr] = val + else: + # Others we preserve until changed later + pageFaultCSRs[csr] = val elif pageFaultCSRs and (csr in pageFaultCSRs): if (val != pageFaultCSRs[csr]): del pageFaultCSRs[csr] From 0c2b7a1132ff01d48522c70be2ef33f74eea35d9 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 28 Jun 2021 18:53:58 -0400 Subject: [PATCH 17/20] FPU control signals changed and FMA works --- wally-pipelined/src/fpu/FMA/tbgen/tb.sv | 5 +- wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh | 2 +- wally-pipelined/src/fpu/fctrl.sv | 267 ++++----- wally-pipelined/src/fpu/fma1.sv | 281 +++++----- wally-pipelined/src/fpu/fma2.sv | 516 +++++++++--------- wally-pipelined/src/fpu/fpu.sv | 149 ++--- wally-pipelined/src/fpu/fpuhazard.sv | 6 +- wally-pipelined/src/ieu/datapath.sv | 8 +- wally-pipelined/src/ieu/ieu.sv | 3 +- .../src/wally/wallypipelinedhart.sv | 23 +- 10 files changed, 571 insertions(+), 689 deletions(-) diff --git a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv index 4c93cd575..5a8e7a868 100644 --- 
a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv +++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv @@ -45,8 +45,8 @@ assign FOpCtrlE = 3'b0; // down - 010 // up - 011 // nearest max mag - 100 -assign FrmE = 3'b010; -assign FmtE = 1'b1; +assign FrmE = 3'b011; +assign FmtE = 1'b0; assign wnan = FmtE ? &FmaResultM[62:52] && |FmaResultM[51:0] : &FmaResultM[62:55] && |FmaResultM[54:32]; @@ -110,7 +110,6 @@ always @(posedge clk) if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN "); if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN "); errors = errors + 1; - if (errors == 20) $stop; end if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin diff --git a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh index dc9562b1a..5f12e143c 100755 --- a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh +++ b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh @@ -1,3 +1,3 @@ -testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat +testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rmax -seed 113355 -level 1 > testFloat tr -d ' ' < testFloat > testFloatNoSpace diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index a9fcb564e..3be9b281a 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -6,176 +6,128 @@ module fctrl ( input logic [2:0] Funct3D, input logic [2:0] FRM_REGW, output logic IllegalFPUInstrD, - output logic IsFPD, output logic FWriteEnD, output logic FDivStartD, output logic [2:0] FResultSelD, output logic [3:0] FOpCtrlD, + output logic [1:0] FResSelD, + output logic [1:0] 
FIntResSelD, output logic FmtD, output logic [2:0] FrmD, - output logic [1:0] FMemRWD, - output logic FOutputInput2D, - output logic FInput2UsedD, FInput3UsedD, output logic FWriteIntD); - - logic IllegalFPUInstr1D, IllegalFPUInstr2D; - // *** fix rounding for dynamic rounding + `define FCTRLW 15 + logic [`FCTRLW-1:0] ControlsD; + // FPU Instruction Decoder + always_comb + case(OpD) + // FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr + 7'b0000111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw + 3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b0100111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw + 3'b011: ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1000011: ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd + 7'b1010011: casez(Funct7D) + 7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv + 7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt + 7'b00100??: case(Funct3D) + 3'b000: ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b00101??: case(Funct3D) + 3'b000: ControlsD = 
`FCTRLW'b1_0_100_0111_10_00_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b10100??: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq + 3'b001: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt + 3'b000: ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b11100??: if (Funct3D == 3'b001) + ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass + else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w + else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d + else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 7'b1100000: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w + 1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1101000: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s + 1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x + 7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d + 7'b1100001: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w + 1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1101001: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d + 1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // 
non-implemented instruction + endcase + 7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x + 7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + // unswizzle control bits + assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD; + + // if dynamic rounding, choose FRM_REGW assign FrmD = &Funct3D ? FRM_REGW : Funct3D; - //all subsequent logic is based on the table present - //in Section 5 of Wally Architecture Specification - - //write is enabled for all fp instruciton op codes - //sans fp load - always_comb begin - //case statement is easier to modify - //in case of errors - case(OpD) - //fp instructions sans load - 7'b1010011 : IsFPD = 1'b1; - 7'b1000011 : IsFPD = 1'b1; - 7'b1000111 : IsFPD = 1'b1; - 7'b1001011 : IsFPD = 1'b1; - 7'b1001111 : IsFPD = 1'b1; - 7'b0100111 : IsFPD = 1'b1; - 7'b0000111 : IsFPD = 1'b1;// KEP change 7'b1010011 to 7'b0000111 - default : IsFPD = 1'b0; - endcase - end - - - - //useful intermediary signals - // - //(mult only not supported in current datapath) - //set third FMA operand to zero in this case - //(or equivalent) - - always_comb begin - //checks all but FMA/store/load - IllegalFPUInstr2D = 0; - FDivStartD = 1'b0; - if(OpD == 7'b1010011) begin - casez(Funct7D) - //compare - 7'b10100?? : FResultSelD = 3'b001; - //div/sqrt - 7'b0?011?? : begin FResultSelD = 3'b000; FDivStartD = 1'b1; end - //add/sub - 7'b0000??? : FResultSelD = 3'b100; - //mult - 7'b00010?? : FResultSelD = 3'b010; - //convert (not precision) - 7'b110?0?? : FResultSelD = 3'b100; - //convert (precision) - 7'b010000? : FResultSelD = 3'b100; - //Min/Max - 7'b00101?? : FResultSelD = 3'b001; - //sign injection - 7'b00100?? 
: FResultSelD = 3'b011; - //classify //only if funct3 = 001 - 7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101; - //output ReadData1 - else if (Funct7D[1] == 0) FResultSelD = 3'b111; - //output SrcW - 7'b111100? : FResultSelD = 3'b110; - default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end - endcase - end - //FMA/store/load - else begin - case(OpD) - //4 FMA instructions - 7'b1000011 : FResultSelD = 3'b010; - 7'b1000111 : FResultSelD = 3'b010; - 7'b1001011 : FResultSelD = 3'b010; - 7'b1001111 : FResultSelD = 3'b010; - //store - 7'b0100111 : FResultSelD = 3'b111; - //load - 7'b0000111 : FResultSelD = 3'b111; - default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end - endcase - end - end - - assign FOutputInput2D = OpD == 7'b0100111; - - assign FMemRWD[0] = FOutputInput2D; - assign FMemRWD[1] = OpD == 7'b0000111; - - - - //register is chosen based on operation performed - //---- - //write selection is chosen in the same way as - //register selection - // - - // reg/write sel logic and assignment - // - // 3'b000 = div/sqrt - // 3'b001 = cmp - // 3'b010 = fma/mult - // 3'b011 = sgn inj - // 3'b100 = add/sub/cnvt - // 3'b101 = classify - // 3'b110 = output SrcAW - // 3'b111 = output ReadData1 - // - //reg select - - //this value is used enough to be shorthand - - - //operation control for each fp operation - //has to be expanded over standard to account for - //integrated fpadd/cvt - // - //will integrate FMA opcodes into design later - // - //conversion instructions will - //also need to be added later as I find the opcode - //version I used for this repo - - //let's do separate SOP for each type of operation -// assign FOpCtrlD[3] = 1'b0; -// -// - - - - always_comb begin - IllegalFPUInstr1D = 0; - FInput3UsedD = 0; - case (FResultSelD) - // div/sqrt + // Precision + // 0-single + // 1-double + assign FmtD = FResultSelD == 3'b000 ? 
Funct3D[0] : Funct7D[0]; + // div/sqrt // fdiv = ???0 // fsqrt = ???1 - 3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end - // cmp + + // cmp // fmin = ?111 // fmax = ?101 // feq = ?010 // flt = ?001 // fle = ?011 // {?, is min or max, is eq or le, is lt or le} - 3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end - //fma/mult + + //fma/mult // fmadd = ?000 // fmsub = ?001 // fnmsub = ?010 -(a*b)+c // fnmadd = ?011 -(a*b)-c // fmul = ?100 // {?, is mul, is negitive, is sub} - 3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end - // sgn inj + + // sgn inj // fsgnj = ??00 // fsgnjn = ??01 // fsgnjx = ??10 - 3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end - // add/sub/cnvt + + // add/sub/cnvt // fadd = 0000 // fsub = 0001 // fcvt.w.s = 0100 @@ -188,35 +140,18 @@ module fctrl ( // fcvt.d.w = 1110 // fcvt.d.wu = 1111 // fcvt.d.s = 1000 - // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub - 3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), (Rs2D[0]&Funct7D[5])|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end - // classify {?, ?, ?, ?} - 3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end - // output SrcAW + // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub} + // fmv.w.x = ???0 // fmv.w.d = ???1 - 3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end - // output Input1 + // flw = ?000 // fld = ?001 - // fsw = ?010 // output Input2 - // fsd = ?011 // output Input2 + // fsw = ?010 + // fsd = ?011 // fmv.x.w = ?100 // fmv.x.d = ?101 // {?, is mv, is store, is double or fmv} - 3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end - default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; 
FInput2UsedD = 1'b0; end - endcase - end + - //precision - assign FmtD = (~&FResultSelD & Funct7D[0]) | (&FResultSelD & FOpCtrlD[0]); - - assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D; - //write to integer source if conv to int occurs - //AND of Funct7 for int results - // is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv - assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]); - // if not writting to int reg and not a store function and not move - assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & IsFPD; endmodule diff --git a/wally-pipelined/src/fpu/fma1.sv b/wally-pipelined/src/fpu/fma1.sv index ab9d2bb17..76f7316ba 100644 --- a/wally-pipelined/src/fpu/fma1.sv +++ b/wally-pipelined/src/fpu/fma1.sv @@ -1,111 +1,111 @@ module fma1( - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z - input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic FmtE, // precision 1 = double 0 = single - output logic [105:0] ProdManE, // 1.X frac * 1.Y frac - output logic [161:0] AlignedAddendE, // Z aligned for addition - output logic [12:0] ProdExpE, // X exponent + Y exponent - bias - output logic AddendStickyE, // sticky bit that is calculated during alignment - output logic KillProdE, // set the product to zero before addition if the product is too small to matter - output logic XZeroE, YZeroE, ZZeroE, // inputs are zero - output logic XInfE, YInfE, ZInfE, // inputs are infinity - output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z + input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + 
input logic FmtE, // precision 1 = double 0 = single + output logic [105:0] ProdManE, // 1.X frac * 1.Y frac + output logic [161:0] AlignedAddendE, // Z aligned for addition + output logic [12:0] ProdExpE, // X exponent + Y exponent - bias + output logic AddendStickyE, // sticky bit that is calculated during alignment + output logic KillProdE, // set the product to zero before addition if the product is too small to matter + output logic XZeroE, YZeroE, ZZeroE, // inputs are zero + output logic XInfE, YInfE, ZInfE, // inputs are infinity + output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN - logic [51:0] XFrac,YFrac,ZFrac; // input fraction - logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) - logic [12:0] XExp,YExp,ZExp; // input exponents - logic XSgn,YSgn,ZSgn; // input signs - logic [12:0] AlignCnt; // how far to shift the addend to align with the product - logic [211:0] ZManShifted; // output of the alignment shifter including sticky bit - logic [211:0] ZManPreShifted; // input to the alignment shifter - logic XDenorm, YDenorm, ZDenorm; // inputs are denormal - logic [63:0] Addend; // value to add (Z or zero) - logic [12:0] Bias; // 1023 for double, 127 for single - logic XExpZero, YExpZero, ZExpZero; // input exponent zero - logic XFracZero, YFracZero, ZFracZero; // input fraction zero - logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s + logic [51:0] XFrac,YFrac,ZFrac; // input fraction + logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) + logic [12:0] XExp,YExp,ZExp; // input exponents + logic XSgn,YSgn,ZSgn; // input signs + logic [12:0] AlignCnt; // how far to shift the addend to align with the product + logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit + logic [213:0] ZManPreShifted; // input to the alignment shifter + logic XDenorm, YDenorm, ZDenorm; // inputs are denormal + logic [63:0] Addend; // value to add (Z or zero) + logic [12:0] Bias; // 1023 for double, 127 for single + 
logic XExpZero, YExpZero, ZExpZero; // input exponent zero + logic XFracZero, YFracZero, ZFracZero; // input fraction zero + logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s - /////////////////////////////////////////////////////////////////////////////// - // split inputs into the sign bit, fraction, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // split inputs into the sign bit, fraction, and exponent to handle single or double precision + // - single precision is in the top half of the inputs + /////////////////////////////////////////////////////////////////////////////// - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlE[2] ? 64'b0 : Z; + // Set addend to zero if FMUL instruction + assign Addend = FOpCtrlE[2] ? 64'b0 : Z; - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]; + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]; - assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; - assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; - assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; + assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; + assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; + assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; - assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; - assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; - assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0}; - - assign XMan = {~XExpZero, XFrac}; - assign YMan = {~YExpZero, YFrac}; - assign ZMan = {~ZExpZero, ZFrac}; + assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; + assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; + assign ZFrac = FmtE ? 
Addend[51:0] : {Addend[54:32], 29'b0}; + + assign XMan = {~XExpZero, XFrac}; + assign YMan = {~YExpZero, YFrac}; + assign ZMan = {~ZExpZero, ZFrac}; - assign Bias = FmtE ? 13'h3ff : 13'h7f; + assign Bias = FmtE ? 13'h3ff : 13'h7f; - /////////////////////////////////////////////////////////////////////////////// - // determine if an input is a special value - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // determine if an input is a special value + /////////////////////////////////////////////////////////////////////////////// - assign XExpZero = ~|XExp; - assign YExpZero = ~|YExp; - assign ZExpZero = ~|ZExp; - - assign XFracZero = ~|XFrac; - assign YFracZero = ~|YFrac; - assign ZFracZero = ~|ZFrac; + assign XExpZero = ~|XExp; + assign YExpZero = ~|YExp; + assign ZExpZero = ~|ZExp; + + assign XFracZero = ~|XFrac; + assign YFracZero = ~|YFrac; + assign ZFracZero = ~|ZFrac; - assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; - assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; - assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0]; - - assign XNaNE = XExpMax & ~XFracZero; - assign YNaNE = YExpMax & ~YFracZero; - assign ZNaNE = ZExpMax & ~ZFracZero; + assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; + assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; + assign ZExpMax = FmtE ? 
&ZExp[10:0] : &ZExp[7:0]; + + assign XNaNE = XExpMax & ~XFracZero; + assign YNaNE = YExpMax & ~YFracZero; + assign ZNaNE = ZExpMax & ~ZFracZero; - assign XDenorm = XExpZero & ~XFracZero; - assign YDenorm = YExpZero & ~YFracZero; - assign ZDenorm = ZExpZero & ~ZFracZero; + assign XDenorm = XExpZero & ~XFracZero; + assign YDenorm = YExpZero & ~YFracZero; + assign ZDenorm = ZExpZero & ~ZFracZero; - assign XInfE = XExpMax & XFracZero; - assign YInfE = YExpMax & YFracZero; - assign ZInfE = ZExpMax & ZFracZero; + assign XInfE = XExpMax & XFracZero; + assign YInfE = YExpMax & YFracZero; + assign ZInfE = ZExpMax & ZFracZero; - assign XZeroE = XExpZero & XFracZero; - assign YZeroE = YExpZero & YFracZero; - assign ZZeroE = ZExpZero & ZFracZero; + assign XZeroE = XExpZero & XFracZero; + assign YZeroE = YExpZero & YFracZero; + assign ZZeroE = ZExpZero & ZFracZero; - /////////////////////////////////////////////////////////////////////////////// - // Calculate the product - // - When multipliying two fp numbers, add the exponents - // - Subtract the bias (XExp + YExp has two biases, one from each exponent) - // - Denormal numbers have an an exponent value of 1, however they are - // represented with an exponent of 0. add one if there is a denormal number - /////////////////////////////////////////////////////////////////////////////// - - // verilator lint_off WIDTH - assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : - XExp + YExp - Bias + XDenorm + YDenorm; + /////////////////////////////////////////////////////////////////////////////// + // Calculate the product + // - When multipliying two fp numbers, add the exponents + // - Subtract the bias (XExp + YExp has two biases, one from each exponent) + // - Denormal numbers have an an exponent value of 1, however they are + // represented with an exponent of 0. 
add one if there is a denormal number + /////////////////////////////////////////////////////////////////////////////// + + // verilator lint_off WIDTH + assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : + XExp + YExp - Bias + XDenorm + YDenorm; - // Calculate the product's mantissa - // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. - assign ProdManE = XMan * YMan; + // Calculate the product's mantissa + // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. + assign ProdManE = XMan * YMan; @@ -114,72 +114,71 @@ module fma1( - - /////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// - // determine the shift count for alignment - // - negitive means Z is larger, so shift Z left - // - positive means the product is larger, so shift Z right - // - Denormal numbers have an an exponent value of 1, however they are - // represented with an exponent of 0. add one to the exponent if it is a denormal number - assign AlignCnt = ProdExpE - ZExp - ZDenorm; - // verilator lint_on WIDTH + // determine the shift count for alignment + // - negitive means Z is larger, so shift Z left + // - positive means the product is larger, so shift Z right + // - Denormal numbers have an an exponent value of 1, however they are + // represented with an exponent of 0. 
add one to the exponent if it is a denormal number + assign AlignCnt = ProdExpE - ZExp - ZDenorm; + // verilator lint_on WIDTH - // Defualt Addition without shifting - // | 55'b0 | 106'b(product) | 2'b0 | - // |1'b0| addnend | + // Defualt Addition without shifting + // | 55'b0 | 106'b(product) | 2'b0 | + // |1'b0| addnend | - // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {55'b0, ZMan, 104'b0}; - always_comb - begin - - // If the product is too small to effect the sum, kill the product + // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) + assign ZManPreShifted = {55'b0, ZMan, 106'b0}; + always_comb + begin + + // If the product is too small to effect the sum, kill the product - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - if ($signed(AlignCnt) <= $signed(-13'd56)) begin - KillProdE = 1; - ZManShifted = {107'b0, ZMan, 52'b0}; - AddendStickyE = ~(XZeroE|YZeroE); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + if ($signed(AlignCnt) <= $signed(-13'd56)) begin + KillProdE = 1; + ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0}; + AddendStickyE = ~(XZeroE|YZeroE); - // If the Addend is shifted left (negitive AlignCnt) + // If the Addend is shifted left (negitive AlignCnt) - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if($signed(AlignCnt) <= $signed(13'd0)) begin - KillProdE = 0; - ZManShifted = ZManPreShifted << -AlignCnt; - AddendStickyE = |(ZManShifted[49:0]); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if($signed(AlignCnt) <= $signed(13'd0)) begin + KillProdE = 0; + ZManShifted = ZManPreShifted << -AlignCnt; + AddendStickyE = |(ZManShifted[51:0]); - // If the Addend is shifted right (positive AlignCnt) + // If the Addend is shifted right (positive AlignCnt) - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if 
($signed(AlignCnt)<=$signed(13'd104)) begin - KillProdE = 0; - ZManShifted = ZManPreShifted >> AlignCnt; - AddendStickyE = |(ZManShifted[49:0]); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if ($signed(AlignCnt)<=$signed(13'd106)) begin + KillProdE = 0; + ZManShifted = ZManPreShifted >> AlignCnt; + AddendStickyE = |(ZManShifted[51:0]); - // If the addend is too small to effect the addition - // - The addend has to shift two past the end of the addend to be considered too small - // - The 2 extra bits are needed for rounding + // If the addend is too small to effect the addition + // - The addend has to shift two past the end of the addend to be considered too small + // - The 2 extra bits are needed for rounding - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else begin - KillProdE = 0; - ZManShifted = 0; - AddendStickyE = ~ZZeroE; + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else begin + KillProdE = 0; + ZManShifted = 0; + AddendStickyE = ~ZZeroE; - end - end + end + end - - assign AlignedAddendE = ZManShifted[211:50]; - -endmodule + + assign AlignedAddendE = ZManShifted[213:52]; +endmodule \ No newline at end of file diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index f9efe93e8..131f98394 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -1,127 +1,131 @@ + + module fma2( - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic FmtM, // precision 1 = double 0 = single - input logic [105:0] ProdManM, // 1.X frac * 1.Y frac - input logic [161:0] AlignedAddendM, // Z aligned for addition - 
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias - input logic AddendStickyM, // sticky bit that is calculated during alignment - input logic KillProdM, // set the product to zero before addition if the product is too small to matter - input logic XZeroM, YZeroM, ZZeroM, // inputs are zero - input logic XInfM, YInfM, ZInfM, // inputs are infinity - input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - output logic [63:0] FmaResultM, // FMA final result - output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} - + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z + input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic FmtM, // precision 1 = double 0 = single + input logic [105:0] ProdManM, // 1.X frac * 1.Y frac + input logic [161:0] AlignedAddendM, // Z aligned for addition + input logic [12:0] ProdExpM, // X exponent + Y exponent - bias + input logic AddendStickyM, // sticky bit that is calculated during alignment + input logic KillProdM, // set the product to zero before addition if the product is too small to matter + input logic XZeroM, YZeroM, ZZeroM, // inputs are zero + input logic XInfM, YInfM, ZInfM, // inputs are infinity + input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN + output logic [63:0] FmaResultM, // FMA final result + output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} + - logic [51:0] ResultFrac; // Result fraction - logic [10:0] ResultExp; // Result exponent - logic ResultSgn; // Result sign - logic [10:0] ZExp; // input exponent - logic XSgn, YSgn, ZSgn; // input sign - logic PSgn; // product sign - logic [105:0] ProdMan2; // 
product being added - logic [162:0] AlignedAddend2; // possibly inverted aligned Z - logic [161:0] Sum; // positive sum - logic [162:0] PreSum; // possibly negitive sum - logic [12:0] SumExp; // exponent of the normalized sum - logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results - logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 - logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow - logic [53:0] NormSum; // normalized sum - logic [161:0] SumShifted; // sum shifted for normalization - logic [8:0] NormCnt; // output of the leading zero detector - logic NormSumSticky; // sticky bit calulated from the normalized sum - logic SumZero; // is the sum zero - logic NegSum; // is the sum negitive - logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) - logic ResultDenorm; // is the result denormalized - logic Sticky; // Sticky bit - logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding - logic Invalid,Underflow,Overflow,Inexact; // flags - logic [8:0] DenormShift; // right shift if the result is denormalized - logic SubBySmallNum; // was there supposed to be a subtraction by a small number - logic [63:0] Addend; // value to add (Z or zero) - logic ZeroSgn; // the result's sign if the sum is zero - logic ResultSgnTmp; // the result's sign assuming the result is not zero - logic Guard, Round, LSBNormSum; // bits needed to determine rounding - logic [12:0] MaxExp; // maximum value of the exponent - logic [12:0] FracLen; // length of the fraction - logic SigNaN; // is an input a signaling NaN - logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency) - logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results + logic [51:0] ResultFrac; // Result fraction + logic [10:0] ResultExp; // Result exponent + logic ResultSgn; // Result 
sign + logic [10:0] ZExp; // input exponent + logic XSgn, YSgn, ZSgn; // input sign + logic PSgn; // product sign + logic [105:0] ProdMan2; // product being added + logic [162:0] AlignedAddend2; // possibly inverted aligned Z + logic [161:0] Sum; // positive sum + logic [162:0] PreSum; // possibly negitive sum + logic [12:0] SumExp; // exponent of the normalized sum + logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results + logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 + logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow + logic [54:0] NormSum; // normalized sum + logic [161:0] SumShifted; // sum shifted for normalization + logic [8:0] NormCnt; // output of the leading zero detector + logic NormSumSticky; // sticky bit calulated from the normalized sum + logic SumZero; // is the sum zero + logic NegSum; // is the sum negitive + logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) + logic ResultDenorm; // is the result denormalized + logic Sticky; // Sticky bit + logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding + logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag) + logic Invalid,Underflow,Overflow,Inexact; // flags + logic [8:0] DenormShift; // right shift if the result is denormalized + logic SubBySmallNum; // was there supposed to be a subtraction by a small number + logic [63:0] Addend; // value to add (Z or zero) + logic ZeroSgn; // the result's sign if the sum is zero + logic ResultSgnTmp; // the result's sign assuming the result is not zero + logic Guard, Round, LSBNormSum; // bits needed to determine rounding + logic UfGuard, UfRound, UfLSBNormSum; // bits needed to determine rounding for underflow flag + logic [12:0] MaxExp; // maximum value of the exponent + logic [12:0] FracLen; // length of the fraction + logic SigNaN; // is an input a signaling NaN + logic UnderflowFlag; // 
Underflow singal used in FmaFlagsM (used to avoid a circular depencency) + logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results - - /////////////////////////////////////////////////////////////////////////////// - // Select input fields - // The following logic duplicates fma1 because it's cheaper to recompute than provide registers - /////////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////////// + // Select input fields + // The following logic duplicates fma1 because it's cheaper to recompute than provide registers + /////////////////////////////////////////////////////////////////////////////// - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlM[2] ? 64'b0 : Z; + // Set addend to zero if FMUL instruction + assign Addend = FOpCtrlM[2] ? 64'b0 : Z; - // split inputs into the sign bit, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction + // split inputs into the sign bit, and exponent to handle single or double precision + // - single precision is in the top half of the inputs + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction - assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]}; + assign ZExp = FmtM ? 
Addend[62:52] : {3'b0, Addend[62:55]}; - // Calculate the product's sign - // Negate product's sign if FNMADD or FNMSUB - assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; + // Calculate the product's sign + // Negate product's sign if FNMADD or FNMSUB + assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; - /////////////////////////////////////////////////////////////////////////////// - // Addition - /////////////////////////////////////////////////////////////////////////////// - - // Negate Z when doing one of the following opperations: - // -prod + Z - // prod - Z - assign InvZ = ZSgn ^ PSgn; + /////////////////////////////////////////////////////////////////////////////// + // Addition + /////////////////////////////////////////////////////////////////////////////// + + // Negate Z when doing one of the following opperations: + // -prod + Z + // prod - Z + assign InvZ = ZSgn ^ PSgn; - // Choose an inverted or non-inverted addend - the one is added later - assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; - // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; + // Choose an inverted or non-inverted addend - the one is added later + assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; + // Kill the product if the product is too small to effect the addition (determined in fma1.sv) + assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; - // Do the addition - // - add one to negate if the added was inverted - // - the 2 extra bits at the begining and end are needed for rounding - assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; - - // Is the sum negitive - assign NegSum = PreSum[162]; - // If the sum is negitive, negate the sum. - assign Sum = NegSum ? 
-PreSum[161:0] : PreSum[161:0]; + // Do the addition + // - add one to negate if the added was inverted + // - the 2 extra bits at the begining and end are needed for rounding + assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; + + // Is the sum negitive + assign NegSum = PreSum[162]; + // If the sum is negitive, negate the sum. + assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0]; - /////////////////////////////////////////////////////////////////////////////// - // Leading one detector - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Leading one detector + /////////////////////////////////////////////////////////////////////////////// - //*** replace with non-behavoral code - logic [8:0] i; - always_comb begin - i = 0; - while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one - NormCnt = i+1; // compute shift count - end + //*** replace with non-behavoral code + logic [8:0] i; + always_comb begin + i = 0; + while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one + NormCnt = i+1; // compute shift count + end @@ -133,112 +137,127 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Normalization - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Normalization + /////////////////////////////////////////////////////////////////////////////// - // Determine if the sum is zero - assign SumZero = ~(|Sum); + // Determine if the sum is zero + assign SumZero = ~(|Sum); - // determine the length of the fraction based on precision - assign FracLen = FmtM ? 13'd52 : 13'd23; + // determine the length of the fraction based on precision + assign FracLen = FmtM ? 
13'd52 : 13'd23; - // Determine if the result is denormal - assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); - assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; + // Determine if the result is denormal + assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); + assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; - // Determine the shift needed for denormal results - assign SumExpTmpMinus1 = SumExpTmp-1; - assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; + // Determine the shift needed for denormal results + assign SumExpTmpMinus1 = SumExpTmp-1; + assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; - // Normalize the sum - assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; - assign NormSum = SumShifted[161:108]; - // Calculate the sticky bit - assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); - assign Sticky = AddendStickyM | NormSumSticky; + // Normalize the sum + assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; + assign NormSum = SumShifted[161:107]; + // Calculate the sticky bit + assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); + assign Sticky = AddendStickyM | NormSumSticky; - // Determine sum's exponent - assign SumExp = SumZero ? 13'b0 : - ResultDenorm ? 13'b0 : - SumExpTmp; + // Determine sum's exponent + assign SumExp = SumZero ? 13'b0 : + ResultDenorm ? 
13'b0 : + SumExpTmp; - /////////////////////////////////////////////////////////////////////////////// - // Rounding - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Rounding + /////////////////////////////////////////////////////////////////////////////// - // round to nearest even - // {Guard, Round, Sticky} - // 0xx - do nothing - // 100 - tie - Plus1 if result is odd (LSBNormSum = 1) - // - don't add 1 if a small number was supposed to be subtracted - // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // 110/111 - Plus1 + // round to nearest even + // {Guard, Round, Sticky} + // 0xx - do nothing + // 100 - tie - Plus1 if result is odd (LSBNormSum = 1) + // - don't add 1 if a small number was supposed to be subtracted + // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // 110/111 - Plus1 - // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 + // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to -infinity - // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 + // round to -infinity + // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to infinity - // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - 
subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 + // round to infinity + // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 - // round to nearest max magnitude - // {Guard, Round, Sticky} - // 0xx - do nothing - // 100 - tie - Plus1 - // - don't add 1 if a small number was supposed to be subtracted - // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // 110/111 - Plus1 + // round to nearest max magnitude + // {Guard, Round, Sticky} + // 0xx - do nothing + // 100 - tie - Plus1 + // - don't add 1 if a small number was supposed to be subtracted + // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // 110/111 - Plus1 - // determine guard, round, and least significant bit of the result - assign Guard = FmtM ? NormSum[1] : NormSum[30]; - assign Round = FmtM ? NormSum[0] : NormSum[29]; - assign LSBNormSum = FmtM ? NormSum[2] : NormSum[31]; + // determine guard, round, and least significant bit of the result + assign Guard = FmtM ? NormSum[2] : NormSum[31]; + assign Round = FmtM ? NormSum[1] : NormSum[30]; + assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32]; - // Deterimine if a small number was supposed to be subtrated - assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; + // used to determine underflow flag + assign UfGuard = FmtM ? NormSum[1] : NormSum[30]; + assign UfRound = FmtM ? NormSum[0] : NormSum[29]; + assign UfLSBNormSum = FmtM ? 
NormSum[2] : NormSum[31]; - always_comb begin - // Determine if you add 1 - case (FrmM) - 3'b000: CalcPlus1 = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even - 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up - 3'b100: CalcPlus1 = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude - default: CalcPlus1 = 1'bx; - endcase - // Determine if you subtract 1 - case (FrmM) - 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up - 3'b100: CalcMinus1 = 0;//round to nearest max magnitude - default: CalcMinus1 = 1'bx; - endcase - - end + // Deterimine if a small number was supposed to be subtrated + assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; - // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); - assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); + always_comb begin + // Determine if you add 1 + case (FrmM) + 3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&LSBNormSum&~SubBySmallNum));//round to nearest even + 3'b001: CalcPlus1 = 0;//round to zero + 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up + 3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&~SubBySmallNum)));//round to nearest max magnitude + default: CalcPlus1 = 1'bx; + endcase + // Determine if you add 1 (for 
underflow flag) + case (FrmM) + 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even + 3'b001: UfCalcPlus1 = 0;//round to zero + 3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down + 3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up + 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude + default: UfCalcPlus1 = 1'bx; + endcase + // Determine if you subtract 1 + case (FrmM) + 3'b000: CalcMinus1 = 0;//round to nearest even + 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero + 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up + 3'b100: CalcMinus1 = 0;//round to nearest max magnitude + default: CalcMinus1 = 1'bx; + endcase + + end - // Compute rounded result - logic [64:0] RoundAdd; - logic [51:0] NormSumTruncated; - assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : - Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; - assign NormSumTruncated = FmtM ? NormSum[53:2] : {NormSum[53:31], 29'b0}; + // If an answer is exact don't round + assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round); + assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound); + assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round); - assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; + // Compute rounded result + logic [64:0] RoundAdd; + logic [51:0] NormSumTruncated; + assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : + Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; + assign NormSumTruncated = FmtM ? 
NormSum[54:3] : {NormSum[54:32], 29'b0}; + + assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; assign ResultExp = FullResultExp[10:0]; @@ -247,58 +266,57 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Sign calculation - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Sign calculation + /////////////////////////////////////////////////////////////////////////////// - // Determine the sign if the sum is zero - // if cancelation then 0 unless round to -infinity - // otherwise psign - assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; + // Determine the sign if the sum is zero + // if cancelation then 0 unless round to -infinity + // otherwise psign + assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); - assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); + assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; - /////////////////////////////////////////////////////////////////////////////// - // Flags - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Flags + /////////////////////////////////////////////////////////////////////////////// - // Set Invalid flag for following cases: - // 1) Inf - Inf (unless x or y is NaN) - // 2) 0 * Inf - // 3) any input is a signaling NaN - assign MaxExp = FmtM ? 
13'd2047 : 13'd255; - assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : - (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); - assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); - - // Set Overflow flag if the number is too big to be represented - // - Don't set the overflow flag if an overflowed result isn't outputed - assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Invalid flag for following cases: + // 1) any input is a signaling NaN + // 2) Inf - Inf (unless x or y is NaN) + // 3) 0 * Inf + assign MaxExp = FmtM ? 13'd2047 : 13'd255; + assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : + (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); + assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); + + // Set Overflow flag if the number is too big to be represented + // - Don't set the overflow flag if an overflowed result isn't outputed + assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - // Set Underflow flag if the number is too small to be represented in normal numbers - // - Don't set the underflow flag if the result is exact - assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - //assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); - assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); - // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision - // - Don't set the underflow flag if an underflowed result isn't outputed - assign Inexact = 
(Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Underflow flag if the number is too small to be represented in normal numbers + // - Don't set the underflow flag if the result is exact + assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = (FullResultExp[12] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision + // - Don't set the underflow flag if an underflowed result isn't outputed + assign Inexact = (Sticky|UfGuard|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - // Combine flags - // - FMA can't set the Divide by zero flag - // - Don't set the underflow flag if the result was rounded up to a normal number - assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; + // Combine flags + // - FMA can't set the Divide by zero flag + // - Don't set the underflow flag if the result was rounded up to a normal number + assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; @@ -306,31 +324,31 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Select the result - /////////////////////////////////////////////////////////////////////////////// - assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; - assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; - assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; - assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{ResultSgn, 11'h7fe, {52{1'b1}}} : - {ResultSgn, 11'h7ff, 52'b0} : - ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : - {ResultSgn, 8'hff, 55'b0}; - assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; - assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; - assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; - assign FmaResultM = XNaNM ? XNaNResult : - YNaNM ? YNaNResult : - ZNaNM ? ZNaNResult : - Invalid ? InvalidResult : // has to be before inf - XInfM ? {PSgn, X[62:0]} : - YInfM ? {PSgn, Y[62:0]} : - ZInfM ? {ZSgn, Addend[62:0]} : - Overflow ? OverflowResult : - KillProdM ? KillProdResult : // has to be after Underflow - Underflow & ~ResultDenorm ? UnderflowResult : - FmtM ? {ResultSgn, ResultExp, ResultFrac} : - {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; + /////////////////////////////////////////////////////////////////////////////// + // Select the result + /////////////////////////////////////////////////////////////////////////////// + assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; + assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; + assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; + assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} : + {ResultSgn, 11'h7ff, 52'b0} : + ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : + {ResultSgn, 8'hff, 55'b0}; + assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; + assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; + assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; + assign FmaResultM = XNaNM ? XNaNResult : + YNaNM ? YNaNResult : + ZNaNM ? ZNaNResult : + Invalid ? InvalidResult : // has to be before inf + XInfM ? {PSgn, X[62:0]} : + YInfM ? {PSgn, Y[62:0]} : + ZInfM ? {ZSgn, Addend[62:0]} : + Overflow ? OverflowResult : + KillProdM ? KillProdResult : // has to be after Underflow + Underflow & ~ResultDenorm ? UnderflowResult : + FmtM ? {ResultSgn, ResultExp, ResultFrac} : + {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 7f93d33a7..5c15268ed 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -34,7 +34,6 @@ module fpu ( input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - output logic IsFPD, IsFPE, // Read/write enable for memory {read, write} output logic FStallD, // Stall the decode stage if Div/Sqrt instruction output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory @@ -59,8 +58,8 @@ module fpu ( logic SrcZUsedD; // Is input 3 used logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component - logic SelLoadInputE, SelLoadInputM; // 
Select which adress to load when single precision - logic FInput2UsedD, FInput3UsedD; + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals @@ -132,7 +131,8 @@ module fpu ( // fsgn signals logic [63:0] SgnResultE, SgnResultM, SgnResultW; logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; - logic [63:0] FResM; + logic [63:0] FResM, FResW; + logic FFlgM, FFlgW; // instantiation of W stage regfile signals logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; @@ -167,38 +167,19 @@ module fpu ( //***************** // other D/E pipe registers //***************** - // flopenrc #(64) DEReg14(clk, reset, FlushE, ~StallE, FPUResult64W, FPUResult64E); - // flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FWriteEnD, FWriteEnE); - // flopenrc #(3) CtrlRegE2(clk, reset, FlushE, ~StallE, FResultSelD, FResultSelE); - // flopenrc #(3) CtrlRegE3(clk, reset, FlushE, ~StallE, FrmD, FrmE); - // flopenrc #(1) CtrlRegE4(clk, reset, FlushE, ~StallE, FmtD, FmtE); - // flopenrc #(5) CtrlRegE5(clk, reset, FlushE, ~StallE, InstrD[11:7], RdE); - // flopenrc #(4) CtrlRegE6(clk, reset, FlushE, ~StallE, FOpCtrlD, FOpCtrlE); flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - // flopenrc #(1) CtrlRegE8(clk, reset, FlushE, ~StallE, FWriteIntD, FWriteIntE); - // flopenrc #(1) CtrlRegE9(clk, reset, FlushE, ~StallE, FOutputInput2D, FOutputInput2E); - // flopenrc #(2) CtrlRegE10(clk, reset, FlushE, ~StallE, FMemRWD, FMemRWE); - // flopenrc #(1) CtrlRegE11(clk, reset, FlushE, ~StallE, InstrD[15], SelLoadInputE); - flopenrc #(20) CtrlRegE(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD, InstrD[15], IsFPD}, - {FWriteEnE, FResultSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE, SelLoadInputE, IsFPE}); + {Adr1E, Adr2E, 
Adr3E}); + flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); //EXECUTION STAGE - // input muxs for forwarding - // single vs double for SRCAM - // mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); - // //input 1 forwarding mux - // mux4 #(64) SrcXEmux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, ForwardXE, SrcXtmpE); - // mux3 #(64) SrcYEmux(FRD2E, FPUResult64W, FPUResult64E, ForwardYE, SrcYE); - // mux2 #(64) SrcZEmux(FRD3E, FPUResult64E, ForwardZE, SrcZE); - // mux2 #(64) FOutputInput2mux(SrcXtmpE, SrcYE, FOutputInput2E, SrcXE); - // Hazard unit for FPU fpuhazard hazard(.*); + // forwarding muxs mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); @@ -225,6 +206,8 @@ module fpu ( fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*); + + // first of two-stage instance of floating-point add/cvt unit fpuaddcvt1 fpadd1 (.*); @@ -236,6 +219,8 @@ module fpu ( // first and only instance of floating-point classify unit fpuclassify fpuclass (.*); + + // output for store instructions assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; //***************** @@ -295,17 +280,9 @@ module fpu ( //***************** // fpcmp E/M pipe registers //***************** - // flopenrc #(8) EMRegCmp1(clk, reset, FlushM, ~StallM, WE, WM); - // flopenrc #(8) EMRegCmp2(clk, reset, FlushM, ~StallM, XE, XM); - // flopenrc #(1) EMRegcmp3(clk, reset, FlushM, ~StallM, ANaNE, ANaNM); - // flopenrc #(1) EMRegCmp4(clk, reset, FlushM, ~StallM, BNaNE, BNaNM); - // flopenrc #(1) EMRegCmp5(clk, reset, FlushM, ~StallM, AzeroE, AzeroM); - // flopenrc #(1) EMRegCmp6(clk, reset, FlushM, ~StallM, BzeroE, BzeroM); flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); - // flopenrc #(2) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpFCCE, CmpFCCM); flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM); - // put this in for the event we want to delay fsgn - will otherwise bypass //***************** // fpsgn E/M pipe registers //***************** @@ -315,15 +292,9 @@ module fpu ( //***************** // other E/M pipe registers //***************** - flopenrc #(1) EMReg1(clk, reset, FlushM, ~StallM, FWriteEnE, FWriteEnM); - flopenrc #(3) EMReg2(clk, reset, FlushM, ~StallM, FResultSelE, FResultSelM); - flopenrc #(3) EMReg3(clk, reset, FlushM, ~StallM, FrmE, FrmM); - flopenrc #(1) EMReg4(clk, reset, FlushM, ~StallM, FmtE, FmtM); - flopenrc #(5) EMReg5(clk, reset, FlushM, ~StallM, RdE, RdM); - flopenrc #(4) EMReg6(clk, reset, FlushM, ~StallM, FOpCtrlE, FOpCtrlM); - flopenrc #(1) EMReg7(clk, reset, FlushM, ~StallM, FWriteIntE, FWriteIntM); - // flopenrc #(2) EMReg8(clk, reset, FlushM, ~StallM, FMemRWE, FMemRWM); - flopenrc #(1) EMReg9(clk, reset, FlushM, ~StallM, SelLoadInputE, SelLoadInputM); + flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, + {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); 
//***************** // fpuclassify E/M pipe registers @@ -332,24 +303,18 @@ module fpu ( //BEGIN MEMORY STAGE - mux2 #(64) FResMux(AlignedSrcAM, SgnResultM, FResultSelM == 3'b011, FResM); - assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(SrcXMAligned, FCmpResultM[`XLEN-1:0], ClassResultM[`XLEN-1:0], {FResultSelM == 3'b101, FResultSelM == 3'b001}, FIntResM); + mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM); + assign FFlgM = CmpInvalidM & FResSelM[1]; - //adjecent adress values are sent to the FPU, select the correct one - // -imm is 80000 most of the time vs the error one which is 00000 - // mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); - + assign SrcXMAligned = FmtM ? 
SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM); + + // second instance of two-stage FMA unit fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); // second instance of two-stage floating-point add/cvt unit fpuaddcvt2 fpadd2 (.*); - // second instance of two-stage floating-point comparator - // fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), - // .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(SrcXM), .op2(SrcYM), .*); - // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); @@ -397,19 +362,16 @@ module fpu ( //***************** // other M/W pipe registers //***************** - flopenrc #(1) MWReg1(clk, reset, FlushW, ~StallW, FWriteEnM, FWriteEnW); - flopenrc #(3) MWReg2(clk, reset, FlushW, ~StallW, FResultSelM, FResultSelW); - flopenrc #(1) MWReg3(clk, reset, FlushW, ~StallW, FmtM, FmtW); - flopenrc #(5) MWReg4(clk, reset, FlushW, ~StallW, RdM, RdW); - flopenrc #(64) MWReg5(clk, reset, FlushW, ~StallW, AlignedSrcAM, SrcAW); - // flopenrc #(64) MWReg6(clk, reset, FlushW, ~StallW, FLoadStoreResultM, FLoadStoreResultW); - flopenrc #(1) MWReg7(clk, reset, FlushW, ~StallW, FWriteIntM, FWriteIntW); - flopenrc #(4) MWReg6(clk, reset, FlushW, ~StallW, FOpCtrlM, FOpCtrlW); + flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, + {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); //***************** // fpuclassify M/W pipe registers //***************** flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW); + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); @@ -418,14 +380,6 @@ module fpu ( 
//######################################### // BEGIN WRITEBACK STAGE //######################################### - - - // mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); - //***RV32D needs to give two bus transactions - mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW); - mux2 #(64) FLoadStoreResultMux(FLoadResultW, SrcYW, |FOpCtrlW[2:1], FLoadStoreResultW); - @@ -434,47 +388,26 @@ module fpu ( always_comb begin case (FResultSelW) - // div/sqrt - 3'b000 : FPUFlagsW = FDivFlagsW; - // cmp - 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; - //fma/mult - 3'b010 : FPUFlagsW = FmaFlagsW; - // sgn inj - 3'b011 : FPUFlagsW = SgnFlagsW; - // add/sub/cnvt - 3'b100 : FPUFlagsW = FAddFlagsW; - // classify - 3'b101 : FPUFlagsW = 5'b0; - // output SrcAW - 3'b110 : FPUFlagsW = 5'b0; - // output FRD1 - 3'b111 : FPUFlagsW = 5'b0; + 3'b000 : FPUFlagsW = 5'b0; + 3'b001 : FPUFlagsW = FmaFlagsW; + 3'b010 : FPUFlagsW = FAddFlagsW; + 3'b011 : FPUFlagsW = FDivFlagsW; + 3'b100 : FPUFlagsW = {4'b0,FFlgW}; default : FPUFlagsW = 5'bxxxxx; endcase end - + always_comb begin case (FResultSelW) - // div/sqrt - 3'b000 : FPUResult64W = FDivResultW; - // cmp - 3'b001 : FPUResult64W = FCmpResultW; - //fma/mult - 3'b010 : FPUResult64W = FmaResultW; - // sgn inj - 3'b011 : FPUResult64W = SgnResultW; - // add/sub/cnvt - 3'b100 : FPUResult64W = FAddResultW; - // classify - 3'b101 : FPUResult64W = ClassResultW; - // output SrcAW - 3'b110 : FPUResult64W = SrcAW; - // Load/Store/Move to FP-register - 3'b111 : FPUResult64W = FLoadStoreResultW; - default : FPUResult64W = {64{1'bx}}; + 3'b000 : FPUResult64W = FmtW ? 
{ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; + 3'b001 : FPUResult64W = FmaResultW; + 3'b010 : FPUResult64W = FAddResultW; + 3'b011 : FPUResult64W = FDivResultW; + 3'b100 : FPUResult64W = FResW; + default : FPUResult64W = 64'bxxxxx; endcase - end // always_comb + end + // interface between XLEN size datapath and double-precision sized // floating-point results diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv index 03667d84f..4d0895a77 100644 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ b/wally-pipelined/src/fpu/fpuhazard.sv @@ -44,21 +44,21 @@ module fpuhazard( if ((Adr1E == RdM) & FWriteEnM) // if the result will be FResM - if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardXE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W if ((Adr2E == RdM) & FWriteEnM) // if the result will be FResM - if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardYE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W if ((Adr3E == RdM) & FWriteEnM) // if the result will be FResM - if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardZE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index c3303f9ac..44a40045a 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -37,7 +37,7 @@ module datapath ( input logic ALUSrcAE, ALUSrcBE, input logic TargetSrcE, input logic JumpE, - input logic 
IsFPE, + input logic IllegalFPUInstrE, input logic [1:0] MemRWE, input logic [`XLEN-1:0] FWriteDataE, input logic [`XLEN-1:0] PCE, @@ -105,9 +105,9 @@ module datapath ( flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux3 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, ForwardAE, PreSrcAE); - mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, ForwardBE, PreSrcBE); - mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, IsFPE, WriteDataE); + mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, PreSrcAE); + mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, PreSrcBE); + mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 2515f3230..50bf79e80 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -36,8 +36,7 @@ module ieu ( input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, input logic FWriteIntE, - input logic IsFPE, - //input logic [1:0] FMemRWE, + input logic IllegalFPUInstrE, input logic [`XLEN-1:0] FWriteDataE, output logic [`XLEN-1:0] PCTargetE, output logic MulDivE, W64E, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index a77c3ab01..fe1f057ce 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -95,18 +95,17 @@ module wallypipelinedhart ( // floating point unit signals logic [2:0] FRM_REGW; - logic [1:0] FMemRWM, FMemRWE; - logic FStallD; - logic FWriteIntE, FWriteIntM, FWriteIntW; - logic [`XLEN-1:0] FWriteDataE; - logic [`XLEN-1:0] FIntResM; - logic FDivBusyE; - logic IsFPD, IsFPE; - logic IllegalFPUInstrD, IllegalFPUInstrE; - 
logic FloatRegWriteW; - logic FPUStallD; - logic [4:0] SetFflagsM; - logic [`XLEN-1:0] FPUResultW; + logic [1:0] FMemRWM, FMemRWE; + logic FStallD; + logic FWriteIntE, FWriteIntM, FWriteIntW; + logic [`XLEN-1:0] FWriteDataE; + logic [`XLEN-1:0] FIntResM; + logic FDivBusyE; + logic IllegalFPUInstrD, IllegalFPUInstrE; + logic FloatRegWriteW; + logic FPUStallD; + logic [4:0] SetFflagsM; + logic [`XLEN-1:0] FPUResultW; // memory management unit signals logic ITLBWriteF, DTLBWriteM; From 157b1b31bf7341eec198ccacab826e77742081ef Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 30 Jun 2021 19:24:59 -0500 Subject: [PATCH 18/20] Icache ITLB interlock fix. --- wally-pipelined/src/cache/ICacheCntrl.sv | 26 ++++++++++++++++++++---- wally-pipelined/src/cache/icache.sv | 21 +++++++++++-------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index f290f0ad2..bc5c30b3b 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -40,8 +40,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( input logic [31:0] ICacheMemReadData, input logic ICacheMemReadValid, // The address at which we want to search the cache memory - output logic [`PA_BITS-1:0] PCTagF, - output logic [`PA_BITS-1:0] PCNextIndexF, + output logic [`PA_BITS-1:0] PCTagF, + output logic [`PA_BITS-1:0] PCNextIndexF, output logic ICacheReadEn, // Load data into the cache output logic ICacheMemWriteEnable, @@ -56,13 +56,15 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // Outputs to pipeline control stuff output logic ICacheStallF, EndFetchState, + input logic ITLBMissF, + input logic ITLBWriteF, // Signals to/from ahblite interface // A read containing the requested data input logic [`XLEN-1:0] InstrInF, input logic InstrAckF, // The read we request from main memory - output logic [`PA_BITS-1:0] InstrPAdrF, + output logic [`PA_BITS-1:0] InstrPAdrF, output logic 
InstrReadF ); @@ -109,6 +111,10 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? + localparam STATE_TLB_MISS = 19; + localparam STATE_TLB_MISS_DONE = 20; + + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -209,7 +215,9 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( STATE_READY: begin PCMux = 2'b00; ICacheReadEn = 1'b1; - if (hit & ~spill) begin + if (ITLBMissF) begin + NextState = STATE_TLB_MISS; + end else if (hit & ~spill) begin SavePC = 1'b1; ICacheStallF = 1'b0; NextState = STATE_READY; @@ -363,6 +371,16 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( ICacheStallF = 1'b0; NextState = STATE_READY; end + STATE_TLB_MISS: begin + if (ITLBWriteF) begin + NextState = STATE_TLB_MISS_DONE; + end else begin + NextState = STATE_TLB_MISS; + end + end + STATE_TLB_MISS_DONE : begin + NextState = STATE_READY; + end default: begin PCMux = 2'b01; NextState = STATE_READY; diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index abf828fc5..89b2ff9e7 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -28,24 +28,27 @@ module icache ( // Basic pipeline stuff - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, input logic [`PA_BITS-1:0] PCNextF, input logic [`PA_BITS-1:0] PCPF, // Data read in from the ebu unit - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // Read requested from the ebu unit output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF, + output logic InstrReadF, // High if the instruction currently in the fetch stage is compressed - output logic CompressedF, + output logic CompressedF, // High if the icache is requesting a stall - output 
logic ICacheStallF, + output logic ICacheStallF, + input logic ITLBMissF, + input logic ITLBWriteF, + // The raw (not decompressed) instruction that was requested // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros - output logic [31:0] FinalInstrRawF + output logic [31:0] FinalInstrRawF ); // Configuration parameters From ec21126474be1d38574ad22f72c9fe0dc811605f Mon Sep 17 00:00:00 2001 From: Teo Ene Date: Thu, 1 Jul 2021 13:32:42 -0500 Subject: [PATCH 19/20] Flow updated for 90nm --- .gitmodules | 3 - sky130/sky130_osu_sc_t12 | 1 - wally-pipelined/src/generic/lzd.sv~ | 195 ---------------------------- 3 files changed, 199 deletions(-) delete mode 160000 sky130/sky130_osu_sc_t12 delete mode 100755 wally-pipelined/src/generic/lzd.sv~ diff --git a/.gitmodules b/.gitmodules index 65e1e71c9..e69de29bb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "sky130/sky130_osu_sc_t12"] - path = sky130/sky130_osu_sc_t12 - url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/ diff --git a/sky130/sky130_osu_sc_t12 b/sky130/sky130_osu_sc_t12 deleted file mode 160000 index f60f2d039..000000000 --- a/sky130/sky130_osu_sc_t12 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6 diff --git a/wally-pipelined/src/generic/lzd.sv~ b/wally-pipelined/src/generic/lzd.sv~ deleted file mode 100755 index bfffe5e5b..000000000 --- a/wally-pipelined/src/generic/lzd.sv~ +++ /dev/null @@ -1,195 +0,0 @@ -/////////////////////////////////////////// -// lzd.sv -// -// Written: James.Stine@okstate.edu 1 February 2021 -// Modified: -// -// Purpose: Integer Divide instructions -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" -/* verilator lint_off DECLFILENAME */ - -// Original idea came from V. G. Oklobdzija, "An algorithmic and novel -// design of a leading zero detector circuit: comparison with logic -// synthesis," in IEEE Transactions on Very Large Scale Integration -// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi: -// 10.1109/92.273153. 
- -// Modified to be more hierarchical - -module lz2 (P, V, B); - - input logic [1:0] B; - - output logic P; - output logic V; - - assign V = B[0] | B[1]; - assign P = B[0] & ~B[1]; - -endmodule // lz2 - -module lzd_hier #(parameter WIDTH=8) - (input logic [WIDTH-1:0] B, - output logic [$clog2(WIDTH)-1:0] ZP, - output logic ZV); - - if (WIDTH == 128) - lz128 lzd127 (ZP, ZV, B); - else if (WIDTH == 64) - lz64 lzd64 (ZP, ZV, B); - else if (WIDTH == 32) - lz32 lzd32 (ZP, ZV, B); - else if (WIDTH == 16) - lz16 lzd16 (ZP, ZV, B); - else if (WIDTH == 8) - lz8 lzd8 (ZP, ZV, B); - else if (WIDTH == 4) - lz4 lzd4 (ZP, ZV, B); - -endmodule // lzd_hier - -module lz4 (ZP, ZV, B); - - input logic [3:0] B; - - logic ZPa; - logic ZPb; - logic ZVa; - logic ZVb; - - output logic [1:0] ZP; - output logic ZV; - - lz2 l1(ZPa, ZVa, B[1:0]); - lz2 l2(ZPb, ZVb, B[3:2]); - - assign ZP[0:0] = ZVb ? ZPb : ZPa; - assign ZP[1] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz8 (ZP, ZV, B); - - input logic [7:0] B; - - logic [1:0] ZPa; - logic [1:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [2:0] ZP; - output logic ZV; - - lz4 l1(ZPa, ZVa, B[3:0]); - lz4 l2(ZPb, ZVb, B[7:4]); - - assign ZP[1:0] = ZVb ? ZPb : ZPa; - assign ZP[2] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz16 (ZP, ZV, B); - - input logic [15:0] B; - - logic [2:0] ZPa; - logic [2:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [3:0] ZP; - output logic ZV; - - lz8 l1(ZPa, ZVa, B[7:0]); - lz8 l2(ZPb, ZVb, B[15:8]); - - assign ZP[2:0] = ZVb ? ZPb : ZPa; - assign ZP[3] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz16 - -module lz32 (ZP, ZV, B); - - input logic [31:0] B; - - logic [3:0] ZPa; - logic [3:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [4:0] ZP; - output logic ZV; - - lz16 l1(ZPa, ZVa, B[15:0]); - lz16 l2(ZPb, ZVb, B[31:16]); - - assign ZP[3:0] = ZVb ? 
ZPb : ZPa; - assign ZP[4] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz32 - -module lz64 (ZP, ZV, B); - - input logic [63:0] B; - - logic [4:0] ZPa; - logic [4:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [5:0] ZP; - output logic ZV; - - lz32 l1(ZPa, ZVa, B[31:0]); - lz32 l2(ZPb, ZVb, B[63:32]); - - assign ZP[4:0] = ZVb ? ZPb : ZPa; - assign ZP[5] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz64 - -module lz128 (ZP, ZV, B); - - input logic [127:0] B; - - logic [5:0] ZPa; - logic [5:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [6:0] ZP; - output logic ZV; - - lz64 l1(ZPa, ZVa, B[64:0]); - lz64 l2(ZPb, ZVb, B[127:63]); - - assign ZP[5:0] = ZVb ? ZPb : ZPa; - assign ZP[6] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz128 - -/* verilator lint_on DECLFILENAME */