diff --git a/.gitignore b/.gitignore index 2d1454e3..acaab4b0 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,6 @@ sim/branch.log /fpga/generator/sim/syn-funcsim.v external sim/results +tests/wally-riscv-arch-test/riscv-test-suite/rv*i_m/I/src/*.S +tests/wally-riscv-arch-test/riscv-test-suite/rv*i_m/I/Makefrag + diff --git a/README.md b/README.md index d8c2432d..75778102 100644 --- a/README.md +++ b/README.md @@ -209,6 +209,12 @@ It is most convenient if the sysadmin installs riscof into the server’s Python However, riscof can also be installed and run locally by individual users. +### Other Python libraries + +While a sysadmin is installing Python libraries, it's worth doing some more that will be needed by visualization scripts. + + $ sudo pip3 install matplotlib scipy sklearn adjustText lief + ### Install Verilator Verilator is a free Verilog simulator with a good Lint tool used to catch errors in the SystemVerilog code. It is needed to run regression. diff --git a/config/buildroot/wally-config.vh b/config/buildroot/wally-config.vh index 4c84bd1d..a3d32693 100644 --- a/config/buildroot/wally-config.vh +++ b/config/buildroot/wally-config.vh @@ -40,8 +40,9 @@ `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define ZICOUNTERS_SUPPORTED 1 -`define ZFH_SUPPORTED 0 `define COUNTERS 32 +`define ZFH_SUPPORTED 0 +`define SSTC_SUPPORTED 1 // LSU microarchitectural Features `define BUS_SUPPORTED 1 @@ -131,6 +132,7 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 1 diff --git a/config/fpga/wally-config.vh b/config/fpga/wally-config.vh index f86b2e2a..40476652 100644 --- a/config/fpga/wally-config.vh +++ b/config/fpga/wally-config.vh @@ -43,6 +43,7 @@ `define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 `define COUNTERS 32 +`define SSTC_SUPPORTED 1 // LSU microarchitectural Features `define BUS_SUPPORTED 1 @@ -140,6 +141,7 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE or BPSPECULATIVEGLOBAL or BPSPECULATIVEGSHARE or BPOLDGSHARE or BPOLDGSHARE2 `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 1 diff --git a/config/rv32e/wally-config.vh b/config/rv32e/wally-config.vh index b028da8f..db668ae5 100644 --- a/config/rv32e/wally-config.vh +++ b/config/rv32e/wally-config.vh @@ -44,6 +44,7 @@ `define COUNTERS 0 `define ZICOUNTERS_SUPPORTED 0 `define ZFH_SUPPORTED 0 +`define SSTC_SUPPORTED 0 // LSU microarchitectural Features `define BUS_SUPPORTED 1 @@ -135,6 +136,7 @@ `define BPRED_SUPPORTED 0 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 0 diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index a62e2b79..bdfa954b 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -43,6 +43,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 +`define SSTC_SUPPORTED 0 // LSU microarchitectural Features `define BUS_SUPPORTED 1 @@ -134,6 +135,7 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 0 diff --git a/config/rv32i/wally-config.vh b/config/rv32i/wally-config.vh index 56c086be..ee203415 100644 --- a/config/rv32i/wally-config.vh +++ b/config/rv32i/wally-config.vh @@ -44,6 +44,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 0 `define ZFH_SUPPORTED 0 +`define SSTC_SUPPORTED 0 // LSU microarchitectural Features `define BUS_SUPPORTED 0 @@ -135,6 +136,7 @@ `define BPRED_SUPPORTED 0 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 0 diff --git a/config/rv32imc/wally-config.vh b/config/rv32imc/wally-config.vh index 309cfef1..f46663ab 100644 --- a/config/rv32imc/wally-config.vh +++ b/config/rv32imc/wally-config.vh @@ -43,6 +43,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 +`define SSTC_SUPPORTED 0 // LSU microarchitectural Features `define BUS_SUPPORTED 1 @@ -67,7 +68,7 @@ // Integer Divider Configuration // IDIV_BITSPERCYCLE must be 1, 2, or 4 -`define IDIV_BITSPERCYCLE 4 +`define IDIV_BITSPERCYCLE 2 `define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 @@ -134,6 +135,7 @@ `define BPRED_SUPPORTED 0 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 0 diff --git a/config/rv64fpquad/wally-config.vh b/config/rv64fpquad/wally-config.vh index 3f46c565..21bcaf1f 100644 --- a/config/rv64fpquad/wally-config.vh +++ b/config/rv64fpquad/wally-config.vh @@ -44,6 +44,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 1 +`define SSTC_SUPPORTED 0 // LSU microarchitectural Features `define BUS_SUPPORTED 1 @@ -137,6 +138,7 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 0 diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index 017bd9cb..6a40c065 100644 --- a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -44,6 +44,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 +`define SSTC_SUPPORTED 0 // LSU microarchitectural Features `define BUS_SUPPORTED 1 @@ -137,6 +138,7 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE or BPSPECULATIVEGLOBAL or BPSPECULATIVEGSHARE or BPOLDGSHARE or BPOLDGSHARE2 `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 0 diff --git a/config/rv64i/wally-config.vh b/config/rv64i/wally-config.vh index 90d7b404..24a171a7 100644 --- a/config/rv64i/wally-config.vh +++ b/config/rv64i/wally-config.vh @@ -44,6 +44,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 0 `define ZFH_SUPPORTED 0 +`define SSTC_SUPPORTED 0 // LSU microarchitectural Features `define BUS_SUPPORTED 0 @@ -137,6 +138,7 @@ `define BPRED_SUPPORTED 0 `define BPRED_TYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define BPRED_SIZE 10 +`define BTB_SIZE (`BPRED_SIZE) `define HPTW_WRITES_SUPPORTED 0 diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 44b26935..b09e9f38 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -49,7 +49,7 @@ module fdivsqrtpostproc( logic [`DIVb+3:0] W, Sum, DM; logic [`DIVb:0] PreQmM; logic NegStickyM; - logic weq0E, weq0M, WZeroM; + logic weq0E, WZeroM; logic [`XLEN-1:0] IntDivResultM; ////////////////////////// @@ -81,7 +81,6 @@ module fdivsqrtpostproc( ////////////////////////// flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM); - flopenr #(1) WeqZeroMReg(clk, reset, ~StallM, weq0E, weq0M); ////////////////////////// // Memory Stage: Postprocessing diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 5d490df2..0a96e1b1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -129,7 +129,6 @@ module fdivsqrtpreproc ( // pipeline registers flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); @@ -137,6 +136,8 @@ module fdivsqrtpreproc ( flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); + if (`XLEN==64) + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end else begin // Int not supported assign IFNormLenX = {Xm, {(`DIVb-`NF-1){1'b0}}}; diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 6abb76b0..73bf1971 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -110,6 +110,7 @@ module fpu ( logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XExpMaxE; // is the exponent all ones (max value) + logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations // Fma Signals logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting @@ -200,23 +201,20 @@ module fpu ( mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE); // Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z - generate - if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}; - else if(`FPSIZES == 2) - mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, - {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, - {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, - {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes - endgenerate + if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}; + else if(`FPSIZES == 2) + mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes + else if(`FPSIZES == 3 | `FPSIZES == 4) + mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, + {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, + {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, + {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10); mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract // Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z // For add and subtract, Z comes from second source operand - generate - if(`FPSIZES == 1) assign BoxedZeroE = 0; + if(`FPSIZES == 1) assign BoxedZeroE = 0; else if(`FPSIZES == 2) mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes else if(`FPSIZES == 3 | `FPSIZES == 4) @@ -224,7 +222,6 @@ module fpu ( {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}, {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes - endgenerate assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}; mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE); @@ -234,7 +231,7 @@ module fpu ( .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), - .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE)); + .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE)); // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), @@ -255,7 +252,7 @@ module fpu ( .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); // sign injection: fsgnj/fsgnjx/fsgnjn - fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE)); + fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE)); // classify: fclass fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), @@ -268,7 +265,6 @@ module fpu ( // NaN Box SrcA to convert integer to requested FP size - generate if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}; else if(`FPSIZES == 2) mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); @@ -277,14 +273,12 @@ module fpu ( {{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]}, {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes - endgenerate // select a result that may be written to the FP register mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU - generate if(`FPSIZES == 1) assign SgnExtXE = XE; else if(`FPSIZES == 2) @@ -294,7 +288,7 @@ module fpu ( {{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]}, {{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]}, XE, FmtE, SgnExtXE); - endgenerate + if (`FLEN>`XLEN) assign IntSrcXE = SgnExtXE[`XLEN-1:0]; else diff --git a/src/fpu/unpack.sv b/src/fpu/unpack.sv index 356d7e89..d52b454e 100644 --- a/src/fpu/unpack.sv +++ b/src/fpu/unpack.sv @@ -39,7 +39,8 @@ module unpack ( output logic XSubnorm, // is X subnormal output logic XZero, YZero, ZZero, // is XYZ zero output logic XInf, YInf, ZInf, // is XYZ infinity - output logic XExpMax // does X have the maximum exponent (NaN or Inf) + output logic XExpMax, // does X have the maximum exponent (NaN or Inf) + output logic [`FLEN-1:0] XPostBox // X after being properly NaN-boxed ); logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero @@ -48,14 +49,17 @@ module unpack ( unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), - .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), .Subnorm(XSubnorm)); + .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), + .Subnorm(XSubnorm), .PostBox(XPostBox)); unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), - .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), .Subnorm()); + .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), + .Subnorm(), .PostBox()); unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), - .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), .Subnorm()); + .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), + .Subnorm(), .PostBox()); endmodule \ No newline at end of file diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv index 68f98e90..82a2eb8f 100644 --- a/src/fpu/unpackinput.sv +++ b/src/fpu/unpackinput.sv @@ -40,13 +40,14 @@ module unpackinput ( output logic Inf, // is the number infinity output logic ExpNonZero, // is the exponent not zero output logic FracZero, // is the fraction zero - output logic ExpMax, // does In have the maximum exponent (NaN or Inf) - output logic Subnorm // is the number subnormal + output logic ExpMax, // does In have the maximum exponent (NaN or Inf) + output logic Subnorm, // is the number subnormal + output logic [`FLEN-1:0] PostBox // Number reboxed correctly as a NaN ); logic [`NF-1:0] Frac; // Fraction of XYZ - logic BadNaNBox; // is the NaN boxing bad - + logic BadNaNBox; // incorrectly NaN Boxed + if (`FPSIZES == 1) begin // if there is only one floating point format supported assign BadNaNBox = 0; assign Sgn = In[`FLEN-1]; // sign bit @@ -54,6 +55,7 @@ module unpackinput ( assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1 assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's + assign PostBox = In; end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported // largest format | smaller format @@ -75,9 +77,15 @@ module unpackinput ( // double and half assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing + always_comb + if (BadNaNBox) begin +// PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]}; + PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}}; + end else + PostBox = In; // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1]; + assign Sgn = Fmt ? In[`FLEN-1] : (BadNaNBox ? 0 : In[`LEN1-1]); // improperly boxed NaNs are treated as positive // extract the fraction, add trailing zeroes to the mantissa if nessisary assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; @@ -128,8 +136,23 @@ module unpackinput ( default: BadNaNBox = 1'bx; endcase + always_comb + if (BadNaNBox) begin + case (Fmt) + `FMT: PostBox = In; +// `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]}; +// `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, In[`LEN2-`NE2-3:0]}; + `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}}; + `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, {(`LEN2-`NE2-2){1'b0}}}; + default: PostBox = 'x; + endcase + end else + PostBox = In; + // extract the sign bit always_comb + if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive + else case (Fmt) `FMT: Sgn = In[`FLEN-1]; `FMT1: Sgn = In[`LEN1-1]; @@ -137,7 +160,7 @@ module unpackinput ( default: Sgn = 1'bx; endcase - // extract the fraction + // extract the fraction always_comb case (Fmt) `FMT: Frac = In[`NF-1:0]; @@ -200,8 +223,24 @@ module unpackinput ( 2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN]; endcase + always_comb + if (BadNaNBox) begin + case (Fmt) + 2'b11: PostBox = In; +// 2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, In[`D_LEN-`D_NE-3:0]}; +// 2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, In[`S_LEN-`S_NE-3:0]}; +// 2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, In[`H_LEN-`H_NE-3:0]}; + 2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, {(`D_LEN-`D_NE-2){1'b0}}}; + 2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, {(`S_LEN-`S_NE-2){1'b0}}}; + 2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, {(`H_LEN-`H_NE-2){1'b0}}}; + endcase + end else + PostBox = In; + // extract sign bit always_comb + if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive + else case (Fmt) 2'b11: Sgn = In[`Q_LEN-1]; 2'b01: Sgn = In[`D_LEN-1]; diff --git a/src/generic/mem/ram1p1rwbe.sv b/src/generic/mem/ram1p1rwbe.sv index a77a0eee..f3c98873 100644 --- a/src/generic/mem/ram1p1rwbe.sv +++ b/src/generic/mem/ram1p1rwbe.sv @@ -34,7 +34,7 @@ `include "wally-config.vh" -module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) ( +module ram1p1rwbe #(parameter DEPTH=64, WIDTH=44) ( input logic clk, input logic ce, input logic [$clog2(DEPTH)-1:0] addr, @@ -49,17 +49,17 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) ( // *************************************************************************** // TRUE SRAM macro // *************************************************************************** - if (`USE_SRAM == 1 && WIDTH == 128 && DEPTH == 64) begin + if ((`USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray genvar index; // 64 x 128-bit SRAM logic [WIDTH-1:0] BitWriteMask; for (index=0; index < WIDTH; index++) assign BitWriteMask[index] = bwe[index/8]; - TS1N28HPCPSVTB64X128M4SW sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), + ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), .A(addr), .D(din), .BWEB(~BitWriteMask), .Q(dout)); - - end else if (`USE_SRAM == 1 && WIDTH == 44 && DEPTH == 64) begin + + end else if ((`USE_SRAM == 1) & (WIDTH == 44) & (DEPTH == 64)) begin // RV64 cache tag genvar index; // 64 x 44-bit SRAM logic [WIDTH-1:0] BitWriteMask; @@ -69,23 +69,13 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) ( .A(addr), .D(din), .BWEB(~BitWriteMask), .Q(dout)); - end else if (`USE_SRAM == 1 && WIDTH == 128 && DEPTH == 32) begin - genvar index; - // 64 x 128-bit SRAM - logic [WIDTH-1:0] BitWriteMask; - for (index=0; index < WIDTH; index++) - assign BitWriteMask[index] = bwe[index/8]; - TS1N28HPCPSVTB64X128M4SW sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), - .A(addr), .D(din), - .BWEB(~BitWriteMask), .Q(dout)); - - end else if (`USE_SRAM == 1 && WIDTH == 22 && DEPTH == 32) begin + end else if ((`USE_SRAM == 1) & (WIDTH == 22) & (DEPTH == 64)) begin // RV32 cache tag genvar index; // 64 x 22-bit SRAM logic [WIDTH-1:0] BitWriteMask; for (index=0; index < WIDTH; index++) assign BitWriteMask[index] = bwe[index/8]; - ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), + ram1p1rwbe_64x22 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), .A(addr), .D(din), .BWEB(~BitWriteMask), .Q(dout)); @@ -96,9 +86,15 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) ( integer i; // Read - always_ff @(posedge clk) - if(ce) dout <= #1 RAM[addr]; - + logic [$clog2(DEPTH)-1:0] addrd; + flopen #($clog2(DEPTH)) adrreg(clk, ce, addr, addrd); + assign dout = RAM[addrd]; + + /* // Read + always_ff @(posedge clk) + if(ce) dout <= #1 mem[addr]; */ + + // Write divided into part for bytes and part for extra msbs // Questa sim version 2022.3_2 does not allow multiple drivers for RAM when using always_ff. // Therefore these always blocks use the older always @(posedge clk) diff --git a/src/generic/mem/ram1p1rwbe_64x128.sv b/src/generic/mem/ram1p1rwbe_64x128.sv index 095f77c0..84a3e74f 100755 --- a/src/generic/mem/ram1p1rwbe_64x128.sv +++ b/src/generic/mem/ram1p1rwbe_64x128.sv @@ -35,7 +35,7 @@ module ram1p1rwbe_64x128( ); // replace "generic64x128RAM" with "TS1N..64X128.." module from your memory vendor - ts1n28hpcpsvtb64x128m4sw sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); -// generic64x128RAM sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); + //generic64x128RAM sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); + TS1N28HPCPSVTB64X128M4SW sramIP(.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); endmodule diff --git a/src/generic/mem/ram1p1rwbe_64x22.sv b/src/generic/mem/ram1p1rwbe_64x22.sv index 84c8d1b7..8e0f5630 100755 --- a/src/generic/mem/ram1p1rwbe_64x22.sv +++ b/src/generic/mem/ram1p1rwbe_64x22.sv @@ -29,12 +29,16 @@ module ram1p1rwbe_64x22( input logic CEB, input logic WEB, input logic [5:0] A, - input logic [127:0] D, - input logic [127:0] BWEB, - output logic [127:0] Q + input logic [21:0] D, + input logic [21:0] BWEB, + output logic [21:0] Q ); // replace "generic64x22RAM" with "TS1N..64X22.." module from your memory vendor - generic64x22RAM sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); + // use part of a larger RAM to avoid generating more flavors of RAM + logic [43:0] Qfull; + TS1N28HPCPSVTB64X44M4SW sramIP(.CLK, .CEB, .WEB, .A, .D({22'b0, D[21:0]}), .BWEB({22'b0, BWEB[21:0]}), .Q(Qfull)); + assign Q = Qfull[21:0]; + // genericRAM #(64, 22) sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); endmodule diff --git a/src/generic/mem/ram1p1rwbe_64x44.sv b/src/generic/mem/ram1p1rwbe_64x44.sv index 77ba0951..89730a42 100644 --- a/src/generic/mem/ram1p1rwbe_64x44.sv +++ b/src/generic/mem/ram1p1rwbe_64x44.sv @@ -29,13 +29,13 @@ module ram1p1rwbe_64x44( input logic CEB, input logic WEB, input logic [5:0] A, - input logic [43:0] D, - input logic [43:0] BWEB, - output logic [43:0] Q + input logic [43:0] D, + input logic [43:0] BWEB, + output logic [43:0] Q ); // replace "generic64x44RAM" with "TS1N..64X44.." module from your memory vendor - TS1N28HPCPSVTB64X44M4SW sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); - //generic64x44RAM sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); + // generic64x44RAM sramIP (.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); + TS1N28HPCPSVTB64X44M4SW sramIP(.CLK, .CEB, .WEB, .A, .D, .BWEB, .Q); endmodule diff --git a/src/generic/mem/ram2p1r1wbe.sv b/src/generic/mem/ram2p1r1wbe.sv index 0daac6ff..cefd5ab9 100644 --- a/src/generic/mem/ram2p1r1wbe.sv +++ b/src/generic/mem/ram2p1r1wbe.sv @@ -33,7 +33,7 @@ `include "wally-config.vh" -module ram2p1r1wbe #(parameter DEPTH=128, WIDTH=256) ( +module ram2p1r1wbe #(parameter DEPTH=1024, WIDTH=68) ( input logic clk, input logic ce1, ce2, input logic [$clog2(DEPTH)-1:0] ra1, @@ -45,12 +45,14 @@ module ram2p1r1wbe #(parameter DEPTH=128, WIDTH=256) ( ); logic [WIDTH-1:0] mem[DEPTH-1:0]; + localparam SRAMWIDTH = 32; + localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; // *************************************************************************** // TRUE Smem macro // *************************************************************************** - if (`USE_SRAM == 1 && WIDTH == 68 && DEPTH == 1024) begin + if ((`USE_SRAM == 1) & (WIDTH == 68) & (DEPTH == 1024)) begin ram2p1r1wbe_1024x68 memory1(.CLKA(clk), .CLKB(clk), .CEBA(~ce1), .CEBB(~ce2), @@ -62,7 +64,7 @@ module ram2p1r1wbe #(parameter DEPTH=128, WIDTH=256) ( .QA(rd1), .QB()); - end else if (`USE_SRAM == 1 && WIDTH == 36 && DEPTH == 1024) begin + end else if ((`USE_SRAM == 1) & (WIDTH == 36) & (DEPTH == 1024)) begin ram2p1r1wbe_1024x36 memory1(.CLKA(clk), .CLKB(clk), .CEBA(~ce1), .CEBB(~ce2), @@ -74,10 +76,7 @@ module ram2p1r1wbe #(parameter DEPTH=128, WIDTH=256) ( .QA(rd1), .QB()); - end else if (`USE_SRAM == 1 && WIDTH == 2 && DEPTH == 1024) begin - - localparam SRAMWIDTH = 32; - localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; + end else if ((`USE_SRAM == 1) & (WIDTH == 2) & (DEPTH == 1024)) begin logic [SRAMWIDTH-1:0] SRAMReadData; logic [SRAMWIDTH-1:0] SRAMWriteData; @@ -107,104 +106,6 @@ module ram2p1r1wbe #(parameter DEPTH=128, WIDTH=256) ( .QA(SRAMReadData), .QB()); - end else if (`USE_SRAM == 1 && WIDTH == 2 && DEPTH == 4096) begin - - localparam SRAMWIDTH = 64; - localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; - - logic [SRAMWIDTH-1:0] SRAMReadData; - logic [SRAMWIDTH-1:0] SRAMWriteData; - logic [SRAMWIDTH-1:0] RD1Sets[SRAMNUMSETS-1:0]; - logic [SRAMNUMSETS-1:0] SRAMBitMaskPre; - logic [SRAMWIDTH-1:0] SRAMBitMask; - logic [$clog2(DEPTH)-1:0] RA1Q; - - - onehotdecoder #($clog2(SRAMNUMSETS)) oh1(wa2[$clog2(SRAMNUMSETS)-1:0], SRAMBitMaskPre); - genvar index; - for (index = 0; index < SRAMNUMSETS; index++) begin:readdatalinesetsmux - assign RD1Sets[index] = SRAMReadData[(index*WIDTH)+WIDTH-1 : (index*WIDTH)]; - assign SRAMWriteData[index*2+1:index*2] = wd2; - assign SRAMBitMask[index*2+1:index*2] = {2{SRAMBitMaskPre[index]}}; - end - flopen #($clog2(DEPTH)) mem_reg1 (clk, ce1, ra1, RA1Q); - assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]]; - ram2p1r1wbe_128x64 memory2(.CLKA(clk), .CLKB(clk), - .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), - .AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .DA('0), - .DB(SRAMWriteData), - .BWEBA('0), .BWEBB(SRAMBitMask), - .QA(SRAMReadData), - .QB()); - - end else if (`USE_SRAM == 1 && WIDTH == 2 && DEPTH == 16384) begin - - localparam SRAMWIDTH = 64; - localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; - - logic [SRAMWIDTH-1:0] SRAMReadData; - logic [SRAMWIDTH-1:0] SRAMWriteData; - logic [SRAMWIDTH-1:0] RD1Sets[SRAMNUMSETS-1:0]; - logic [SRAMNUMSETS-1:0] SRAMBitMaskPre; - logic [SRAMWIDTH-1:0] SRAMBitMask; - logic [$clog2(DEPTH)-1:0] RA1Q; - - - onehotdecoder #($clog2(SRAMNUMSETS)) oh1(wa2[$clog2(SRAMNUMSETS)-1:0], SRAMBitMaskPre); - genvar index; - for (index = 0; index < SRAMNUMSETS; index++) begin:readdatalinesetsmux - assign RD1Sets[index] = SRAMReadData[(index*WIDTH)+WIDTH-1 : (index*WIDTH)]; - assign SRAMWriteData[index*2+1:index*2] = wd2; - assign SRAMBitMask[index*2+1:index*2] = {2{SRAMBitMaskPre[index]}}; - end - flopen #($clog2(DEPTH)) mem_reg1 (clk, ce1, ra1, RA1Q); - assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]]; - ram2p1r1wbe_512x64 memory2(.CLKA(clk), .CLKB(clk), - .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), - .AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .DA('0), - .DB(SRAMWriteData), - .BWEBA('0), .BWEBB(SRAMBitMask), - .QA(SRAMReadData), - .QB()); - - end else if (`USE_SRAM == 1 && WIDTH == 2 && DEPTH == 65536) begin - - localparam SRAMWIDTH = 64; - localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; - - logic [SRAMWIDTH-1:0] SRAMReadData; - logic [SRAMWIDTH-1:0] SRAMWriteData; - logic [SRAMWIDTH-1:0] RD1Sets[SRAMNUMSETS-1:0]; - logic [SRAMNUMSETS-1:0] SRAMBitMaskPre; - logic [SRAMWIDTH-1:0] SRAMBitMask; - logic [$clog2(DEPTH)-1:0] RA1Q; - - - onehotdecoder #($clog2(SRAMNUMSETS)) oh1(wa2[$clog2(SRAMNUMSETS)-1:0], SRAMBitMaskPre); - genvar index; - for (index = 0; index < SRAMNUMSETS; index++) begin:readdatalinesetsmux - assign RD1Sets[index] = SRAMReadData[(index*WIDTH)+WIDTH-1 : (index*WIDTH)]; - assign SRAMWriteData[index*2+1:index*2] = wd2; - assign SRAMBitMask[index*2+1:index*2] = {2{SRAMBitMaskPre[index]}}; - end - flopen #($clog2(DEPTH)) mem_reg1 (clk, ce1, ra1, RA1Q); - assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]]; - ram2p1r1wbe_2048x64 memory2(.CLKA(clk), .CLKB(clk), - .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), - .AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .DA('0), - .DB(SRAMWriteData), - .BWEBA('0), .BWEBB(SRAMBitMask), - .QA(SRAMReadData), - .QB()); end else begin // *************************************************************************** @@ -212,9 +113,14 @@ module ram2p1r1wbe #(parameter DEPTH=128, WIDTH=256) ( // *************************************************************************** integer i; - // Read + // Read + logic [$clog2(DEPTH)-1:0] ra1d; + flopen #($clog2(DEPTH)) adrreg(clk, ce1, ra1, ra1d); + assign rd1 = mem[ra1d]; + +/* // Read always_ff @(posedge clk) - if(ce1) rd1 <= #1 mem[ra1]; + if(ce1) rd1 <= #1 mem[ra1]; */ // Write divided into part for bytes and part for extra msbs if(WIDTH >= 8) diff --git a/src/generic/mem/ram2p1r1wbe_1024x36.sv b/src/generic/mem/ram2p1r1wbe_1024x36.sv index 0aad7db4..b6b501f2 100755 --- a/src/generic/mem/ram2p1r1wbe_1024x36.sv +++ b/src/generic/mem/ram2p1r1wbe_1024x36.sv @@ -42,7 +42,14 @@ module ram2p1r1wbe_1024x36( ); // replace "generic1024x36RAM" with "TSDN..1024X36.." module from your memory vendor - generic1024x36RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + //generic1024x36RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, + // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + // use part of a larger RAM to avoid generating more flavors of RAM + logic [67:0] QAfull, QBfull; + TSDN28HPCPA1024X68M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, + .AA, .AB, .DA({32'b0, DA[35:0]}), .DB({32'b0, DB[35:0]}), + .BWEBA({32'b0, BWEBA[35:0]}), .BWEBB({32'b0, BWEBB[35:0]}), .QA(QAfull), .QB(QBfull)); + assign QA = QAfull[35:0]; + assign QB = QBfull[35:0]; endmodule diff --git a/src/generic/mem/ram2p1r1wbe_1024x68.sv b/src/generic/mem/ram2p1r1wbe_1024x68.sv index 6dbc23b3..108530be 100755 --- a/src/generic/mem/ram2p1r1wbe_1024x68.sv +++ b/src/generic/mem/ram2p1r1wbe_1024x68.sv @@ -42,9 +42,9 @@ module ram2p1r1wbe_1024x68( ); // replace "generic1024x68RAM" with "TSDN..1024X68.." module from your memory vendor - TSDN28HPCPA1024X68M4MW sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); -// generic1024x68RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, -// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + //generic1024x68RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, + // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + TSDN28HPCPA1024X68M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, + .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); endmodule diff --git a/src/generic/mem/ram2p1r1wbe_64x32.sv b/src/generic/mem/ram2p1r1wbe_64x32.sv index 713aa84f..e551099f 100755 --- a/src/generic/mem/ram2p1r1wbe_64x32.sv +++ b/src/generic/mem/ram2p1r1wbe_64x32.sv @@ -42,9 +42,8 @@ module ram2p1r1wbe_64x32( ); // replace "generic64x32RAM" with "TSDN..64X32.." module from your memory vendor - TSDN28HPCPA64X32M4MW sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, + //generic64x32RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, + // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + TSDN28HPCPA64X32M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); - // generic64x32RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, -// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); - endmodule diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index 64cb9224..c5c1f8b0 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -38,10 +38,10 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, // Core Memory logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; - if (`USE_SRAM == 1 && DATA_WIDTH == 64) begin + if ((`USE_SRAM == 1) & (DATA_WIDTH == 64)) begin rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); - end if (`USE_SRAM == 1 && DATA_WIDTH == 32) begin + end if ((`USE_SRAM == 1) & (DATA_WIDTH == 32)) begin rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); end else begin diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index e1a1ab8f..798ec44c 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -72,7 +72,7 @@ module bpred ( logic PredValidF; logic [1:0] DirPredictionF; - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE; + logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; @@ -143,7 +143,8 @@ module bpred ( // Part 2 Branch target address prediction // BTB contains target address for all CFI - btb TargetPredictor(.clk, .reset, .StallF, .StallD, .StallM, .FlushD, .FlushM, + btb #(`BTB_SIZE) + TargetPredictor(.clk, .reset, .StallF, .StallD, .StallM, .FlushD, .FlushM, .PCNextF, .PCF, .PCD, .PCE, .PredPCF, .BTBPredInstrClassF, @@ -213,8 +214,7 @@ module bpred ( // pipeline the class flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); - flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); - + // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. // if the class prediction is wrong a regular instruction may have been predicted as a taken branch diff --git a/src/lsu/dtim.sv b/src/lsu/dtim.sv index 4a5cba1f..9383b776 100644 --- a/src/lsu/dtim.sv +++ b/src/lsu/dtim.sv @@ -42,12 +42,16 @@ module dtim( logic we; - localparam ADDR_WDITH = $clog2(`DTIM_RANGE/8); - localparam OFFSET = $clog2(`LLEN/8); + localparam LLENBYTES = `LLEN/8; + // verilator lint_off WIDTH + localparam DEPTH = `DTIM_RANGE/LLENBYTES; + // verilator lint_on WIDTH + localparam ADDR_WDITH = $clog2(DEPTH); + localparam OFFSET = $clog2(LLENBYTES); assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap. - ram1p1rwbe #(.DEPTH(`DTIM_RANGE/8), .WIDTH(`LLEN)) + ram1p1rwbe #(.DEPTH(DEPTH), .WIDTH(`LLEN)) ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(DTIMAdr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); endmodule diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 8a440c39..5b524816 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -107,15 +107,21 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); - pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW, - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .ExecuteAccessF, .WriteAccessM, .ReadAccessM, - .PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM); + if (`PMP_ENTRIES > 0) + pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW, + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, + .ExecuteAccessF, .WriteAccessM, .ReadAccessM, + .PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM); + else begin + assign PMPInstrAccessFaultF = 0; + assign PMPStoreAmoAccessFaultM = 0; + assign PMPLoadAccessFaultM = 0; + end // Access faults // If TLB miss and translating we want to not have faults from the PMA and PMP checkers. - assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~(Translate & ~TLBHit); - assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~(Translate & ~TLBHit); + assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~(Translate & ~TLBHit); + assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~(Translate & ~TLBHit); assign StoreAmoAccessFaultM = (PMAStoreAmoAccessFaultM | PMPStoreAmoAccessFaultM) & ~(Translate & ~TLBHit); // Misaligned faults diff --git a/src/mmu/pmpchecker.sv b/src/mmu/pmpchecker.sv index a5796136..9c9c745b 100644 --- a/src/mmu/pmpchecker.sv +++ b/src/mmu/pmpchecker.sv @@ -49,15 +49,15 @@ module pmpchecker ( output logic PMPStoreAmoAccessFaultM ); - if (`PMP_ENTRIES > 0) begin - // Bit i is high when the address falls in PMP region i - logic EnforcePMP; - logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges - logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. - logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null - logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set - logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + // Bit i is high when the address falls in PMP region i + logic EnforcePMP; + logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges + logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. + logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null + logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set + logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + if (`PMP_ENTRIES > 0) pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( .PhysicalAddress, .PMPCfg(PMPCFG_ARRAY_REGW), @@ -66,17 +66,12 @@ module pmpchecker ( .PAgePMPAdrOut(PAgePMPAdr), .Match, .Active, .L, .X, .W, .R); - priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches. + priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches. - // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region - assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |(L & FirstMatch) : |Active; + // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region + assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |(L & FirstMatch) : |Active; - assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; - assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; - assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ; - end else begin - assign PMPInstrAccessFaultF = 0; - assign PMPStoreAmoAccessFaultM = 0; - assign PMPLoadAccessFaultM = 0; - end + assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; + assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; + assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ; endmodule diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index ef4d73cb..7b765bae 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -113,6 +113,7 @@ module csr #(parameter logic SelMtvecM; logic [`XLEN-1:0] TVecAlignedM; logic InstrValidNotFlushedM; + logic STimerInt; // only valid unflushed instructions can access CSRs assign InstrValidNotFlushedM = InstrValidM & ~StallW & ~FlushW; @@ -201,7 +202,7 @@ module csr #(parameter csri csri(.clk, .reset, .InstrValidNotFlushedM, .CSRMWriteM, .CSRSWriteM, .CSRWriteValM, .CSRAdrM, - .MExtInt, .SExtInt, .MTimerInt, .MSwInt, + .MExtInt, .SExtInt, .MTimerInt, .STimerInt, .MSwInt, .MIP_REGW, .MIE_REGW, .MIP_REGW_writeable); csrsr csrsr(.clk, .reset, .StallW, @@ -227,11 +228,12 @@ module csr #(parameter csrs csrs(.clk, .reset, .InstrValidNotFlushedM, .CSRSWriteM, .STrapM, .CSRAdrM, .NextEPCM, .NextCauseM, .NextMtvalM, .SSTATUS_REGW, - .STATUS_TVM, .CSRWriteValM, .PrivilegeModeW, + .STATUS_TVM, .MCOUNTEREN_TM(MCOUNTEREN_REGW[1]), + .CSRWriteValM, .PrivilegeModeW, .CSRSReadValM, .STVEC_REGW, .SEPC_REGW, .SCOUNTEREN_REGW, - .SATP_REGW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, - .WriteSSTATUSM, .IllegalCSRSAccessM); + .SATP_REGW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MTIME_CLINT, + .WriteSSTATUSM, .IllegalCSRSAccessM, .STimerInt); end else begin assign WriteSSTATUSM = 0; assign CSRSReadValM = 0; diff --git a/src/privileged/csri.sv b/src/privileged/csri.sv index 0de65bee..bdbb0f59 100644 --- a/src/privileged/csri.sv +++ b/src/privileged/csri.sv @@ -39,13 +39,14 @@ module csri #(parameter input logic CSRMWriteM, CSRSWriteM, input logic [`XLEN-1:0] CSRWriteValM, input logic [11:0] CSRAdrM, - input logic MExtInt, SExtInt, MTimerInt, MSwInt, + input logic MExtInt, SExtInt, MTimerInt, STimerInt, MSwInt, output logic [11:0] MIP_REGW, MIE_REGW, - output logic [11:0] MIP_REGW_writeable // only SEIP, STIP, SSIP are actually writeable; the rest are hardwired to 0 + output logic [11:0] MIP_REGW_writeable // only SEIP, STIP, SSIP are actually writeable; the rest are hardwired to 0 ); logic [11:0] MIP_WRITE_MASK, SIP_WRITE_MASK, MIE_WRITE_MASK; logic WriteMIPM, WriteMIEM, WriteSIPM, WriteSIEM; + logic STIP; // Interrupt Write Enables assign WriteMIPM = CSRMWriteM & (CSRAdrM == MIP) & InstrValidNotFlushedM; @@ -58,7 +59,13 @@ module csri #(parameter // SEIP, STIP, SSIP is writable in MIP if S mode exists // SSIP is writable in SIP if S mode exists if (`S_SUPPORTED) begin:mask - assign MIP_WRITE_MASK = 12'h222; // SEIP, STIP, SSIP are writeable in MIP (20210108-draft 3.1.9) + if (`SSTC_SUPPORTED) begin + assign MIP_WRITE_MASK = 12'h202; // SEIP and SSIP are writable, but STIP is not writable when STIMECMP is implemented (see SSTC spec) + assign STIP = STimerInt; + end else begin + assign MIP_WRITE_MASK = 12'h222; // SEIP, STIP, SSIP are writeable in MIP (20210108-draft 3.1.9) + assign STIP = MIP_REGW_writeable[5]; + end assign SIP_WRITE_MASK = 12'h002; // SSIP is writeable in SIP (privileged 20210108-draft 4.1.3) assign MIE_WRITE_MASK = 12'hAAA; end else begin:mask @@ -75,5 +82,8 @@ module csri #(parameter else if (WriteMIEM) MIE_REGW <= (CSRWriteValM[11:0] & MIE_WRITE_MASK); // MIE controls M and S fields else if (WriteSIEM) MIE_REGW <= (CSRWriteValM[11:0] & 12'h222) | (MIE_REGW & 12'h888); // only S fields - assign MIP_REGW = {MExtInt,1'b0,SExtInt|MIP_REGW_writeable[9],1'b0,MTimerInt,1'b0,MIP_REGW_writeable[5],1'b0,MSwInt,1'b0,MIP_REGW_writeable[1],1'b0}; + + assign MIP_REGW = {MExtInt, 1'b0, SExtInt|MIP_REGW_writeable[9], 1'b0, + MTimerInt, 1'b0, STIP, 1'b0, + MSwInt, 1'b0, MIP_REGW_writeable[1], 1'b0}; endmodule diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 3dfb2cbe..99c56ad1 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -41,22 +41,27 @@ module csrs #(parameter SCAUSE = 12'h142, STVAL = 12'h143, SIP= 12'h144, + STIMECMP = 12'h14D, + STIMECMPH = 12'h15D, SATP = 12'h180) ( - input logic clk, reset, - input logic InstrValidNotFlushedM, - input logic CSRSWriteM, STrapM, - input logic [11:0] CSRAdrM, - input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, SSTATUS_REGW, - input logic STATUS_TVM, - input logic [`XLEN-1:0] CSRWriteValM, - input logic [1:0] PrivilegeModeW, + input logic clk, reset, + input logic InstrValidNotFlushedM, + input logic CSRSWriteM, STrapM, + input logic [11:0] CSRAdrM, + input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, SSTATUS_REGW, + input logic STATUS_TVM, + input logic MCOUNTEREN_TM, // TM bit (1) of MCOUNTEREN; cause illegal instruction when trying to access STIMECMP if clear + input logic [`XLEN-1:0] CSRWriteValM, + input logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] CSRSReadValM, STVEC_REGW, output logic [`XLEN-1:0] SEPC_REGW, output logic [31:0] SCOUNTEREN_REGW, output logic [`XLEN-1:0] SATP_REGW, - input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, - output logic WriteSSTATUSM, - output logic IllegalCSRSAccessM + input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, + input logic [63:0] MTIME_CLINT, + output logic WriteSSTATUSM, + output logic IllegalCSRSAccessM, + output logic STimerInt ); // Constants @@ -66,10 +71,13 @@ module csrs #(parameter logic WriteSTVECM; logic WriteSSCRATCHM, WriteSEPCM; logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM; + logic WriteSTIMECMPM, WriteSTIMECMPHM; logic [`XLEN-1:0] SSCRATCH_REGW, STVAL_REGW; logic [`XLEN-1:0] SCAUSE_REGW; + logic [63:0] STIMECMP_REGW; // write enables + // *** can InstrValidNotFlushed be factored out of all these writes into CSRWriteM? assign WriteSSTATUSM = CSRSWriteM & (CSRAdrM == SSTATUS) & InstrValidNotFlushedM; assign WriteSTVECM = CSRSWriteM & (CSRAdrM == STVEC) & InstrValidNotFlushedM; assign WriteSSCRATCHM = CSRSWriteM & (CSRAdrM == SSCRATCH) & InstrValidNotFlushedM; @@ -78,6 +86,8 @@ module csrs #(parameter assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)) & InstrValidNotFlushedM; assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM) & InstrValidNotFlushedM; assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN) & InstrValidNotFlushedM; + assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & MCOUNTEREN_TM & InstrValidNotFlushedM; + assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & MCOUNTEREN_TM & (`XLEN == 32) & InstrValidNotFlushedM; // CSRs flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); @@ -90,7 +100,20 @@ module csrs #(parameter else assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported flopens #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); + if (`XLEN == 64) + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); + else begin + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); + flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); + end + // Supervisor timer interrupt logic + // Spec is a bit peculiar - Machine timer interrupts are produced in CLINT, while Supervisor timer interrupts are in CSRs + if (`SSTC_SUPPORTED) + assign STimerInt = ({1'b0, MTIME_CLINT} >= {1'b0, STIMECMP_REGW}); // unsigned comparison + else + assign STimerInt = 0; + // CSR Reads always_comb begin:csrr IllegalCSRSAccessM = 0; @@ -109,10 +132,20 @@ module csrs #(parameter if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1; end SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW}; + STIMECMP: if (MCOUNTEREN_TM) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; + else begin + CSRSReadValM = 0; + IllegalCSRSAccessM = 1; + end + STIMECMPH: if (MCOUNTEREN_TM & (`XLEN == 32)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; + else begin // not supported for RV64 + CSRSReadValM = 0; + IllegalCSRSAccessM = 1; + end default: begin CSRSReadValM = 0; IllegalCSRSAccessM = 1; - end + end endcase end endmodule diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv index 1fa1fe8e..a92aca3b 100644 --- a/src/privileged/csrsr.sv +++ b/src/privileged/csrsr.sv @@ -68,7 +68,7 @@ module csrsr ( STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0, STATUS_SPP, /*STATUS_MPIE*/ 1'b0, STATUS_UBE, STATUS_SPIE, /*1'b0, STATUS_MIE, 1'b0*/ 3'b0, STATUS_SIE, 1'b0}; - assign MSTATUSH_REGW = '0; // *** does not exist when XLEN=64, but don't want it to have an undefined value. Spec is not clear what it should be. + assign MSTATUSH_REGW = '0; // *** does not exist when XLEN=64, but don't want it to have an undefined value. Spec is not clear what it should be. end else begin: csrsr32 // RV32 assign MSTATUS_REGW = {STATUS_SD, 8'b0, STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, diff --git a/synthDC/.synopsys_dc.setup b/synthDC/.synopsys_dc.setup index f058bd82..5c6b7f6b 100755 --- a/synthDC/.synopsys_dc.setup +++ b/synthDC/.synopsys_dc.setup @@ -3,33 +3,60 @@ set CURRENT_DIR [exec pwd] set search_path [list "./" ] -set memory ../memory -set pdk /import/yukari1/pdk/TSMC/28/CMOS/HPC+/stclib/7-track/tcbn28hpcplusbwp7t30p140-set/ -set tsmc28nlib $pdk/tcbn28hpcplusbwp7t30p140_190a_FE/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp7t30p140_180a -set iolib1p8 /import/yukari1/pdk/TSMC/28/CMOS/HPC+/IO1.8V/iolib/STAGGERED/tphn28hpcpgv18_170d_FE/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tphn28hpcpgv18_170a/ -lappend search_path $tsmc28nlib -lappend search_path $iolib1p8 -lappend search_path $memory +set tech $::env(TECH) + +if { [info exists ::env(RISCV)] } { + set timing_lib $::env(RISCV)/cad/lib +} else { + set timing_lib ../addins +} + +if {$tech == "sky130"} { + set s8lib $timing_lib/sky130_osu_sc_t12/12T_ms/lib + lappend search_path $s8lib +} elseif {$tech == "sky90"} { + set s9lib $timing_lib/sky90/sky90_sc/V1.7.4/lib + lappend search_path $s9lib +} elseif {$tech == "tsmc28"} { + set pdk /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/ + set osupdk /import/yukari1/pdk/TSMC/28/CMOS/HPC+/stclib/9-track/tcbn28hpcplusbwp30p140-set/tcbn28hpcplusbwp30p140_190a_FE/ + set s10lib $pdk/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a + lappend search_path $s10lib +} elseif {$tech == "tsmc28psyn"} { + set TLU /home/jstine/TLU+ + set pdk /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/ + set osupdk /import/yukari1/pdk/TSMC/28/CMOS/HPC+/stclib/9-track/tcbn28hpcplusbwp30p140-set/tcbn28hpcplusbwp30p140_190a_FE/ + set s10lib $pdk/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a + lappend search_path $s10lib + set TLUPLUS true + set mw_logic1_net VDD + set mw_logic0_net VSS + set CAPTABLE $TLU/1p8m/ + set MW_REFERENCE_LIBRARY /home/jstine/MW + set MW_TECH_FILE tcbn28hpcplusbwp30p140 + set MIN_TLU_FILE $CAPTABLE/crn28hpc+_1p08m+ut-alrdl_5x1z1u_rcbest.tluplus + set MAX_TLU_FILE $CAPTABLE/crn28hpc+_1p08m+ut-alrdl_5x1z1u_rcworst.tluplus + set PRS_MAP_FILE $MW_REFERENCE_LIBRARY/astro_layername.map +} # Synthetic libraries set synthetic_library [list dw_foundation.sldb] -# Set OKSTATE standard cell libraries +# Set standard cell libraries set target_library [list] -lappend target_library $iolib1p8/tphn28hpcpgv18tt0p9v1p8v25c.db -lappend target_library $tsmc28nlib/tcbn28hpcplusbwp7t30p140tt0p9v25c.db -lappend target_library $memory/ts3n28hpcpa128x64m8m_130a/NLDM/ts3n28hpcpa128x64m8m_tt0p9v25c.db -lappend target_library $memory/ts1n28hpcpsvtb64x128m4sw_180a/NLDM/ts1n28hpcpsvtb64x128m4sw_tt0p9v25c.db -lappend target_library $memory/ts1n28hpcpsvtb64x44m4sw_180a/NLDM/ts1n28hpcpsvtb64x44m4sw_tt0p9v25c.db -lappend target_library $memory/tsdn28hpcpa1024x68m4mw_130a/NLDM/tsdn28hpcpa1024x68m4mw_tt0p9v25c.db -lappend target_library $memory/tsdn28hpcpa64x32m4mw_130a/NLDM/tsdn28hpcpa64x32m4mw_tt0p9v25c.db -lappend target_library $memory/dbs/tsdn28hpcpa128x64m4fw_tt0p9v25c.db -lappend target_library $memory/dbs/tsdn28hpcpa512x64m4fw_tt0p9v25c.db -lappend target_library $memory/dbs/tsdn28hpcpa2048x64m4mw_tt0p9v25c.db - -# Set Link Library -set link_library "$target_library $synthetic_library" +#lappend target_library scc9gena_tt_1.2v_25C.db +if {$tech == "sky130"} { + lappend target_library $s8lib/sky130_osu_sc_12T_ms_TT_1P8_25C.ccs.db +} elseif {$tech == "sky90"} { + lappend target_library $s9lib/scc9gena_tt_1.2v_25C.db +} elseif {$tech == "tsmc28"} { + lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db +} elseif {$tech == "tsmc28psyn"} { + set mw_reference_library [list ] + lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db + lappend mw_reference_library $MW_REFERENCE_LIBRARY/tcbn28hpcplusbwp30p140 +} # Set up DesignWare cache read and write directories to speed up compile. set cache_write ~ @@ -39,6 +66,21 @@ set cache_read $cache_write lappend search_path ./scripts lappend search_path ./hdl lappend search_path ./mapped +if {($tech == "tsmc28psyn")} { + set memory /home/jstine/WallyMem/rv64gc/ + set osumemory /import/yukari1/pdk/TSMC/WallyMem/rv64gc/ + lappend target_library $memory/ts1n28hpcpsvtb64x128m4sw_180a/NLDM/ts1n28hpcpsvtb64x128m4sw_tt0p9v25c.db + lappend target_library $memory/ts1n28hpcpsvtb64x44m4sw_180a/NLDM/ts1n28hpcpsvtb64x44m4sw_tt0p9v25c.db + lappend target_library $memory/tsdn28hpcpa1024x68m4mw_130a/NLDM/tsdn28hpcpa1024x68m4mw_tt0p9v25c.db + lappend target_library $memory/tsdn28hpcpa64x32m4mw_130a/NLDM/tsdn28hpcpa64x32m4mw_tt0p9v25c.db + lappend mw_reference_library $MW_REFERENCE_LIBRARY/ts1n28hpcpsvtb64x44m4sw + lappend mw_reference_library $MW_REFERENCE_LIBRARY/ts1n28hpcpsvtb64x128m4sw + lappend mw_reference_library $MW_REFERENCE_LIBRARY/tsdn28hpcpa1024x68m4mw + lappend mw_reference_library $MW_REFERENCE_LIBRARY/tsdn28hpcpa64x32m4mw +} + +# Set Link Library +set link_library "$target_library $synthetic_library" # Set up User Information set company "Oklahoma State University" diff --git a/synthDC/Makefile b/synthDC/Makefile index 76531d95..5ea9677c 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -20,7 +20,7 @@ export MAXCORES ?= 1 # The output netlist is hard to interpret, but significantly better PPA export MAXOPT ?= 0 export DRIVE ?= FLOP -export USESRAM ?= 1 +export USESRAM ?= 0 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) @@ -30,8 +30,6 @@ export SAIFPOWER ?= 0 OLDCONFIGDIR ?= ${WALLY}/config export CONFIGDIR ?= $(OUTPUTDIR)/config - - default: @echo " Basic synthesis procedure for Wally:" @echo " Invoke with make synth" @@ -84,6 +82,15 @@ endif endif # adjust config if synthesizing with any modifications +# This code is subtle with ifneq. It successively turns off a larger +# set of features in order of cycle time limiting. +# When mod = orig, all features are ON +# When mod = PMP0, the number of PMP entries is set to 0 +# when mod = noPriv, the privileged unit and PMP are disabled +# when mod = noFPU, the FPU, privileged unit, and PMP are disabled +# when mod = noMulDiv, the MDU, FPU, privileged unit, and PMP are disabled. +# when mod = noAtomic, the Atomic, MDU, FPU, privileged unit, and PMP are disabled + ifneq ($(MOD), orig) # PMP 0 sed -i 's/PMP_ENTRIES \(64\|16\|0\)/PMP_ENTRIES 0/' $(CONFIGDIR)/wally-config.vh @@ -97,6 +104,10 @@ ifneq ($(MOD), noPriv) ifneq ($(MOD), noFPU) # no muldiv sed -i 's/1 *<< *12/0 << 12/' $(CONFIGDIR)/wally-config.vh +ifneq ($(MOD), noMulDiv) + # no atomic + sed -i 's/1 *<< *0/0 << 0/' $(CONFIGDIR)/wally-config.vh +endif endif endif endif @@ -116,13 +127,16 @@ mkdirecs: @mkdir -p $(OUTPUTDIR)/mapped @mkdir -p $(OUTPUTDIR)/unmapped - synth: mkdirecs configs rundc clean rundc: - dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out - - +ifeq ($(TECH), tsmc28psyn) + dc_shell-xg-t -64bit -topographical_mode -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out +else + dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out +endif + + clean: rm -rf $(OUTPUTDIR)/hdl rm -rf $(OUTPUTDIR)/WORK @@ -132,4 +146,4 @@ clean: rm -f filenames*.log rm -f power.saif rm -f Synopsys_stack_trace_*.txt - rm -f crte_*.txt \ No newline at end of file + rm -f crte_*.txt diff --git a/synthDC/extractArea.pl b/synthDC/extractArea.pl new file mode 100755 index 00000000..d16c74df --- /dev/null +++ b/synthDC/extractArea.pl @@ -0,0 +1,118 @@ +#!/bin/perl -W + +########################################### +## extractArea.pl +## +## Written: David_Harris@hmc.edu +## Created: 19 Feb 2023 +## Modified: +## +## Purpose: Pull area statistics from run directory +## +## A component of the CORE-V-WALLY configurable RISC-V project. +## +## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +## +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +## +## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +## except in compliance with the License, or, at your option, the Apache License version 2.0. You +## may obtain a copy of the License at +## +## https:##solderpad.org/licenses/SHL-2.1/ +## +## Unless required by applicable law or agreed to in writing, any work distributed under the +## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +## either express or implied. See the License for the specific language governing permissions +## and limitations under the License. +################################################################################################ + + +use strict; +use warnings; +import os; + +my %configResults; +my $dir = "runs"; +my $macro = "Macro/Black Box area:"; +my $seq = "Noncombinational area:"; +my $buf = "Buf/Inv area:"; +my $comb = "Combinational area:"; +my $macroC = "Number of macros/black boxes:"; +my $seqC = "Number of sequential cells:"; +my $bufC = "Number of buf/inv:"; +my $combC = "Number of combinational cells:"; +my @keywords = ("ifu", "ieu", "lsu", "hzu", "ebu.ebu", "priv.priv", "mdu.mdu", "fpu.fpu", "wallypipelinedcore", $macro, $seq, $buf, $comb, $macroC, $seqC, $bufC, $combC); +my @keywordsp = ("ifu", "ieu", "lsu", "hzu", "ebu.ebu", "priv.priv", "mdu.mdu", "fpu.fpu", "wallypipelinedcore", + "RAMs", "Flip-flops", "Inv/Buf", "Logic", "RAMs Cnt", "Flip-flops Cnt", "Inv/Buf Cnt", "Logic Cnt", "Total Cnt"); +my @configs = ("rv32e", "rv32i", "rv32imc", "rv32gc", "rv64i", "rv64gc"); + +opendir(DIR, $dir) or die "Could not open $dir"; + +while (my $filename = readdir(DIR)) { + if ($filename =~ /orig_tsmc28psyn/) { +# print "$filename\n"; + &processRun("$dir/$filename"); + } +} +closedir(DIR); + +# print table of results +printf("%20s\t", ""); +foreach my $config (@configs) { + printf("%s\t", $config); +} +print ("\n"); +foreach my $kw (@keywordsp) { + my $kws = substr($kw, 0, 3); + printf("%20s\t", $kw); + foreach my $config (@configs) { + my $r = $configResults{$config}; + if (exists ${$r}{$kw}) { + my $area = ${$r}{$kw}; + while ($area =~ s/(\d+)(\d\d\d)/$1\,$2/){}; + #print "${$r}{$kw}\t"; + print "$area\t"; + } else { + print("\t"); + } + } + print("\n"); +} + +sub processRun { + my $fname = shift; + my $ffname = "$fname/reports/wallypipelinedcore_area.rep"; + open(FILE, "$ffname") or die ("Could not read $ffname"); + + # Extract configuration from fname; + $fname =~ /_([^_]*)_orig/; + my $config = $1; + #print("Reading $config from $ffname\n"); + + # Search for results + my %results; + while (my $line = ) { + foreach my $kw (@keywords) { + # print "$kw $line\n"; + if ($line =~ /^${kw}\s+(\S*)/) { + #print "$line $kw $1\n"; + $results{$kw} = int($1); + } + } + } + foreach my $kw (@keywords) { + #print "$kw\t$results{$kw}\n"; + } + $results{"Logic"} = $results{$comb} - $results{$buf}; + $results{"Inv/Buf"} = $results{$buf}; + $results{"Flip-flops"} = $results{$seq}; + $results{"RAMs"} = $results{$macro}; + $results{"Logic Cnt"} = $results{$combC} - $results{$bufC}; + $results{"Inv/Buf Cnt"} = $results{$bufC}; + $results{"Flip-flops Cnt"} = $results{$seqC}; + $results{"RAMs Cnt"} = $results{$macroC}; + $results{"Total Cnt"} = $results{$macroC} + $results{$seqC} + $results{$combC}; + close(FILE); + $configResults{$config} = \%results; +} \ No newline at end of file diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 3d939644..94902f9c 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -85,7 +85,7 @@ def freqPlot(tech, width, config): freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('orig' == oneSynth.mod): - ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period + ind = (1000/oneSynth.delay < (0.95*oneSynth.freq)) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] @@ -103,7 +103,7 @@ def freqPlot(tech, width, config): freqs = freqsL[ind] freqs, delays, areas = noOutliers(median, freqs, delays, areas) - c = 'blue' if ind else 'green' + c = 'blue' if ind else 'gray' targs = [1000/f for f in freqs] ax1.scatter(targs, delays, color=c) @@ -113,7 +113,7 @@ def freqPlot(tech, width, config): delays = list(flatten(delaysL)) areas = list(flatten(areasL)) - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'), + legend_elements = [lines.Line2D([0], [0], color='gray', ls='', marker='o', label='timing achieved'), lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')] ax1.legend(handles=legend_elements) @@ -246,8 +246,8 @@ if __name__ == '__main__': TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy") techdict = {} - techdict['sky90'] = TechSpec('green', 'o', args.skyfreq, 43.2e-3, 1440.600027, 714.057, 0.658023) - techdict['tsmc28'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533) + techdict['sky90'] = TechSpec('gray', 'o', args.skyfreq, 43.2e-3, 1440.600027, 714.057, 0.658023) + techdict['tsmc28psyn'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533) current_directory = os.getcwd() final_directory = os.path.join(current_directory, 'wallyplots') @@ -256,10 +256,11 @@ if __name__ == '__main__': synthsintocsv() synthsfromcsv('Summary.csv') - freqPlot('tsmc28', 'rv32', 'e') + freqPlot('tsmc28psyn', 'rv32', 'e') freqPlot('sky90', 'rv32', 'e') plotFeatures('sky90', 'rv64', 'gc') - plotFeatures('tsmc28', 'rv64', 'gc') + plotFeatures('tsmc28psyn', 'rv64', 'gc') plotConfigs('sky90', mod='orig') - plotConfigs('tsmc28', mod='orig') + plotConfigs('tsmc28psyn', mod='orig') normAreaDelay(mod='orig') + os.system("./extractArea.pl"); diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 1040f35c..c4579e5c 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -49,6 +49,24 @@ set report_default_significant_digits 6 set verilogout_show_unconnected_pins "true" set vhdlout_show_unconnected_pins "true" +# Set up MW List +set MY_LIB_NAME $my_toplevel +# Create MW +if { [shell_is_in_topographical_mode] } { + echo "In Topographical Mode...processing\n" + create_mw_lib -technology $MW_REFERENCE_LIBRARY/$MW_TECH_FILE.tf \ + -mw_reference_library $mw_reference_library $outputDir/$MY_LIB_NAME + # Open MW + open_mw_lib $outputDir/$MY_LIB_NAME + + # TLU+ + set_tlu_plus_files -max_tluplus $MAX_TLU_FILE -min_tluplus $MIN_TLU_FILE \ + -tech2itf_map $PRS_MAP_FILE + +} else { + echo "In normal DC mode...processing\n" +} + # Due to parameterized Verilog must use analyze/elaborate and not # read_verilog/vhdl (change to pull in Verilog and/or VHDL) # @@ -122,7 +140,7 @@ if {$tech == "sky130"} { } elseif {$drive == "FLOP"} { set_driving_cell -lib_cell scc9gena_dfxbp_1 -pin Q $all_in_ex_clk } -} elseif {$tech == "tsmc28"} { +} elseif {$tech == "tsmc28" || $tech=="tsmc28psyn"} { if {$drive == "INV"} { set_driving_cell -lib_cell INVD1BWP30P140 -pin ZN $all_in_ex_clk } elseif {$drive == "FLOP"} { @@ -148,7 +166,7 @@ if {$tech == "sky130"} { } elseif {$drive == "FLOP"} { set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_dfxbp_1/D] * 1] [all_outputs] } -} elseif {$tech == "tsmc28"} { +} elseif {$tech == "tsmc28" || $tech == "tsmc28psyn"} { if {$drive == "INV"} { set_load [expr [load_of tcbn28hpcplusbwp30p140tt0p9v25c/INVD4BWP30P140/I] * 1] [all_outputs] } elseif {$drive == "FLOP"} { @@ -156,8 +174,10 @@ if {$tech == "sky130"} { } } -# Set the wire load model -set_wire_load_mode "top" +if {$tech != "tsmc28psyn"} { + # Set the wire load model + set_wire_load_mode "top" +} # Set switching activities # default activity factors are 1 for clocks, 0.1 for others diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index 3253607d..139bcdd6 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -16,9 +16,10 @@ def mask(command): if __name__ == '__main__': - techs = ['sky90', 'tsmc28'] + techs = ['sky90', 'tsmc28', 'tsmc28psyn'] allConfigs = ['rv32gc', 'rv32imc', 'rv64gc', 'rv64imc', 'rv32e', 'rv32i', 'rv64i'] freqVaryPct = [-20, -12, -8, -6, -4, -2, 0, 2, 4, 6, 8, 12, 20] +# freqVaryPct = [-20, -10, 0, 10, 20] pool = Pool() @@ -46,19 +47,19 @@ if __name__ == '__main__': config = args.version if args.version else 'rv32e' for freq in [round(sc+sc*x/100) for x in freqVaryPct]: # rv32e freq sweep runSynth(config, mod, tech, freq, maxopt, usesram) - if args.configsweep: + elif args.configsweep: defaultfreq = 1500 if tech == 'sky90' else 5000 freq = args.targetfreq if args.targetfreq else defaultfreq for config in ['rv32i', 'rv64gc', 'rv64i', 'rv32gc', 'rv32imc', 'rv32e']: #configs runSynth(config, mod, tech, freq, maxopt, usesram) - if args.featuresweep: + elif args.featuresweep: defaultfreq = 500 if tech == 'sky90' else 1500 freq = args.targetfreq if args.targetfreq else defaultfreq config = args.version if args.version else 'rv64gc' - for mod in ['noFPU', 'noMulDiv', 'noPriv', 'PMP0', 'orig']: + for mod in ['noAtomic', 'noFPU', 'noMulDiv', 'noPriv', 'PMP0']: runSynth(config, mod, tech, freq, maxopt, usesram) else: defaultfreq = 500 if tech == 'sky90' else 1500 freq = args.targetfreq if args.targetfreq else defaultfreq config = args.version if args.version else 'rv64gc' - runSynth(config, mod, tech, freq, maxopt, usesram) \ No newline at end of file + runSynth(config, mod, tech, freq, maxopt, usesram) diff --git a/testbench/common/functionName.sv b/testbench/common/functionName.sv index f3cb656f..17b27ec5 100644 --- a/testbench/common/functionName.sv +++ b/testbench/common/functionName.sv @@ -1,33 +1,24 @@ /////////////////////////////////////////// -// datapath.sv +// functionName.sv // -// Written: Ross Thompson -// email: ross1728@gmail.com -// Created: November 9, 2019 -// Modified: March 04, 2021 -// -// Purpose: Finds the current function or global assembly label based on PCE. +// Purpose: decode name of function // // A component of the Wally configurable RISC-V project. // // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at // -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" diff --git a/testbench/common/instrNameDecTB.sv b/testbench/common/instrNameDecTB.sv index 4c1a6850..dd993e71 100644 --- a/testbench/common/instrNameDecTB.sv +++ b/testbench/common/instrNameDecTB.sv @@ -1,3 +1,26 @@ +/////////////////////////////////////////// +// instrNameDecTB.sv +// +// Purpose: decode name of function +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + // decode the instruction name, to help the test bench module instrNameDecTB( input logic [31:0] instr, diff --git a/testbench/common/instrTrackerTB.sv b/testbench/common/instrTrackerTB.sv index 2b0ca7c5..429ff848 100644 --- a/testbench/common/instrTrackerTB.sv +++ b/testbench/common/instrTrackerTB.sv @@ -1,3 +1,24 @@ +/////////////////////////////////////////// +// instrTrackerTB.sv +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + module instrTrackerTB( input logic clk, reset, FlushE, input logic [31:0] InstrF, InstrD, diff --git a/testbench/common/riscvassertions.sv b/testbench/common/riscvassertions.sv new file mode 100644 index 00000000..f6cb4c6f --- /dev/null +++ b/testbench/common/riscvassertions.sv @@ -0,0 +1,64 @@ +/////////////////////////////////////////// +// riscvassertions.sv +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module riscvassertions; + initial begin + assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); + assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); + assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); + assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); + assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); + assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); + assert (`DCACHE_SUPPORTED | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); + assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); + assert (`FLEN<=`XLEN | `DCACHE_SUPPORTED | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); + assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); + assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); + assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); + assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); + assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); + assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); + assert (`DCACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); + assert (`ICACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); + assert ((`DCACHE_SUPPORTED == 0 & `ICACHE_SUPPORTED == 0) | `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); + assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); + assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); + assert (`DCACHE_SUPPORTED | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); + assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); + assert (`SSTC_SUPPORTED == 0 | (`S_SUPPORTED)) else $error("SSTC requires S_SUPPORTED"); + end + +endmodule + + diff --git a/testbench/common/wallyTracer.sv b/testbench/common/wallyTracer.sv index 14b22968..6daf48e9 100644 --- a/testbench/common/wallyTracer.sv +++ b/testbench/common/wallyTracer.sv @@ -1,3 +1,24 @@ +/////////////////////////////////////////// +// wallyTracer.sv +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" `define NUM_REGS 32 @@ -14,7 +35,10 @@ module wallyTracer(rvviTrace rvvi); // wally specific signals logic reset; - + logic clk; + logic InstrValidD, InstrValidE; + logic StallF, StallD; + logic STATUS_SXL, STATUS_UXL; logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW; logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW; logic InstrValidM, InstrValidW; diff --git a/testbench/testbench.sv b/testbench/testbench.sv index a85ffd65..baf8980d 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -281,7 +281,7 @@ logic [3:0] dummy; // once the test inidicates it's done we need to immediately hold reset for a number of cycles. if(ResetCount < ResetThreshold) ResetCount = ResetCount + 1; else begin // hit reset threshold so we remove reset. - InReset = 0; + InReset = 0; ResetCount = 0; end end else begin @@ -519,47 +519,6 @@ logic [3:0] dummy; endmodule -module riscvassertions; - initial begin - assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); - assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); - assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); - assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); - assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); - assert (`DCACHE_SUPPORTED | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); - assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); - assert (`FLEN<=`XLEN | `DCACHE_SUPPORTED | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); - assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); - assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); - assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); - assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); - assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); - assert (`DCACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`ICACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - assert ((`DCACHE_SUPPORTED == 0 & `ICACHE_SUPPORTED == 0) | `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); - assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); - assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); - assert (`DCACHE_SUPPORTED | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); - assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); - end - - // *** DH 8/23/ -endmodule - - /* verilator lint_on STMTDLY */ /* verilator lint_on WIDTH */ diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index c7d987b8..d760bf5b 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -322,46 +322,6 @@ module testbench; endmodule -module riscvassertions; - initial begin - assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); - assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); - assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); - assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); - assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); - assert (`DCACHE_SUPPORTED | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); - assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); - assert (`FLEN<=`XLEN | `DCACHE_SUPPORTED | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); - assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); - assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); - assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); - assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); - assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); - assert (`DCACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`ICACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - assert ((`DCACHE_SUPPORTED == 0 & `ICACHE_SUPPORTED == 0) | `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS."); - assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); - assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); - assert (`DCACHE_SUPPORTED | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); - assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); - end - - // *** DH 8/23/ -endmodule - /* verilator lint_on STMTDLY */ /* verilator lint_on WIDTH */ diff --git a/tests/custom/debug/debug.S b/tests/custom/debug/debug.S index 5be3c201..e6e00e86 100644 --- a/tests/custom/debug/debug.S +++ b/tests/custom/debug/debug.S @@ -5,23 +5,38 @@ .global rvtest_entry_point rvtest_entry_point: - lui t0, 0x1e # turn on Floating point and XS + lui t0, 0x02 # turn on Floating point and XS csrs mstatus, t0 - # openhwgroup/cvw Issue #55 la a6, begin_signature la a7, rvtest_data - fadd.d ft0, ft1, ft2 + + # openhwgroup/cvw Issue #55 fld f4, 0(a7) fld f9, 8(a7) -# li x1, 0x7ff0000000000001 -# sd x1, 0(a6) -# fmv.w.x f4, x1 -# li x1, 0x7ff8000000000000 - # fmv.w.x f9, x1 - fsgnjx.s f12,f9,f4 # expected f 0xffffffff7fc00000 + fsgnjx.s f12,f9,f4 # expected f 0xffffffff7fc00000, hdl has been giving fff8000000000000 fsd f12, 0(a6) + # openhwgroup/cvw Issue #56 + fld f4, 16(a7) + fld f14, 24(a7) + fsgnjx.s f10,f4,f14 # expected f 0xffffffff7fc00000, hdl has been giving 0xcfa695b1047553b1 + fsd f19, 8(a6) + + # openhwgroup/cvw Issue #57 + fld f0, 32(a7) + fld f15, 40(a7) + fsgnjx.s f30,f0,f15 # expected f 0xfffffffffb3754ef, hdl has been giving 0xffffffff7b3754ef + fsd f30, 16(a6) + + # openhwgroup/cvw Issue #58 + fld f14, 48(a7) + fclass.s x2, f14 # expected 0x0000000000000200, hdl had been giving 0x0000000000000220 + sd x2, 24(a6) + + # fsgnjx.s, fclass.s, fsgnjn.s, fsgnj.s, fneg.s, fabs.s, fmv.s all treat inputs as dp rather than sp + + ######################### # HTIF and signature ######################### @@ -47,10 +62,14 @@ fromhost: rvtest_data: .dword 0x7ff0000000000001 .dword 0x7ff8000000000000 +.dword 0xcfa695b1047553b1 +.dword 0xffffffff7fc00000 +.dword 0xfffffffffb3754ef +.dword 0x7fefffffffffffff .EQU XLEN,64 begin_signature: - .fill 2*(XLEN/32),4,0xdeadbeef # + .fill 8*(XLEN/32),4,0xdeadbeef # end_signature: # Initialize stack with room for 512 bytes diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32-01.S index 3bdd03ad..feba771b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-mmu-sv32-01.S @@ -52,9 +52,9 @@ test_cases: # # --------------------------------------------------------------------------------------------- -# =========== test 12.3.1.1 Page Table Translation =========== +# =========== test 8.3.1.1 Page Table Translation =========== -# test 12.3.1.1.1 write page tables / entries to phyiscal memory +# test 8.3.1.1.1 write page tables / entries to phyiscal memory # sv32 Page table (See Figure 12.12***): # Level 1 page table, situated at 0x8000D000 .4byte 0x8000D000, 0x20004C01, write32_test # points to level 0 page table A @@ -78,19 +78,19 @@ test_cases: .4byte 0x8000F000, 0x200000CF, write32_test # Vaddr 0x0 Paddr 0x80000000: aligned megapage .4byte 0x8000F800, 0x200000CF, write32_test # Vaddr 0x80000000 Paddr 0x80000000: aligned megapage (program and data memory) -# test 12.3.1.1.2 write values to Paddrs in each page -# each of these values is used for 12.3.1.1.3 and some other tests, specified in the comments. +# test 8.3.1.1.2 write values to Paddrs in each page +# each of these values is used for 8.3.1.1.3 and some other tests, specified in the comments. # when a test is supposed to fault, nothing is written into where it'll be reading/executing since it should fault before getting there. -.4byte 0x800AAAA8, 0xBEEF0055, write32_test # 12.3.1.1.4 megapage -.4byte 0x800FFAC0, 0xBEEF0033, write32_test # 12.3.1.3.2 -.4byte 0x800E3130, 0xBEEF0077, write32_test # 12.3.1.3.2 -.4byte 0x808017E0, 0xBEEF0099, write32_test # 12.3.1.1.4 kilopage -.4byte 0x80805EA0, 0xBEEF0440, write32_test # 12.3.1.3.3 -.4byte 0x80803AA0, 0xBEEF0BB0, write32_test # 12.3.1.3.7 +.4byte 0x800AAAA8, 0xBEEF0055, write32_test # 8.3.1.1.4 megapage +.4byte 0x800FFAC0, 0xBEEF0033, write32_test # 8.3.1.3.2 +.4byte 0x800E3130, 0xBEEF0077, write32_test # 8.3.1.3.2 +.4byte 0x808017E0, 0xBEEF0099, write32_test # 8.3.1.1.4 kilopage +.4byte 0x80805EA0, 0xBEEF0440, write32_test # 8.3.1.3.3 +.4byte 0x80803AA0, 0xBEEF0BB0, write32_test # 8.3.1.3.7 .4byte 0x8000FFA0, 0x11100393, write32_test # write executable code for "li x7, 0x111; ret" to executable region. -.4byte 0x8000FFA4, 0x00008067, write32_test # Used for 12.3.1.3.1, 12.3.1.3.2 +.4byte 0x8000FFA4, 0x00008067, write32_test # Used for 8.3.1.3.1, 8.3.1.3.2 -# test 12.3.1.1.3 read values back from Paddrs without translation (this also verifies the previous test) +# test 8.3.1.1.3 read values back from Paddrs without translation (this also verifies the previous test) .4byte 0x0, 0x0, goto_baremetal # satp.MODE = baremetal / no translation. .4byte 0x0, 0x0, goto_s_mode # change to S mode, 0xb written to output .4byte 0x800AAAA8, 0xBEEF0055, read32_test @@ -102,38 +102,38 @@ test_cases: .4byte 0x8000FFA0, 0x11100393, read32_test .4byte 0x8000FFA4, 0x00008067, read32_test -# test 12.3.1.1.4 check translation works in sv48, read the same values from previous tests, this time with Vaddrs +# test 8.3.1.1.4 check translation works in sv48, read the same values from previous tests, this time with Vaddrs .4byte 0x8000D, 0x0, goto_sv32 # satp.MODE = sv32, Nothing written to output .4byte 0x4AAAA8, 0xBEEF0055, read32_test # megapage at Vaddr 0x400000, Paddr 0x80000000 .4byte 0xBFF7E0, 0xBEEF0099, read32_test # kilopage at Vaddr 0xBFF000, Paddr 0x80201000 -# =========== test 12.3.1.2 page fault tests =========== +# =========== test 8.3.1.2 page fault tests =========== -# test 12.3.1.2.1 load page fault if upper bits of Vaddr are not the same +# test 8.3.1.2.1 load page fault if upper bits of Vaddr are not the same # Not tested in rv32/sv32 -# test 12.3.1.2.2 load page fault when reading an address where the valid flag is zero +# test 8.3.1.2.2 load page fault when reading an address where the valid flag is zero .4byte 0x6000, 0x0, read32_test -# test 12.3.1.2.3 store page fault if PTE has W and ~R flags set +# test 8.3.1.2.3 store page fault if PTE has W and ~R flags set .4byte 0x2000, 0x0, write32_test -# test 12.3.1.2.4 Fault if last level PTE is a pointer +# test 8.3.1.2.4 Fault if last level PTE is a pointer .4byte 0x0200, 0x0, read32_test -# test 12.3.1.2.5 load page fault on misaligned pages +# test 8.3.1.2.5 load page fault on misaligned pages .4byte 0xC00000, 0x0, read32_test # misaligned megapage -# =========== test 12.3.1.3 PTE Protection flags =========== +# =========== test 8.3.1.3 PTE Protection flags =========== -# test 12.3.1.3.1 User flag == 0 -# *** reads on pages with U=0 already tested in 12.3.1.1.4 +# test 8.3.1.3.1 User flag == 0 +# *** reads on pages with U=0 already tested in 8.3.1.1.4 .4byte 0x40FFA0, 0x111, executable_test # fetch success when U=0, priv=S .4byte 0x80400000, 0x1, goto_u_mode # go to U mode, return to VPN 0x80400000 where PTE.U = 1. 0x9 written to output .4byte 0xBFFC80, 0xBEEF0550, read32_test # load page fault when U=0, priv=U .4byte 0x40FFA0, 0xbad, executable_test # instr page fault when U=0, priv=U -# test 12.3.1.3.2 User flag == 1 +# test 8.3.1.3.2 User flag == 1 .4byte 0x804FFAC0, 0xBEEF0033, read32_test # read success when U=1, priv=U .4byte 0x80000000, 0x1, goto_s_mode # go back to S mode, return to VPN 0x80000000 where PTE.U = 0. 0x8 written to output .4byte 0x0, 0x3, write_mxr_sum # set sstatus.[MXR, SUM] = 11 @@ -142,58 +142,58 @@ test_cases: .4byte 0x0, 0x2, write_mxr_sum # set sstatus.[MXR, SUM] = 10. .4byte 0x804FFAC0, 0xBEEF0033, read32_test # load page fault when U-1, priv=S, sstatus.SUM=0 -# test 12.3.1.3.3 Read flag -# *** reads on pages with R=1 already tested in 12.3.1.1.4 +# test 8.3.1.3.3 Read flag +# *** reads on pages with R=1 already tested in 8.3.1.1.4 .4byte 0x0, 0x1, write_mxr_sum # set sstatus.[MXR, SUM] = 01. .4byte 0x5EA0, 0xBEEF0440, read32_test # load page fault when R=0, sstatus.MXR=0 .4byte 0x0, 0x3, write_mxr_sum # set sstatus.[MXR, SUM] = 11. .4byte 0x5EA0, 0xBEEF0440, read32_test # read success when R=0, MXR=1, X=1 -# test 12.3.1.3.4 Write flag +# test 8.3.1.3.4 Write flag .4byte 0xBFF290, 0xBEEF0110, write32_test # write success when W=1 .4byte 0xBFF290, 0xBEEF0110, read32_test # check write success by reading .4byte 0x5B78, 0xBEEF0CC0, write32_test # store page fault when W=0 -# test 12.3.1.3.5 eXecute flag -# *** fetches on pages with X = 1 already tested in 12.3.1.3.1 +# test 8.3.1.3.5 eXecute flag +# *** fetches on pages with X = 1 already tested in 8.3.1.3.1 .4byte 0xBFFDE0, 0xbad, executable_test # instr page fault when X=0 -# test 12.3.1.3.6 Accessed flag == 0 +# test 8.3.1.3.6 Accessed flag == 0 .4byte 0x3020, 0xBEEF0770, write32_test # store page fault when A=0 .4byte 0x3808, 0xBEEF0990, read32_test # load page fault when A=0 -# test 12.3.1.3.7 Dirty flag == 0 +# test 8.3.1.3.7 Dirty flag == 0 .4byte 0x4658, 0xBEEF0AA0, write32_test # store page fault when D=0 .4byte 0x4AA0, 0xBEEF0BB0, read32_test # read success when D=0 -# =========== test 12.3.1.4 SATP Register =========== +# =========== test 8.3.1.4 SATP Register =========== -# test 12.3.1.4.1 SATP ASID and PPN fields (test having two page tables with different ASID) +# test 8.3.1.4.1 SATP ASID and PPN fields (test having two page tables with different ASID) // *** .4byte 0xFFFFFFFFFFFFF888, 0x0220DEADBEEF0099, write32_test # write identical value to global PTE to make sure it's still in the TLB .4byte 0x8000F, 0x11, goto_sv32 # go to SV39 on a second, very minimal page table .4byte 0xE3130, 0xBEEF0077, read32_test # Read success of old written value from a new page table mapping -# test 12.3.1.4.2 Test Global mapping +# test 8.3.1.4.2 Test Global mapping // ***.4byte 0x7FFFFFF888, 0x0220DEADBEEF0099, read32_test # read success of global PTE undefined in current mapping. -# =========== test 12.3.1.5 STATUS Registers =========== +# =========== test 8.3.1.5 STATUS Registers =========== -# test 12.3.1.5.1 mstatus.mprv translation +# test 8.3.1.5.1 mstatus.mprv translation # *** mstatus.mprv = 0 tested on every one of the translated reads and writes before this. .4byte 0x8000D, 0x0, goto_sv32 // go back to old, extensive page table .4byte 0x80000000, 0x1, goto_m_mode // go to m mode to be able to write mstatus .4byte 0x1, 0x1, read_write_mprv // write 1 to mstatus.mprv and set mstatus.mpp to be 01=S .4byte 0xBFF7E0, 0xBEEF0099, read32_test // read test succeeds with translation even though we're in M mode since MPP=S and MPRV=1 -# test 12.3.1.5.2 mstatus.mprv clearing +# test 8.3.1.5.2 mstatus.mprv clearing # mstatus.mprv is already 1 from the last test so going to S mode should clear it with the mret .4byte 0x80000000, 0x1, goto_s_mode // This should zero out the mprv bit but now to read and write mstatus, we have to .4byte 0x80000000, 0x1, goto_m_mode // go back to m mode to allow us to reread mstatus. .4byte 0x0, 0x0, read_write_mprv // read what should be a zeroed out mprv value and then force it back to zero. -# test 12.3.1.5.3 sstatus.mxr read -# this bitfield already tested in 12.3.1.3.3 +# test 8.3.1.5.3 sstatus.mxr read +# this bitfield already tested in 8.3.1.3.3 # terminate tests .4byte 0x0, 0x0, terminate_test # brings us back into machine mode with a final ecall, writing 0x9 to the output. diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39-01.S index 2a54d486..40a876d2 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv39-01.S @@ -52,9 +52,9 @@ test_cases: # # --------------------------------------------------------------------------------------------- -# =========== test 12.3.1.1 Page Table Translation =========== +# =========== test 8.3.1.1 Page Table Translation =========== -# test 12.3.1.1.1 write page tables / entries to phyiscal memory +# test 8.3.1.1.1 write page tables / entries to phyiscal memory # sv39 page table (See Figure 12.12***): # Level 2 page table, situated at 0x8000D000 .8byte 0x000000008000D000, 0x0000000020004C01, write64_test# points to level 1 page table A @@ -89,20 +89,20 @@ test_cases: .8byte 0x8FFFF000, 0x200000CF, write64_test# Vaddr 0x0, Paddr 0x80000000 aligned gigapage .8byte 0x8FFFF010, 0x200000CF, write64_test# Vaddr 0x8000_0000, Paddr 0x80000000: aligned gigapage (program and data memory so we can execute without jumping around) -# test 12.3.1.1.2 write values to Paddrs in each page -# each of these values is used for 12.3.1.1.3 and some other tests, specified in the comments. +# test 8.3.1.1.2 write values to Paddrs in each page +# each of these values is used for 8.3.1.1.3 and some other tests, specified in the comments. # when a test is supposed to fault, nothing is written into where it'll be reading/executing since it shuold fault before getting there. -.8byte 0x80200AB0, 0x0000DEADBEEF0000, write64_test# 12.3.1.1.4 and 12.3.1.4.1 -.8byte 0x800FFAB8, 0x0880DEADBEEF0055, write64_test# 12.3.1.1.4 -.8byte 0x80200AC0, 0x0990DEADBEEF0033, write64_test# 12.3.1.3.2 -.8byte 0x80203130, 0x0110DEADBEEF0077, write64_test# 12.3.1.3.2 -.8byte 0x80099000, 0x0000806711100393, write64_test# 12.3.1.3.1 and 12.3.1.3.2 write executable code for "li x7, 0x111; ret" -.8byte 0x80205AA0, 0x0000806711100393, write64_test# 12.3.1.3.5 write same executable code -.8byte 0x80201888, 0x0220DEADBEEF0099, write64_test# 12.3.1.1.4 -.8byte 0x84212348, 0x0330DEADBEEF0440, write64_test# 12.3.1.3.3 -.8byte 0x80203AA0, 0x0440DEADBEEF0BB0, write64_test# 12.3.1.3.7 +.8byte 0x80200AB0, 0x0000DEADBEEF0000, write64_test# 8.3.1.1.4 and 8.3.1.4.1 +.8byte 0x800FFAB8, 0x0880DEADBEEF0055, write64_test# 8.3.1.1.4 +.8byte 0x80200AC0, 0x0990DEADBEEF0033, write64_test# 8.3.1.3.2 +.8byte 0x80203130, 0x0110DEADBEEF0077, write64_test# 8.3.1.3.2 +.8byte 0x80099000, 0x0000806711100393, write64_test# 8.3.1.3.1 and 8.3.1.3.2 write executable code for "li x7, 0x111; ret" +.8byte 0x80205AA0, 0x0000806711100393, write64_test# 8.3.1.3.5 write same executable code +.8byte 0x80201888, 0x0220DEADBEEF0099, write64_test# 8.3.1.1.4 +.8byte 0x84212348, 0x0330DEADBEEF0440, write64_test# 8.3.1.3.3 +.8byte 0x80203AA0, 0x0440DEADBEEF0BB0, write64_test# 8.3.1.3.7 -# test 12.3.1.1.3 read values back from Paddrs without translation (this also verifies the previous test) +# test 8.3.1.1.3 read values back from Paddrs without translation (this also verifies the previous test) .8byte 0x0, 0x0, goto_baremetal# satp.MODE = baremetal / no translation. .8byte 0x0, 0x0, goto_s_mode # change to S mode, 0xb written to output .8byte 0x80200AB0, 0x0000DEADBEEF0000, read64_test @@ -113,42 +113,42 @@ test_cases: .8byte 0x84212348, 0x0330DEADBEEF0440, read64_test .8byte 0x80203AA0, 0x0440DEADBEEF0BB0, read64_test -# test 12.3.1.1.4 check translation works in sv39, read the same values from previous tests, this time with Vaddrs +# test 8.3.1.1.4 check translation works in sv39, read the same values from previous tests, this time with Vaddrs .8byte 0x8000D, 0x0, goto_sv39 # satp.MODE = sv39, with base page table PPN = 0x8000D and ASID = 0. current VPN: gigapage at 0x80000000. .8byte 0x80200AB0, 0x0000DEADBEEF0000, read64_test # gigapage at Vaddr 0x80000000, Paddr 0x80000000 .8byte 0x400FFAB8, 0x0880DEADBEEF0055, read64_test # megapage at Vaddr 0x40400000, Paddr 0x80000000 .8byte 0xFFFFFFFFFFFFF888, 0x0220DEADBEEF0099, read64_test # kilopage at Vaddr 0xFFFFFFFFFFFFF000, Paddr 0x80201000 -# =========== test 12.3.1.2 page fault tests =========== +# =========== test 8.3.1.2 page fault tests =========== -# test 12.3.1.2.1 load page fault if upper bits of Vaddr are not the same +# test 8.3.1.2.1 load page fault if upper bits of Vaddr are not the same .8byte 0x0010000080000AB0, 0x0, read64_test# gigapage at Vaddr 0x80000000, Paddr 0x80000000, bad 1 in upper bits .8byte 0xFF0FFFFFFFFFF888, 0x0, read64_test# kilopage at Vaddr 0xFFFFFFFFFFFFF000, Paddr 0x80201000, bad 0000 in upper bits -# test 12.3.1.2.2 load page fault when reading an address where the valid flag is zero +# test 8.3.1.2.2 load page fault when reading an address where the valid flag is zero .8byte 0x6000, 0x0, read64_test -# test 12.3.1.2.3 store page fault if PTE has W and ~R flags set +# test 8.3.1.2.3 store page fault if PTE has W and ~R flags set .8byte 0x2000, 0x0, write64_test -# test 12.3.1.2.4 Fault if last level PTE is a pointer +# test 8.3.1.2.4 Fault if last level PTE is a pointer .8byte 0x0020, 0x0, read64_test -# test 12.3.1.2.5 load page fault on misaligned pages +# test 8.3.1.2.5 load page fault on misaligned pages .8byte 0xC0000000, 0x0, read64_test# misaligned gigapage .8byte 0x40200000, 0x0, read64_test# misaligned megapage -# =========== test 12.3.1.3 PTE Protection flags =========== +# =========== test 8.3.1.3 PTE Protection flags =========== -# test 12.3.1.3.1 User flag == 0 -# *** reads on pages with U=0 already tested in 12.3.1.1.4 +# test 8.3.1.3.1 User flag == 0 +# *** reads on pages with U=0 already tested in 8.3.1.1.4 .8byte 0x40099000, 0x111, executable_test # execute success when U=0, priv=S .8byte 0x40400000, 0x2, goto_u_mode # go to U mode, return to megapage at 0x40400000 where U = 1. 0x9 written to output .8byte 0xFFFFFFFFFFFFFC80, 0x0880DEADBEEF0550, read64_test # load page fault when U=0, priv=U .8byte 0x40099000, 0xbad, executable_test # execute fault when U=0, priv=U -# test 12.3.1.3.2 User flag == 1 +# test 8.3.1.3.2 User flag == 1 .8byte 0x1AC0, 0x0990DEADBEEF0033, read64_test # read success when U=1, priv=U .8byte 0x80000000, 0x1, goto_s_mode # go back to S mode, return to gigapage at 0x80000000 where U = 0. 0x8 written to output .8byte 0x0, 0x3, write_mxr_sum # set sstatus.[MXR, SUM] = 11 @@ -157,58 +157,58 @@ test_cases: .8byte 0x0, 0x2, write_mxr_sum # set sstatus.[MXR, SUM] = 10. .8byte 0x1AC0, 0x0990DEADBEEF0033, read64_test # load page fault when U-1, priv=S, sstatus.SUM=0 -# test 12.3.1.3.3 Read flag -# *** reads on pages with R=1 already tested in 12.3.1.1.4 +# test 8.3.1.3.3 Read flag +# *** reads on pages with R=1 already tested in 8.3.1.1.4 .8byte 0x0, 0x1, write_mxr_sum # set sstatus.[MXR, SUM] = 01. .8byte 0x40612348, 0x0330DEADBEEF0440, read64_test # load page fault when R=0, sstatus.MXR=0 .8byte 0x0, 0x3, write_mxr_sum # set sstatus.[MXR, SUM] = 11. .8byte 0x40612348, 0x0330DEADBEEF0440, read64_test # read success when MXR=1, X=1 -# test 12.3.1.3.4 Write flag +# test 8.3.1.3.4 Write flag .8byte 0x80AAAAA0, 0x0440DEADBEEF0110, write64_test# write success when W=1 .8byte 0x80AAAAA0, 0x0440DEADBEEF0110, read64_test# check write success by reading the same address .8byte 0x40000000, 0x0220DEADBEEF0BB0, write64_test# store page fault when W=0 -# test 12.3.1.3.5 eXecute flag -# *** fetches on pages with X = 1 already tested in 12.3.1.3.1 +# test 8.3.1.3.5 eXecute flag +# *** fetches on pages with X = 1 already tested in 8.3.1.3.1 .8byte 0x5AA0, 0x1, executable_test # instr page fault when X=0 -# test 12.3.1.3.6 Accessed flag == 0 +# test 8.3.1.3.6 Accessed flag == 0 .8byte 0x36D0, 0x0990DEADBEEF0770, write64_test# store page fault when A=0 .8byte 0x3AB8, 0x0990DEADBEEF0990, read64_test# load page fault when A=0 -# test 12.3.1.3.7 Dirty flag == 0 +# test 8.3.1.3.7 Dirty flag == 0 .8byte 0x4658, 0x0440DEADBEEF0AA0, write64_test# store page fault when D=0 .8byte 0x4AA0, 0x0440DEADBEEF0BB0, read64_test# read success when D=0 -# =========== test 12.3.1.4 SATP Register =========== +# =========== test 8.3.1.4 SATP Register =========== -# test 12.3.1.4.1 SATP ASID and PPN fields (test having two page tables with different ASID) +# test 8.3.1.4.1 SATP ASID and PPN fields (test having two page tables with different ASID) // *** .8byte 0xFFFFFFFFFFFFF888, 0x0220DEADBEEF0099, write64_test # write identical value to global PTE to make sure it's still in the TLB .8byte 0x8FFFF, 0x11, goto_sv39 # go to SV39 on a second, very minimal page table .8byte 0x200AB0, 0x0000DEADBEEF0000, read64_test # Read success of old written value from a new page table mapping -# test 12.3.1.4.2 Test Global mapping +# test 8.3.1.4.2 Test Global mapping // ***.8byte 0x7FFFFFF888, 0x0220DEADBEEF0099, read64_test # read success of global PTE undefined in current mapping. -# =========== test 12.3.1.5 STATUS Registers =========== +# =========== test 8.3.1.5 STATUS Registers =========== -# test 12.3.1.5.1 mstatus.mprv translation +# test 8.3.1.5.1 mstatus.mprv translation # *** mstatus.mprv = 0 tested on every one of the translated reads and writes before this. .8byte 0x8000D, 0x0, goto_sv39 // go back to old, extensive page table .8byte 0x80000000, 0x1, goto_m_mode // go to m mode to be able to write mstatus .8byte 0x1, 0x1, read_write_mprv // write 1 to mstatus.mprv and set mstatus.mpp to be 01=S .8byte 0xFFFFFFFFFFFFF888, 0x0220DEADBEEF0099, read64_test // read test succeeds with translation even though we're in M mode since MPP=S and MPRV=1 -# test 12.3.1.5.2 mstatus.mprv clearing +# test 8.3.1.5.2 mstatus.mprv clearing # mstatus.mprv is already 1 from the last test so going to S mode should clear it with the mret .8byte 0x80000000, 0x1, goto_s_mode // This should zero out the mprv bit but now to read and write mstatus, we have to .8byte 0x80000000, 0x1, goto_m_mode // go back to m mode to allow us to reread mstatus. .8byte 0x0, 0x0, read_write_mprv // read what should be a zeroed out mprv value and then force it back to zero. -# test 12.3.1.5.3 sstatus.mxr read -# this bitfield already tested in 12.3.1.3.3 +# test 8.3.1.5.3 sstatus.mxr read +# this bitfield already tested in 8.3.1.3.3 # terminate tests .8byte 0x0, 0x0, terminate_test # brings us back into machine mode with a final ecall, writing 0x9 to the output. diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48-01.S index fafaa242..f0f9b3a7 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-mmu-sv48-01.S @@ -54,9 +54,9 @@ test_cases: # --------------------------------------------------------------------------------------------- -# =========== test 12.3.1.1 Page Table Translation =========== +# =========== test 8.3.1.1 Page Table Translation =========== -# test 12.3.1.1.1 write page tables / entries to phyiscal memory +# test 8.3.1.1.1 write page tables / entries to phyiscal memory # sv48 page table (See Figure 12.12***): # Level 3 page table, situated at 0x8000D000 .8byte 0x000000008000D000, 0x0000000020004C01, write64_test # points to level 2 page table A @@ -101,22 +101,22 @@ test_cases: .8byte 0x8002F010, 0x200000CF, write64_test # Vaddr 0x80000000, Paddr 0x80000000: aligned gigapage (data and instr memory) -# test 12.3.1.1.2 write values to Paddrs in each page -# each of these values is used for 12.3.1.1.3 and some other tests, specified in the comments. +# test 8.3.1.1.2 write values to Paddrs in each page +# each of these values is used for 8.3.1.1.3 and some other tests, specified in the comments. # when a test is supposed to fault, nothing is written into where it'll be reading/executing since it should fault before getting there. -.8byte 0x82777778, 0x0EE0DEADBEEF0CC0, write64_test # 12.3.1.1.4 terapage -.8byte 0x85BC0AB0, 0x0000DEADBEEF0000, write64_test # 12.3.1.1.4 gigapage -.8byte 0x800F0AB8, 0x0880DEADBEEF0055, write64_test # 12.3.1.1.4 megapage -.8byte 0x80201888, 0x0220DEADBEEF0099, write64_test # 12.3.1.1.4 kilopage -.8byte 0x80099000, 0x0000806711100393, write64_test # 12.3.1.3.1 write executable code for "li x7, 0x111; ret" -.8byte 0x80200400, 0x0000806711100393, write64_test # 12.3.1.3.2 write same executable code -.8byte 0x80200AC0, 0x0990DEADBEEF0033, write64_test # 12.3.1.3.2 -.8byte 0x80200130, 0x0110DEADBEEF0077, write64_test # 12.3.1.3.2 -.8byte 0x85212348, 0x0330DEADBEEF0440, write64_test # 12.3.1.3.3 -.8byte 0x88888000, 0x0000806711100393, write64_test # 12.3.1.3.5 write same executable code -.8byte 0x80203AA0, 0x0440DEADBEEF0BB0, write64_test # 12.3.1.3.7 +.8byte 0x82777778, 0x0EE0DEADBEEF0CC0, write64_test # 8.3.1.1.4 terapage +.8byte 0x85BC0AB0, 0x0000DEADBEEF0000, write64_test # 8.3.1.1.4 gigapage +.8byte 0x800F0AB8, 0x0880DEADBEEF0055, write64_test # 8.3.1.1.4 megapage +.8byte 0x80201888, 0x0220DEADBEEF0099, write64_test # 8.3.1.1.4 kilopage +.8byte 0x80099000, 0x0000806711100393, write64_test # 8.3.1.3.1 write executable code for "li x7, 0x111; ret" +.8byte 0x80200400, 0x0000806711100393, write64_test # 8.3.1.3.2 write same executable code +.8byte 0x80200AC0, 0x0990DEADBEEF0033, write64_test # 8.3.1.3.2 +.8byte 0x80200130, 0x0110DEADBEEF0077, write64_test # 8.3.1.3.2 +.8byte 0x85212348, 0x0330DEADBEEF0440, write64_test # 8.3.1.3.3 +.8byte 0x88888000, 0x0000806711100393, write64_test # 8.3.1.3.5 write same executable code +.8byte 0x80203AA0, 0x0440DEADBEEF0BB0, write64_test # 8.3.1.3.7 -# test 12.3.1.1.3 read values back from Paddrs without translation (this also verifies the previous test) +# test 8.3.1.1.3 read values back from Paddrs without translation (this also verifies the previous test) .8byte 0x0, 0x0, goto_baremetal # satp.MODE = baremetal / no translation. .8byte 0x0, 0x0, goto_s_mode # change to S mode, 0xb written to output .8byte 0x82777778, 0x0EE0DEADBEEF0CC0, read64_test @@ -128,43 +128,43 @@ test_cases: .8byte 0x85212348, 0x0330DEADBEEF0440, read64_test .8byte 0x80203AA0, 0x0440DEADBEEF0BB0, read64_test -# test 12.3.1.1.4 check translation works in sv48, read the same values from previous tests, this time with Vaddrs +# test 8.3.1.1.4 check translation works in sv48, read the same values from previous tests, this time with Vaddrs .8byte 0x8000D, 0x0, goto_sv48 # satp.MODE = sv48, with base page table PPN = 0x8000D and ASID = 0. current VPN: megapage at 0x80000000. Nothing written to output .8byte 0x10082777778, 0x0EE0DEADBEEF0CC0, read64_test # terapage at Vaddr 0x010000000000, Paddr 0x0 .8byte 0x8005BC0AB0, 0x0000DEADBEEF0000, read64_test # gigapage at Vaddr 0x008000000000, Paddr 0x80000000 .8byte 0x800F0AB8, 0x0880DEADBEEF0055, read64_test # megapage at Vaddr 0x80000000, Paddr 0x80000000 .8byte 0xFFFFFFFFFFFFF888, 0x0220DEADBEEF0099, read64_test # kilopage at Vaddr 0xFFFFFFFFFFFFF000, Paddr 0x80201000 -# =========== test 12.3.1.2 page fault tests =========== +# =========== test 8.3.1.2 page fault tests =========== -# test 12.3.1.2.1 page fault if upper bits of Vaddr are not the same +# test 8.3.1.2.1 page fault if upper bits of Vaddr are not the same .8byte 0x001000800ABC0AB0, 0x0, read64_test# gigapage at Vaddr 0x008000000000, Paddr 0x80000000, bad 1 in upper bits .8byte 0xFF0FFFFFFFFFF888, 0x0, read64_test# kilopage at Vaddr 0xFFFFFFFFFFFFF000, Paddr 0x80201000, bad 0000 in upper bits -# test 12.3.1.2.2 read fault when reading an address where the valid flag is zero +# test 8.3.1.2.2 read fault when reading an address where the valid flag is zero .8byte 0x80205000, 0x0, read64_test -# test 12.3.1.2.3 write fault if PTE has W and ~R flags set +# test 8.3.1.2.3 write fault if PTE has W and ~R flags set .8byte 0x80202000, 0x0, write64_test -# test 12.3.1.2.4 Fault if last level PTE is a pointer +# test 8.3.1.2.4 Fault if last level PTE is a pointer .8byte 0x80200000, 0x0, read64_test -# test 12.3.1.2.5 read fault on misaligned pages +# test 8.3.1.2.5 read fault on misaligned pages .8byte 0x18000000000, 0x0, read64_test # misaligned terapage .8byte 0x8080000000, 0x0, read64_test # misaligned gigapage .8byte 0x80400000, 0x0, read64_test # misaligned megapage -# =========== test 12.3.1.3 PTE Protection flags =========== +# =========== test 8.3.1.3 PTE Protection flags =========== -# test 12.3.1.3.1 User flag == 0 -# reads on pages with U=0 already tested in 12.3.1.1.4 +# test 8.3.1.3.1 User flag == 0 +# reads on pages with U=0 already tested in 8.3.1.1.4 .8byte 0x008000099000, 0x111, executable_test # execute success when U=0, priv=S .8byte 0x008040000000, 0x1, goto_u_mode # go to U mode, return to gigapage at 0x008040000000 where PTE.U = 1. 0x9 written to output .8byte 0xFFFFFFFFFFFFFC80, 0x0880DEADBEEF0550, read64_test # read fault when U=0, priv=U .8byte 0x008000099000, 0xbad, executable_test # execute fault when U=0, priv=U -# test 12.3.1.3.2 User flag == 1 +# test 8.3.1.3.2 User flag == 1 .8byte 0x80201AC0, 0x0990DEADBEEF0033, read64_test # read success when U=1, priv=U .8byte 0x80000000, 0x2, goto_s_mode .8byte 0x0, 0x3, write_mxr_sum # set sstatus.[MXR, SUM] = 11 @@ -173,58 +173,58 @@ test_cases: .8byte 0x0, 0x2, write_mxr_sum # set sstatus.[MXR, SUM] = 10. .8byte 0x80201AC0, 0x0990DEADBEEF0033, read64_test # read fault when U=1, priv=S, sstatus.SUM=0 -# test 12.3.1.3.3 Read flag -# reads on pages with R=1 already tested in 12.3.1.1.4 +# test 8.3.1.3.3 Read flag +# reads on pages with R=1 already tested in 8.3.1.1.4 .8byte 0x0, 0x1, write_mxr_sum # set sstatus.[MXR, SUM] = 01. .8byte 0x80612348, 0x0330DEADBEEF0440, read64_test # read fault when R=0, sstatus.MXR=0 .8byte 0x0, 0x3, write_mxr_sum # set sstatus.[MXR, SUM] = 11. .8byte 0x80612348, 0x0330DEADBEEF0440, read64_test # read success when MXR=1, X=1 -# test 12.3.1.3.4 Write flag +# test 8.3.1.3.4 Write flag .8byte 0x10080BCDED8, 0x0440DEADBEEF0110, write64_test # write success when W=1 (corresponding Paddr = 0x80BCDED8) .8byte 0x10080BCDED8, 0x0440DEADBEEF0110, read64_test # check write success by reading value back .8byte 0x8000009E88, 0x0220DEADBEEF0BB0, write64_test # write fault when W=0 -# test 12.3.1.3.5 eXecute flag -# executes on pages with X = 1 already tested in 12.3.1.3.1 +# test 8.3.1.3.5 eXecute flag +# executes on pages with X = 1 already tested in 8.3.1.3.1 .8byte 0x010088888000, 0x2, executable_test # execute fault when X=0 -# test 12.3.1.3.6 Accessed flag == 0 +# test 8.3.1.3.6 Accessed flag == 0 .8byte 0x802036D0, 0x0990DEADBEEF0770, write64_test # write fault when A=0 .8byte 0x80203AB8, 0x0990DEADBEEF0990, read64_test# read fault when A=0 -# test 12.3.1.3.7 Dirty flag == 0 +# test 8.3.1.3.7 Dirty flag == 0 .8byte 0x80204658, 0x0440DEADBEEF0AA0, write64_test # write fault when D=0 .8byte 0x80204AA0, 0x0440DEADBEEF0BB0, read64_test# read success when D=0 -# =========== test 12.3.1.4 SATP Register =========== +# =========== test 8.3.1.4 SATP Register =========== -# test 12.3.1.4.1 SATP ASID and PPN fields (test having two page tables with different ASID) +# test 8.3.1.4.1 SATP ASID and PPN fields (test having two page tables with different ASID) // *** .8byte 0xFFFFFFFFFFFFF888, 0x0220DEADBEEF0099, write64_test # write identical value to global PTE to make sure it's still in the TLB .8byte 0x8000F, 0x11, goto_sv48 # go to SV39 on a second, very minimal page table .8byte 0x5BC0AB0, 0x0000DEADBEEF0000, read64_test # Read success of old written value from a new page table mapping -# test 12.3.1.4.2 Test Global mapping +# test 8.3.1.4.2 Test Global mapping // ***.8byte 0x7FFFFFF888, 0x0220DEADBEEF0099, read64_test # read success of global PTE undefined in current mapping. -# =========== test 12.3.1.5 STATUS Registers =========== +# =========== test 8.3.1.5 STATUS Registers =========== -# test 12.3.1.5.1 mstatus.mprv translation +# test 8.3.1.5.1 mstatus.mprv translation # *** mstatus.mprv = 0 tested on every one of the translated reads and writes before this. .8byte 0x8000D, 0x0, goto_sv48 // go back to old, extensive page table .8byte 0x80000000, 0x1, goto_m_mode // go to m mode to be able to write mstatus .8byte 0x1, 0x1, read_write_mprv // write 1 to mstatus.mprv and set mstatus.mpp to be 01=S .8byte 0xFFFFFFFFFFFFF888, 0x0220DEADBEEF0099, read64_test // read test succeeds with translation even though we're in M mode since MPP=S and MPRV=1 -# test 12.3.1.5.2 mstatus.mprv clearing +# test 8.3.1.5.2 mstatus.mprv clearing # mstatus.mprv is already 1 from the last test so going to S mode should clear it with the mret .8byte 0x80000000, 0x1, goto_s_mode // This should zero out the mprv bit but now to read and write mstatus, we have to .8byte 0x80000000, 0x1, goto_m_mode // go back to m mode to allow us to reread mstatus. .8byte 0x0, 0x0, read_write_mprv // read what should be a zeroed out mprv value and then force it back to zero. -# test 12.3.1.5.3 sstatus.mxr read -# this bitfield already tested in 12.3.1.3.3 +# test 8.3.1.5.3 sstatus.mxr read +# this bitfield already tested in 8.3.1.3.3 # terminate tests .8byte 0x0, 0x0, terminate_test # brings us back into machine mode with a final ecall, writing 0x9 to the output.