From da4eca48544607a55c7b1824173c5da04d34785a Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 15 Jan 2024 13:24:57 -0800 Subject: [PATCH 01/37] Tested Zfh support using unreleased version of riscv-arch-test Zfh tests. Fixed two bugs in fmv to/from int. --- config/buildroot/config.vh | 1 + config/rv32e/config.vh | 1 + config/rv32gc/config.vh | 1 + config/rv32i/config.vh | 1 + config/rv32imc/config.vh | 1 + config/rv64fpquad/config.vh | 3 +- config/rv64gc/config.vh | 1 + config/rv64i/config.vh | 1 + config/shared/parameter-defs.vh | 1 + src/cvw.sv | 1 + src/fpu/fpu.sv | 20 +- testbench/common/instrNameDecTB.sv | 131 ++++-------- testbench/testbench.sv | 4 + testbench/tests.vh | 327 ++++++++++++++++++++++++++++- 14 files changed, 395 insertions(+), 99 deletions(-) diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index e183d9cbd..d36fcf6e3 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -41,6 +41,7 @@ localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index 8906bb571..70d455b4e 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -41,6 +41,7 @@ localparam COUNTERS = 12'd0; localparam ZICNTR_SUPPORTED = 0; localparam ZIHPM_SUPPORTED = 0; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 67855c817..a59bb1ab3 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -42,6 +42,7 @@ localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED 
= 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index 2f90656f2..6e5d08803 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -41,6 +41,7 @@ localparam COUNTERS = 0; localparam ZICNTR_SUPPORTED = 0; localparam ZIHPM_SUPPORTED = 0; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index ecb7b8f78..a32dc3bd6 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -40,6 +40,7 @@ localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index 11feba734..09885808f 100644 --- a/config/rv64fpquad/config.vh +++ b/config/rv64fpquad/config.vh @@ -31,7 +31,7 @@ localparam XLEN = 32'd64; // IEEE 754 compliance -localparam IEEE754 = 0; +localparam IEEE754 = 1; // MISA RISC-V configuration per specification localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ); @@ -41,6 +41,7 @@ localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 1; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index af6e4aebd..af828589d 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -41,6 +41,7 @@ localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; 
localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index 1908f900f..609a50f97 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -41,6 +41,7 @@ localparam COUNTERS = 0; localparam ZICNTR_SUPPORTED = 0; localparam ZIHPM_SUPPORTED = 0; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index ec6fc7ec5..7dc0a0bcf 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -14,6 +14,7 @@ localparam cvw_t P = '{ ZICNTR_SUPPORTED : ZICNTR_SUPPORTED, ZIHPM_SUPPORTED : ZIHPM_SUPPORTED, ZFH_SUPPORTED : ZFH_SUPPORTED, + ZFA_SUPPORTED : ZFA_SUPPORTED, SSTC_SUPPORTED : SSTC_SUPPORTED, VIRTMEM_SUPPORTED : VIRTMEM_SUPPORTED, VECTORED_INTERRUPTS_SUPPORTED : VECTORED_INTERRUPTS_SUPPORTED, diff --git a/src/cvw.sv b/src/cvw.sv index 53cbb5a70..a9ee9d093 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -49,6 +49,7 @@ typedef struct packed { logic ZICNTR_SUPPORTED; logic ZIHPM_SUPPORTED; logic ZFH_SUPPORTED; + logic ZFA_SUPPORTED; logic SSTC_SUPPORTED; logic VIRTMEM_SUPPORTED; logic VECTORED_INTERRUPTS_SUPPORTED; diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 7d7574a45..14fc4259b 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -263,15 +263,17 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); - // NaN Box SrcA to convert integer to requested FP size + // NaN Box SrcA to convert integer to requested FP size for fmv int->fp if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; else if(P.FPSIZES == 2) mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, 
{{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); - else if(P.FPSIZES == 3 | P.FPSIZES == 4) + else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin + localparam XD_LEN = P.D_LEN < P.XLEN ? P.D_LEN : P.XLEN; // shorter of D_LEN and XLEN mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, - {{P.FLEN-P.D_LEN{1'b1}}, ForwardedSrcAE[P.D_LEN-1:0]}, + {{P.FLEN-XD_LEN{1'b1}}, ForwardedSrcAE[XD_LEN-1:0]}, {{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes + end // select a result that may be written to the FP register mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); @@ -282,20 +284,20 @@ module fpu import cvw::*; #(parameter cvw_t P) ( assign mvsgn = XE[P.FLEN-1]; assign SgnExtXE = XE; end else if(P.FPSIZES == 2) begin - mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn); + mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn); mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE); end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin - mux4 #(1) sgnmux (XE[P.H_LEN-1], XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.LLEN-1], FmtE, mvsgn); - mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]}, - {{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, - {{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]}, + mux4 #(1) sgnmux (XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.H_LEN-1], XE[P.LLEN-1], FmtE, mvsgn); + mux4 #(P.FLEN) sgnextmux ({{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, + {{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]}, + {{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]}, XE, FmtE, SgnExtXE); end if (P.FLEN>P.XLEN) assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; else - assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; + assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // E/M pipe registers diff --git 
a/testbench/common/instrNameDecTB.sv b/testbench/common/instrNameDecTB.sv index 96ef6d67f..a3b5ef58e 100644 --- a/testbench/common/instrNameDecTB.sv +++ b/testbench/common/instrNameDecTB.sv @@ -232,95 +232,7 @@ module instrNameDecTB( 10'b1000111_???: name = "FMSUB"; 10'b1001011_???: name = "FNMSUB"; 10'b1001111_???: name = "FNMADD"; - 10'b1010011_000: if (funct7[6:2] == 5'b00000) name = "FADD"; - else if (funct7[6:2] == 5'b00001) name = "FSUB"; - else if (funct7[6:2] == 5'b00010) name = "FMUL"; - else if (funct7[6:2] == 5'b00011) name = "FDIV"; - else if (funct7[6:2] == 5'b01011) name = "FSQRT"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00000) name = "FCVT.W.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00001) name = "FCVT.WU.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00010) name = "FCVT.L.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00011) name = "FCVT.LU.S"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00000) name = "FCVT.S.W"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00001) name = "FCVT.S.WU"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00010) name = "FCVT.S.L"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00011) name = "FCVT.S.LU"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00000) name = "FCVT.W.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00001) name = "FCVT.WU.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00010) name = "FCVT.L.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00011) name = "FCVT.LU.D"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00000) name = "FCVT.D.W"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00001) name = "FCVT.D.WU"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00010) name = "FCVT.D.L"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; - else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; - else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; - else if (funct7 == 7'b1110000 & rs2 == 5'b00000) name = "FMV.X.W"; - else if (funct7 == 7'b1111000 & rs2 == 5'b00000) name = 
"FMV.W.X"; - else if (funct7 == 7'b1110001 & rs2 == 5'b00000) name = "FMV.X.D"; // DOUBLE - else if (funct7 == 7'b1111001 & rs2 == 5'b00000) name = "FMV.D.X"; // DOUBLE - else if (funct7[6:2] == 5'b00100) name = "FSGNJ"; - else if (funct7[6:2] == 5'b00101) name = "FMIN"; - else if (funct7[6:2] == 5'b10100) name = "FLE"; - else name = "ILLEGAL"; - 10'b1010011_001: if (funct7[6:2] == 5'b00000) name = "FADD"; - else if (funct7[6:2] == 5'b00001) name = "FSUB"; - else if (funct7[6:2] == 5'b00010) name = "FMUL"; - else if (funct7[6:2] == 5'b00011) name = "FDIV"; - else if (funct7[6:2] == 5'b01011) name = "FSQRT"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00000) name = "FCVT.W.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00001) name = "FCVT.WU.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00010) name = "FCVT.L.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00011) name = "FCVT.LU.S"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00000) name = "FCVT.S.W"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00001) name = "FCVT.S.WU"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00010) name = "FCVT.S.L"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00011) name = "FCVT.S.LU"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00000) name = "FCVT.W.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00001) name = "FCVT.WU.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00010) name = "FCVT.L.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00011) name = "FCVT.LU.D"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00000) name = "FCVT.D.W"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00001) name = "FCVT.D.WU"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00010) name = "FCVT.D.L"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; - else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; - else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; - else if (funct7[6:2] == 5'b00100) name = "FSGNJN"; - else if (funct7[6:2] == 5'b00101) name = "FMAX"; - else if 
(funct7[6:2] == 5'b10100) name = "FLT"; - else if (funct7[6:2] == 5'b11100) name = "FCLASS"; - else name = "ILLEGAL"; - 10'b1010011_010: if (funct7[6:2] == 5'b00000) name = "FADD"; - else if (funct7[6:2] == 5'b00001) name = "FSUB"; - else if (funct7[6:2] == 5'b00010) name = "FMUL"; - else if (funct7[6:2] == 5'b00011) name = "FDIV"; - else if (funct7[6:2] == 5'b01011) name = "FSQRT"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00000) name = "FCVT.W.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00001) name = "FCVT.WU.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00010) name = "FCVT.L.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00011) name = "FCVT.LU.S"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00000) name = "FCVT.S.W"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00001) name = "FCVT.S.WU"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00010) name = "FCVT.S.L"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00011) name = "FCVT.S.LU"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00000) name = "FCVT.W.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00001) name = "FCVT.WU.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00010) name = "FCVT.L.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00011) name = "FCVT.LU.D"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00000) name = "FCVT.D.W"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00001) name = "FCVT.D.WU"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00010) name = "FCVT.D.L"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; - else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; - else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; - else if (funct7[6:2] == 5'b00100) name = "FSGNJX"; - else if (funct7[6:2] == 5'b10100) name = "FEQ"; - else name = "ILLEGAL"; - /* verilator lint_off CASEOVERLAP */ - // *** RT: definitely take a look at this. 
This overlaps with 10'b1010011_000 10'b1010011_???: if (funct7[6:2] == 5'b00000) name = "FADD"; - /* verilator lint_on CASEOVERLAP */ else if (funct7[6:2] == 5'b00001) name = "FSUB"; else if (funct7[6:2] == 5'b00010) name = "FMUL"; else if (funct7[6:2] == 5'b00011) name = "FDIV"; @@ -343,6 +255,49 @@ module instrNameDecTB( else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00000) name = "FCVT.W.H"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00001) name = "FCVT.WU.H"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00010) name = "FCVT.L.H"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00011) name = "FCVT.LU.H"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00000) name = "FCVT.H.W"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00001) name = "FCVT.H.WU"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00010) name = "FCVT.H.L"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00011) name = "FCVT.H.LU"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00000) name = "FCVT.W.Q"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00001) name = "FCVT.WU.Q"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00010) name = "FCVT.L.Q"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00011) name = "FCVT.LU.Q"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00000) name = "FCVT.Q.W"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00001) name = "FCVT.Q.WU"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00010) name = "FCVT.Q.L"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00011) name = "FCVT.Q.LU"; + else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; + else if (funct7 == 7'b0100000 & rs2 == 5'b00010) name = "FCVT.S.H"; + else if (funct7 == 7'b0100000 & rs2 == 5'b00011) name = "FCVT.S.Q"; + else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; + else if (funct7 == 7'b0100001 & rs2 == 5'b00010) name = 
"FCVT.D.H"; + else if (funct7 == 7'b0100001 & rs2 == 5'b00011) name = "FCVT.D.Q"; + else if (funct7 == 7'b0100010 & rs2 == 5'b00000) name = "FCVT.H.S"; + else if (funct7 == 7'b0100010 & rs2 == 5'b00001) name = "FCVT.H.D"; + else if (funct7 == 7'b0100010 & rs2 == 5'b00011) name = "FCVT.H.Q"; + else if (funct7 == 7'b0100011 & rs2 == 5'b00000) name = "FCVT.Q.S"; + else if (funct7 == 7'b0100011 & rs2 == 5'b00001) name = "FCVT.Q.D"; + else if (funct7 == 7'b0100011 & rs2 == 5'b00010) name = "FCVT.Q.H"; + else if (funct7 == 7'b1110000 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.X.W"; + else if (funct7 == 7'b1111000 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.W.X"; + else if (funct7 == 7'b1110001 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.X.D"; + else if (funct7 == 7'b1111001 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.D.X"; + else if (funct7 == 7'b1110010 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.X.H"; + else if (funct7 == 7'b1111010 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.H.X"; + else if (funct7[6:2] == 5'b00100 & funct3 == 3'b000) name = "FSGNJ"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b000) name = "FMIN"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b000) name = "FLE"; + else if (funct7[6:2] == 5'b00100 & funct3 == 3'b001) name = "FSGNJN"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b001) name = "FMAX"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b001) name = "FLT"; + else if (funct7[6:2] == 5'b11100 & funct3 == 3'b001) name = "FCLASS"; + else if (funct7[6:2] == 5'b00100 & funct3 == 3'b010) name = "FSGNJX"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b010) name = "FEQ"; else name = "ILLEGAL"; 10'b0000111_010: name = "FLW"; 10'b0100111_010: name = "FSW"; diff --git a/testbench/testbench.sv b/testbench/testbench.sv index e68b01b48..efd4ea637 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -127,6 +127,8 @@ module testbench; "arch64zbs": if (P.ZBS_SUPPORTED) tests = arch64zbs; 
"arch64zicboz": if (P.ZICBOZ_SUPPORTED) tests = arch64zicboz; "arch64zcb": if (P.ZCB_SUPPORTED) tests = arch64zcb; + "arch64zfh": if (P.ZFH_SUPPORTED) tests = arch64zfh; +// "arch64zfa": if (P.ZFA_SUPPORTED) tests = arch64zfa; endcase end else begin // RV32 case (TEST) @@ -161,6 +163,8 @@ module testbench; "arch32zbs": if (P.ZBS_SUPPORTED) tests = arch32zbs; "arch32zicboz": if (P.ZICBOZ_SUPPORTED) tests = arch32zicboz; "arch32zcb": if (P.ZCB_SUPPORTED) tests = arch32zcb; + "arch32zfh": if (P.ZFH_SUPPORTED) tests = arch32zfh; + "arch32zfaf": if (P.ZFA_SUPPORTED) tests = arch32zfaf; endcase end if (tests.size() == 0) begin diff --git a/testbench/tests.vh b/testbench/tests.vh index 39b4ecc41..2eef6fc04 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1291,6 +1291,172 @@ string imperas32f[] = '{ "rv64i_m/F/src/fsw-align-01.S" }; + string arch64zfh[] = '{ + `RISCVARCHTEST, + "rv64i_m/Zfh/src/fmv.x.h_b1-01.S", + "rv64i_m/Zfh/src/fadd_b10-01.S", + "rv64i_m/Zfh/src/fadd_b1-01.S", + "rv64i_m/Zfh/src/fadd_b11-01.S", + "rv64i_m/Zfh/src/fadd_b12-01.S", + "rv64i_m/Zfh/src/fadd_b13-01.S", + "rv64i_m/Zfh/src/fadd_b2-01.S", + "rv64i_m/Zfh/src/fadd_b3-01.S", + "rv64i_m/Zfh/src/fadd_b4-01.S", + "rv64i_m/Zfh/src/fadd_b5-01.S", + "rv64i_m/Zfh/src/fadd_b7-01.S", + "rv64i_m/Zfh/src/fadd_b8-01.S", + "rv64i_m/Zfh/src/fclass_b1-01.S", + "rv64i_m/Zfh/src/fcvt.h.w_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.w_b26-01.S", + "rv64i_m/Zfh/src/fcvt.h.wu_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.wu_b26-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b22-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b27-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b29-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b22-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b27-01.S", + 
"rv64i_m/Zfh/src/fcvt.wu.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b29-01.S", + "rv64i_m/Zfh/src/fcvt.h.l_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.l_b26-01.S", + "rv64i_m/Zfh/src/fcvt.h.lu_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.lu_b26-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b22-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b27-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b29-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b22-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b27-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b29-01.S", + "rv64i_m/Zfh/src/fdiv_b20-01.S", + "rv64i_m/Zfh/src/fdiv_b1-01.S", + "rv64i_m/Zfh/src/fdiv_b2-01.S", + "rv64i_m/Zfh/src/fdiv_b21-01.S", + "rv64i_m/Zfh/src/fdiv_b3-01.S", + "rv64i_m/Zfh/src/fdiv_b4-01.S", + "rv64i_m/Zfh/src/fdiv_b5-01.S", + "rv64i_m/Zfh/src/fdiv_b6-01.S", + "rv64i_m/Zfh/src/fdiv_b7-01.S", + "rv64i_m/Zfh/src/fdiv_b8-01.S", + "rv64i_m/Zfh/src/fdiv_b9-01.S", + "rv64i_m/Zfh/src/feq_b1-01.S", + "rv64i_m/Zfh/src/feq_b19-01.S", + "rv64i_m/Zfh/src/fle_b1-01.S", + "rv64i_m/Zfh/src/fle_b19-01.S", + "rv64i_m/Zfh/src/flt_b1-01.S", + "rv64i_m/Zfh/src/flt_b19-01.S", + "rv64i_m/Zfh/src/flh-align-01.S", +/* "rv64i_m/Zfh/src/fmadd_b1-01.S", + "rv64i_m/Zfh/src/fmadd_b14-01.S", + "rv64i_m/Zfh/src/fmadd_b16-01.S", + "rv64i_m/Zfh/src/fmadd_b17-01.S", + "rv64i_m/Zfh/src/fmadd_b18-01.S", + "rv64i_m/Zfh/src/fmadd_b2-01.S", + "rv64i_m/Zfh/src/fmadd_b3-01.S", + "rv64i_m/Zfh/src/fmadd_b4-01.S", + "rv64i_m/Zfh/src/fmadd_b5-01.S", + "rv64i_m/Zfh/src/fmadd_b6-01.S", + "rv64i_m/Zfh/src/fmadd_b7-01.S", + "rv64i_m/Zfh/src/fmadd_b8-01.S", */ + "rv64i_m/Zfh/src/fmax_b1-01.S", + "rv64i_m/Zfh/src/fmax_b19-01.S", + "rv64i_m/Zfh/src/fmin_b1-01.S", + "rv64i_m/Zfh/src/fmin_b19-01.S", +/* 
"rv64i_m/Zfh/src/fmsub_b1-01.S", + "rv64i_m/Zfh/src/fmsub_b14-01.S", + "rv64i_m/Zfh/src/fmsub_b16-01.S", + "rv64i_m/Zfh/src/fmsub_b17-01.S", + "rv64i_m/Zfh/src/fmsub_b18-01.S", + "rv64i_m/Zfh/src/fmsub_b2-01.S", + "rv64i_m/Zfh/src/fmsub_b3-01.S", + "rv64i_m/Zfh/src/fmsub_b4-01.S", + "rv64i_m/Zfh/src/fmsub_b5-01.S", + "rv64i_m/Zfh/src/fmsub_b6-01.S", + "rv64i_m/Zfh/src/fmsub_b7-01.S", + "rv64i_m/Zfh/src/fmsub_b8-01.S", */ + "rv64i_m/Zfh/src/fmul_b1-01.S", + "rv64i_m/Zfh/src/fmul_b2-01.S", + "rv64i_m/Zfh/src/fmul_b3-01.S", + "rv64i_m/Zfh/src/fmul_b4-01.S", + "rv64i_m/Zfh/src/fmul_b5-01.S", + "rv64i_m/Zfh/src/fmul_b6-01.S", + "rv64i_m/Zfh/src/fmul_b7-01.S", + "rv64i_m/Zfh/src/fmul_b8-01.S", + "rv64i_m/Zfh/src/fmul_b9-01.S", + "rv64i_m/Zfh/src/fmv.h.x_b25-01.S", + "rv64i_m/Zfh/src/fmv.h.x_b26-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b1-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b22-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b23-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b24-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b27-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b28-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b29-01.S", +/* "rv64i_m/Zfh/src/fnmadd_b1-01.S", + "rv64i_m/Zfh/src/fnmadd_b14-01.S", + "rv64i_m/Zfh/src/fnmadd_b16-01.S", + "rv64i_m/Zfh/src/fnmadd_b17-01.S", + "rv64i_m/Zfh/src/fnmadd_b18-01.S", + "rv64i_m/Zfh/src/fnmadd_b2-01.S", + "rv64i_m/Zfh/src/fnmadd_b3-01.S", + "rv64i_m/Zfh/src/fnmadd_b4-01.S", + "rv64i_m/Zfh/src/fnmadd_b5-01.S", + "rv64i_m/Zfh/src/fnmadd_b6-01.S", + "rv64i_m/Zfh/src/fnmadd_b7-01.S", + "rv64i_m/Zfh/src/fnmadd_b8-01.S", + "rv64i_m/Zfh/src/fnmsub_b1-01.S", + "rv64i_m/Zfh/src/fnmsub_b14-01.S", + "rv64i_m/Zfh/src/fnmsub_b16-01.S", + "rv64i_m/Zfh/src/fnmsub_b17-01.S", + "rv64i_m/Zfh/src/fnmsub_b18-01.S", + "rv64i_m/Zfh/src/fnmsub_b2-01.S", + "rv64i_m/Zfh/src/fnmsub_b3-01.S", + "rv64i_m/Zfh/src/fnmsub_b4-01.S", + "rv64i_m/Zfh/src/fnmsub_b5-01.S", + "rv64i_m/Zfh/src/fnmsub_b6-01.S", + "rv64i_m/Zfh/src/fnmsub_b7-01.S", + "rv64i_m/Zfh/src/fnmsub_b8-01.S", */ + "rv64i_m/Zfh/src/fsgnj_b1-01.S", + 
"rv64i_m/Zfh/src/fsgnjn_b1-01.S", + "rv64i_m/Zfh/src/fsgnjx_b1-01.S", + "rv64i_m/Zfh/src/fsqrt_b1-01.S", + "rv64i_m/Zfh/src/fsqrt_b20-01.S", + "rv64i_m/Zfh/src/fsqrt_b2-01.S", + "rv64i_m/Zfh/src/fsqrt_b3-01.S", + "rv64i_m/Zfh/src/fsqrt_b4-01.S", + "rv64i_m/Zfh/src/fsqrt_b5-01.S", + "rv64i_m/Zfh/src/fsqrt_b7-01.S", + "rv64i_m/Zfh/src/fsqrt_b8-01.S", + "rv64i_m/Zfh/src/fsqrt_b9-01.S", + "rv64i_m/Zfh/src/fsub_b10-01.S", + "rv64i_m/Zfh/src/fsub_b1-01.S", + "rv64i_m/Zfh/src/fsub_b11-01.S", + "rv64i_m/Zfh/src/fsub_b12-01.S", + "rv64i_m/Zfh/src/fsub_b13-01.S", + "rv64i_m/Zfh/src/fsub_b2-01.S", + "rv64i_m/Zfh/src/fsub_b3-01.S", + "rv64i_m/Zfh/src/fsub_b4-01.S", + "rv64i_m/Zfh/src/fsub_b5-01.S", + "rv64i_m/Zfh/src/fsub_b7-01.S", + "rv64i_m/Zfh/src/fsub_b8-01.S", + "rv64i_m/Zfh/src/fsh-align-01.S" + }; + + string arch64d_fma[] = '{ `RISCVARCHTEST, //"rv64i_m/D/src/fmadd.d_b15-01.S", @@ -1638,7 +1804,6 @@ string arch64zbs[] = '{ string arch32f[] = '{ `RISCVARCHTEST, - "rv32i_m/F/src/fdiv_b20-01.S", "rv32i_m/F/src/fadd_b10-01.S", "rv32i_m/F/src/fadd_b1-01.S", "rv32i_m/F/src/fadd_b11-01.S", @@ -1783,6 +1948,166 @@ string arch64zbs[] = '{ "rv32i_m/F/src/fsw-align-01.S" }; + string arch32zfh[] = '{ + `RISCVARCHTEST, + "rv32i_m/Zfh/src/fadd_b10-01.S", + "rv32i_m/Zfh/src/fadd_b1-01.S", + "rv32i_m/Zfh/src/fadd_b11-01.S", + "rv32i_m/Zfh/src/fadd_b12-01.S", + "rv32i_m/Zfh/src/fadd_b13-01.S", + "rv32i_m/Zfh/src/fadd_b2-01.S", + "rv32i_m/Zfh/src/fadd_b3-01.S", + "rv32i_m/Zfh/src/fadd_b4-01.S", + "rv32i_m/Zfh/src/fadd_b5-01.S", + "rv32i_m/Zfh/src/fadd_b7-01.S", + "rv32i_m/Zfh/src/fadd_b8-01.S", + "rv32i_m/Zfh/src/fclass_b1-01.S", + "rv32i_m/Zfh/src/fcvt.h.w_b25-01.S", + "rv32i_m/Zfh/src/fcvt.h.w_b26-01.S", + "rv32i_m/Zfh/src/fcvt.h.wu_b25-01.S", + "rv32i_m/Zfh/src/fcvt.h.wu_b26-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b1-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b22-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b23-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b24-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b27-01.S", + 
"rv32i_m/Zfh/src/fcvt.w.h_b28-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b29-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b1-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b22-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b23-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b24-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b27-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b28-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b29-01.S", + "rv32i_m/Zfh/src/fdiv_b20-01.S", + "rv32i_m/Zfh/src/fdiv_b1-01.S", + "rv32i_m/Zfh/src/fdiv_b2-01.S", + "rv32i_m/Zfh/src/fdiv_b21-01.S", + "rv32i_m/Zfh/src/fdiv_b3-01.S", + "rv32i_m/Zfh/src/fdiv_b4-01.S", + "rv32i_m/Zfh/src/fdiv_b5-01.S", + "rv32i_m/Zfh/src/fdiv_b6-01.S", + "rv32i_m/Zfh/src/fdiv_b7-01.S", + "rv32i_m/Zfh/src/fdiv_b8-01.S", + "rv32i_m/Zfh/src/fdiv_b9-01.S", + "rv32i_m/Zfh/src/feq_b1-01.S", + "rv32i_m/Zfh/src/feq_b19-01.S", + "rv32i_m/Zfh/src/fle_b1-01.S", + "rv32i_m/Zfh/src/fle_b19-01.S", + "rv32i_m/Zfh/src/flt_b1-01.S", + "rv32i_m/Zfh/src/flt_b19-01.S", + "rv32i_m/Zfh/src/flh-align-01.S", +/* "rv32i_m/Zfh/src/fmadd_b1-01.S", + "rv32i_m/Zfh/src/fmadd_b14-01.S", + "rv32i_m/Zfh/src/fmadd_b16-01.S", + "rv32i_m/Zfh/src/fmadd_b17-01.S", + "rv32i_m/Zfh/src/fmadd_b18-01.S", + "rv32i_m/Zfh/src/fmadd_b2-01.S", + "rv32i_m/Zfh/src/fmadd_b3-01.S", + "rv32i_m/Zfh/src/fmadd_b4-01.S", + "rv32i_m/Zfh/src/fmadd_b5-01.S", + "rv32i_m/Zfh/src/fmadd_b6-01.S", + "rv32i_m/Zfh/src/fmadd_b7-01.S", + "rv32i_m/Zfh/src/fmadd_b8-01.S", */ + "rv32i_m/Zfh/src/fmax_b1-01.S", + "rv32i_m/Zfh/src/fmax_b19-01.S", + "rv32i_m/Zfh/src/fmin_b1-01.S", + "rv32i_m/Zfh/src/fmin_b19-01.S", +/* "rv32i_m/Zfh/src/fmsub_b1-01.S", + "rv32i_m/Zfh/src/fmsub_b14-01.S", + "rv32i_m/Zfh/src/fmsub_b16-01.S", + "rv32i_m/Zfh/src/fmsub_b17-01.S", + "rv32i_m/Zfh/src/fmsub_b18-01.S", + "rv32i_m/Zfh/src/fmsub_b2-01.S", + "rv32i_m/Zfh/src/fmsub_b3-01.S", + "rv32i_m/Zfh/src/fmsub_b4-01.S", + "rv32i_m/Zfh/src/fmsub_b5-01.S", + "rv32i_m/Zfh/src/fmsub_b6-01.S", + "rv32i_m/Zfh/src/fmsub_b7-01.S", + "rv32i_m/Zfh/src/fmsub_b8-01.S", */ + "rv32i_m/Zfh/src/fmul_b1-01.S", + 
"rv32i_m/Zfh/src/fmul_b2-01.S", + "rv32i_m/Zfh/src/fmul_b3-01.S", + "rv32i_m/Zfh/src/fmul_b4-01.S", + "rv32i_m/Zfh/src/fmul_b5-01.S", + "rv32i_m/Zfh/src/fmul_b6-01.S", + "rv32i_m/Zfh/src/fmul_b7-01.S", + "rv32i_m/Zfh/src/fmul_b8-01.S", + "rv32i_m/Zfh/src/fmul_b9-01.S", + "rv32i_m/Zfh/src/fmv.h.x_b25-01.S", + "rv32i_m/Zfh/src/fmv.h.x_b26-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b1-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b22-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b23-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b24-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b27-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b28-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b29-01.S", +/* "rv32i_m/Zfh/src/fnmadd_b1-01.S", + "rv32i_m/Zfh/src/fnmadd_b14-01.S", + "rv32i_m/Zfh/src/fnmadd_b16-01.S", + "rv32i_m/Zfh/src/fnmadd_b17-01.S", + "rv32i_m/Zfh/src/fnmadd_b18-01.S", + "rv32i_m/Zfh/src/fnmadd_b2-01.S", + "rv32i_m/Zfh/src/fnmadd_b3-01.S", + "rv32i_m/Zfh/src/fnmadd_b4-01.S", + "rv32i_m/Zfh/src/fnmadd_b5-01.S", + "rv32i_m/Zfh/src/fnmadd_b6-01.S", + "rv32i_m/Zfh/src/fnmadd_b7-01.S", + "rv32i_m/Zfh/src/fnmadd_b8-01.S", + "rv32i_m/Zfh/src/fnmsub_b1-01.S", + "rv32i_m/Zfh/src/fnmsub_b14-01.S", + "rv32i_m/Zfh/src/fnmsub_b16-01.S", + "rv32i_m/Zfh/src/fnmsub_b17-01.S", + "rv32i_m/Zfh/src/fnmsub_b18-01.S", + "rv32i_m/Zfh/src/fnmsub_b2-01.S", + "rv32i_m/Zfh/src/fnmsub_b3-01.S", + "rv32i_m/Zfh/src/fnmsub_b4-01.S", + "rv32i_m/Zfh/src/fnmsub_b5-01.S", + "rv32i_m/Zfh/src/fnmsub_b6-01.S", + "rv32i_m/Zfh/src/fnmsub_b7-01.S", + "rv32i_m/Zfh/src/fnmsub_b8-01.S", */ + "rv32i_m/Zfh/src/fsgnj_b1-01.S", + "rv32i_m/Zfh/src/fsgnjn_b1-01.S", + "rv32i_m/Zfh/src/fsgnjx_b1-01.S", + "rv32i_m/Zfh/src/fsqrt_b1-01.S", + "rv32i_m/Zfh/src/fsqrt_b20-01.S", + "rv32i_m/Zfh/src/fsqrt_b2-01.S", + "rv32i_m/Zfh/src/fsqrt_b3-01.S", + "rv32i_m/Zfh/src/fsqrt_b4-01.S", + "rv32i_m/Zfh/src/fsqrt_b5-01.S", + "rv32i_m/Zfh/src/fsqrt_b7-01.S", + "rv32i_m/Zfh/src/fsqrt_b8-01.S", + "rv32i_m/Zfh/src/fsqrt_b9-01.S", + "rv32i_m/Zfh/src/fsub_b10-01.S", + "rv32i_m/Zfh/src/fsub_b1-01.S", + 
"rv32i_m/Zfh/src/fsub_b11-01.S", + "rv32i_m/Zfh/src/fsub_b12-01.S", + "rv32i_m/Zfh/src/fsub_b13-01.S", + "rv32i_m/Zfh/src/fsub_b2-01.S", + "rv32i_m/Zfh/src/fsub_b3-01.S", + "rv32i_m/Zfh/src/fsub_b4-01.S", + "rv32i_m/Zfh/src/fsub_b5-01.S", + "rv32i_m/Zfh/src/fsub_b7-01.S", + "rv32i_m/Zfh/src/fsub_b8-01.S", + "rv32i_m/Zfh/src/fsh-align-01.S" + }; + + string arch32zfaf[] = '{ + `RISCVARCHTEST, + "rv32i_m/F_Zfa/src/fle_b1-01.S", + "rv32i_m/F_Zfa/src/fle_b19-01.S", + "rv32i_m/F_Zfa/src/fli_b1-01.S", + "rv32i_m/F_Zfa/src/fltq_b1-01.S", + "rv32i_m/F_Zfa/src/fltq_b19-01.S", + "rv32i_m/F_Zfa/src/fmin_b1-01.S", + "rv32i_m/F_Zfa/src/fmin_b19-01.S", + "rv32i_m/F_Zfa/src/fmax_b1-01.S", + "rv32i_m/F_Zfa/src/fmax_b19-01.S", + "rv32i_m/F_Zfa/src/fround_b1-01.S" + }; + string arch32d_fma[] = '{ `RISCVARCHTEST, //"rv32i_m/D/src/fmadd.d_b15-01.S", From 0d56a281b9c2993c5d816f8259670828dac5bb4e Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 15 Jan 2024 13:25:46 -0800 Subject: [PATCH 02/37] Cleaned up indentation in testbench-fp --- testbench/testbench-fp.sv | 114 +++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 58 deletions(-) diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv index ce10c2be2..b19542d62 100644 --- a/testbench/testbench-fp.sv +++ b/testbench/testbench-fp.sv @@ -882,7 +882,7 @@ module testbenchfp; // - the sign of the NaN does not matter for the opperations being tested // - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT) - case (FmtVal) + case (FmtVal) 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | @@ -903,9 +903,9 @@ module testbenchfp; (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | (YNaN&(Res[P.H_LEN-2:0] === 
{Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) | (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]}))); - endcase - else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format - case (OpCtrlVal[1:0]) + endcase + else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format + case (OpCtrlVal[1:0]) 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | @@ -926,72 +926,70 @@ module testbenchfp; (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]}))); - endcase - else NaNGood = 1'b0; // integers can't be NaNs + endcase + else NaNGood = 1'b0; // integers can't be NaNs - /////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////// - // ||||||| ||| ||| ||||||| ||||||| ||| ||| - // ||| ||| ||| ||| ||| ||| ||| - // ||| |||||||||| ||||||| ||| |||||| - // ||| ||| ||| ||| ||| ||| ||| - // ||||||| ||| ||| ||||||| ||||||| ||| ||| + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||||| ||||||| ||| |||||| + // ||| ||| ||| ||| ||| ||| ||| + // ||||||| ||| ||| ||||||| ||||||| ||| ||| - /////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////// - // check if result is correct - // wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - assign ResMatch = ((Res === Ans) | NaNGood | 
(NaNGood === 1'bx)); - assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx)); - assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL); - assign FMAop = (OpCtrlVal == `FMAUNIT); - assign DivDone = OldFDivBusyE & ~FDivBusyE; + // check if result is correct + // wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) + assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx)); + assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx)); + assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL); + assign FMAop = (OpCtrlVal == `FMAUNIT); + assign DivDone = OldFDivBusyE & ~FDivBusyE; - // Maybe change OpCtrl but for now just look at TEST for fma test - assign CheckNow = ((DivDone | ~divsqrtop) | (TEST == "add" | TEST == "fma" | TEST == "sub")) & (UnitVal !== `CVTINTUNIT) & (UnitVal !== `CMPUNIT); - if (~(ResMatch & FlagMatch) & CheckNow) begin + // Maybe change OpCtrl but for now just look at TEST for fma test + assign CheckNow = ((DivDone | ~divsqrtop) | (TEST == "add" | TEST == "fma" | TEST == "sub")) & (UnitVal !== `CVTINTUNIT) & (UnitVal !== `CMPUNIT); + if (~(ResMatch & FlagMatch) & CheckNow) begin + errors += 1; + $display("\nError in %s", Tests[TestNum]); + $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $stop; + end else if (((UnitVal === `CVTINTUNIT) | (UnitVal === `CMPUNIT)) & + ~(ResMatch & FlagMatch) & (Ans[0] !== 1'bx)) begin // Check for conversion and comparisons errors += 1; $display("\nError in %s", Tests[TestNum]); - $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h 
%h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; - end else if (((UnitVal === `CVTINTUNIT) | (UnitVal === `CMPUNIT)) & - ~(ResMatch & FlagMatch) & (Ans[0] !== 1'bx)) begin // Check for conversion and comparisons - errors += 1; - $display("\nError in %s", Tests[TestNum]); - $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); - $stop; - end end if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the eof - // increment the test - TestNum += 1; - // clear the vectors - for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}}; - // read next files - $readmemh({`PATH, Tests[TestNum]}, TestVectors); - // set the vector index back to 0 - VectorNum = 0; - // incemet the operation if all the rounding modes have been tested - if (FrmNum === 4) OpCtrlNum += 1; - // increment the rounding mode or loop back to rne - if (FrmNum < 4) - FrmNum += 1; - else begin - FrmNum = 0; - // Add some time as a buffer between tests at the end of each test - repeat (10) - @(posedge clk); - end - // if no more Tests - finish - if (Tests[TestNum] === "") begin - $display("\nAll Tests completed with %d errors\n", errors); - $stop; - end - $display("Running %s vectors", Tests[TestNum]); + // increment the test + TestNum += 1; + // clear the vectors + for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}}; + // read next files + $readmemh({`PATH, Tests[TestNum]}, TestVectors); + // set the vector index back to 0 + VectorNum = 0; + // incemet the operation if all the rounding modes have been tested + if (FrmNum === 4) OpCtrlNum += 1; + // increment the rounding mode or loop back to rne + if (FrmNum < 4) FrmNum += 1; + else begin + FrmNum = 0; + // Add some time as a buffer between tests at the end of each test + repeat (10) + @(posedge clk); + end + // if no more Tests - finish + if (Tests[TestNum] === "") begin + $display("\nAll 
Tests completed with %d errors\n", errors); + $stop; + end + $display("Running %s vectors", Tests[TestNum]); end end endmodule From 0235970313a7822614e2b1610dec2f82530f10c7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 15 Jan 2024 13:40:12 -0800 Subject: [PATCH 03/37] Optimized away unused support for fmv with quads --- src/fpu/fpu.sv | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 14fc4259b..45af38c0c 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -263,23 +263,23 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); - // NaN Box SrcA to convert integer to requested FP size for fmv int->fp + // NaN Box SrcA to convert integer to requested FP size for fmv.*.x if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; else if(P.FPSIZES == 2) mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin localparam XD_LEN = P.D_LEN < P.XLEN ? 
P.D_LEN : P.XLEN; // shorter of D_LEN and XLEN - mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, + mux3 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, {{P.FLEN-XD_LEN{1'b1}}, ForwardedSrcAE[XD_LEN-1:0]}, {{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]}, - {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes + FmtE, AlignedSrcAE); // NaN boxing zeroes end // select a result that may be written to the FP register mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); - // select the result that may be written to the integer register with fmv - to IEU + // select the result that may be written to the integer register with fmv.x.* if(P.FPSIZES == 1) begin assign mvsgn = XE[P.FLEN-1]; assign SgnExtXE = XE; @@ -288,16 +288,17 @@ module fpu import cvw::*; #(parameter cvw_t P) ( mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE); end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin mux4 #(1) sgnmux (XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.H_LEN-1], XE[P.LLEN-1], FmtE, mvsgn); - mux4 #(P.FLEN) sgnextmux ({{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, + mux3 #(P.FLEN) sgnextmux ({{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, {{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]}, {{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]}, - XE, FmtE, SgnExtXE); + FmtE, SgnExtXE); // Q not needed because there is no fmv.x.q end + // sign extend to XLEN if necessary if (P.FLEN>P.XLEN) assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; else - assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; + assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // E/M pipe registers From 83df3dfe83e860aa272294032725383923158801 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 15 Jan 2024 16:02:37 -0600 Subject: [PATCH 
04/37] Fixed the zifencei bug (part of issue 405). --- src/ieu/controller.sv | 2 +- src/lsu/lsu.sv | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 3507ec3e9..e5cfff9ed 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -370,7 +370,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( // Fences // Ordinary fence is presently a nop // fence.i flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented - if (P.ZIFENCEI_SUPPORTED & P.ICACHE_SUPPORTED) begin:fencei + if (P.ZIFENCEI_SUPPORTED & (P.ICACHE_SUPPORTED | P.DCACHE_SUPPORTED)) begin:fencei logic FenceID; assign FenceID = FenceXD & (Funct3D == 3'b001); // is it a FENCE.I instruction? assign InvalidateICacheD = FenceID; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e7d6707d6..06d64c154 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -342,7 +342,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DCacheStallM = CacheStall & ~IgnoreRequestTLB; assign CacheBusRW = CacheBusRWTemp; - // *** add support for cboz ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), From 614a83331f10032200eb2b6c61f262813e7e9fb2 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 15 Jan 2024 17:29:00 -0600 Subject: [PATCH 05/37] Fixed part of issue #405. The non-cache version of the bus controller did not have the correct suppression of BusCommitted for a read only controller. 
--- src/ebu/ahbinterface.sv | 2 +- src/ebu/busfsm.sv | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index de17f3553..d9892f21d 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -64,7 +64,7 @@ module ahbinterface #( assign HWSTRB = '0; end - busfsm busfsm(.HCLK, .HRESETn, .Flush, .BusRW, + busfsm #(~LSU) busfsm(.HCLK, .HRESETn, .Flush, .BusRW, .BusCommitted, .Stall, .BusStall, .CaptureEn, .HREADY, .HTRANS, .HWRITE); diff --git a/src/ebu/busfsm.sv b/src/ebu/busfsm.sv index 108cd546d..a2d4e42b2 100644 --- a/src/ebu/busfsm.sv +++ b/src/ebu/busfsm.sv @@ -28,7 +28,9 @@ //////////////////////////////////////////////////////////////////////////////////////////////// // HCLK and clk must be the same clock! -module busfsm ( +module busfsm #( + parameter READ_ONLY +)( input logic HCLK, input logic HRESETn, @@ -70,7 +72,7 @@ module busfsm ( // (CurrState == DATA_PHASE & ~BusRW[0]); // possible optimization here. fails uart test, but i'm not sure the failure is valid. (CurrState == DATA_PHASE); - assign BusCommitted = CurrState != ADR_PHASE; + assign BusCommitted = CurrState != ADR_PHASE & ~(READ_ONLY & CurrState == MEM3); assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) ? AHB_NONSEQ : AHB_IDLE; assign HWRITE = BusRW[0]; From 82a786f1855998a82a27de422cf76e0973363402 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 15 Jan 2024 17:36:01 -0600 Subject: [PATCH 06/37] Hmm. Verilator is complaining about the parameter width. I'm not sure why so I changed to 1 bit. 
--- src/ebu/ahbinterface.sv | 2 +- src/ebu/busfsm.sv | 4 ++-- src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index d9892f21d..fa5a6293e 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -29,7 +29,7 @@ module ahbinterface #( parameter XLEN, - parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits + parameter logic LSU = 1'b0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits )( input logic HCLK, HRESETn, // bus interface diff --git a/src/ebu/busfsm.sv b/src/ebu/busfsm.sv index a2d4e42b2..9ba159705 100644 --- a/src/ebu/busfsm.sv +++ b/src/ebu/busfsm.sv @@ -29,7 +29,7 @@ // HCLK and clk must be the same clock! module busfsm #( - parameter READ_ONLY + parameter logic READ_ONLY )( input logic HCLK, input logic HRESETn, @@ -72,7 +72,7 @@ module busfsm #( // (CurrState == DATA_PHASE & ~BusRW[0]); // possible optimization here. fails uart test, but i'm not sure the failure is valid. (CurrState == DATA_PHASE); - assign BusCommitted = CurrState != ADR_PHASE & ~(READ_ONLY & CurrState == MEM3); + assign BusCommitted = (CurrState != ADR_PHASE) & ~(READ_ONLY & CurrState == MEM3); assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) ? AHB_NONSEQ : AHB_IDLE; assign HWRITE = BusRW[0]; diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 107a4af8b..8e7d9a0d1 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -273,7 +273,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( assign BusRW = ~ITLBMissF & ~SelIROM ? 
IFURWF : '0; assign IFUHSIZE = 3'b010; - ahbinterface #(P.XLEN, 0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), + ahbinterface #(P.XLEN, 1'b0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 06d64c154..cf0fab9e8 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -367,7 +367,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; - ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), + ahbinterface #(P.XLEN, 1'b1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM[P.XLEN-1:0]), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); From dfe5ef44276af447c32c510aae47e02350d16093 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 15 Jan 2024 17:47:17 -0600 Subject: [PATCH 07/37] Added logic for the non-cache atomics. --- src/ebu/ahbinterface.sv | 3 ++- src/ebu/busfsm.sv | 26 ++++++++++++++++---------- src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 2 +- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index fa5a6293e..df84175f0 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -44,6 +44,7 @@ module ahbinterface #( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. 
Prevents bus transaction from starting input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write + input logic BusAtomic, // Uncache atomic memory operation input logic [XLEN/8-1:0] ByteMask, // Bytes enables within a word input logic [XLEN-1:0] WriteData, // IEU write data for a store output logic BusStall, // Bus is busy with an in flight memory operation @@ -64,7 +65,7 @@ module ahbinterface #( assign HWSTRB = '0; end - busfsm #(~LSU) busfsm(.HCLK, .HRESETn, .Flush, .BusRW, + busfsm #(~LSU) busfsm(.HCLK, .HRESETn, .Flush, .BusRW, .BusAtomic, .BusCommitted, .Stall, .BusStall, .CaptureEn, .HREADY, .HTRANS, .HWRITE); diff --git a/src/ebu/busfsm.sv b/src/ebu/busfsm.sv index 9ba159705..126759b0d 100644 --- a/src/ebu/busfsm.sv +++ b/src/ebu/busfsm.sv @@ -38,6 +38,7 @@ module busfsm #( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write + input logic BusAtomic, // Uncache atomic memory operation output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA output logic BusStall, // Bus is busy with an in flight memory operation output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt @@ -47,7 +48,7 @@ module busfsm #( output logic HWRITE // AHB 0: Read operation 1: Write operation ); - typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3} busstatetype; + typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, ATOMIC_PHASE} busstatetype; typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype; busstatetype CurrState, NextState; @@ -58,13 +59,16 @@ module busfsm #( always_comb begin case(CurrState) - ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; - else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY) NextState = MEM3; - else NextState = 
DATA_PHASE; - MEM3: if(Stall) NextState = MEM3; - else NextState = ADR_PHASE; - default: NextState = ADR_PHASE; + ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; + else NextState = ADR_PHASE; + DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_PHASE; + else if(HREADY & ~BusAtomic) NextState = MEM3; + else NextState = DATA_PHASE; + ATOMIC_PHASE: if(HREADY) NextState = MEM3; + else NextState = ATOMIC_PHASE; + MEM3: if(Stall) NextState = MEM3; + else NextState = ADR_PHASE; + default: NextState = ADR_PHASE; endcase end @@ -74,8 +78,10 @@ module busfsm #( assign BusCommitted = (CurrState != ADR_PHASE) & ~(READ_ONLY & CurrState == MEM3); - assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) ? AHB_NONSEQ : AHB_IDLE; - assign HWRITE = BusRW[0]; + assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) | + (CurrState == DATA_PHASE & BusAtomic) ? AHB_NONSEQ : AHB_IDLE; + assign HWRITE = (BusRW[0] & ~BusAtomic) | (CurrState == DATA_PHASE & BusAtomic); + assign CaptureEn = CurrState == DATA_PHASE; endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 8e7d9a0d1..0bd899306 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -275,7 +275,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( ahbinterface #(P.XLEN, 1'b0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), - .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), + .HWSTRB(), .BusRW, .BusAtomic('0), .ByteMask(), .WriteData('0), .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); assign CacheCommittedF = '0; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index cf0fab9e8..f86c62aea 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -369,7 +369,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ahbinterface #(P.XLEN, 1'b1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), 
.HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), - .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM[P.XLEN-1:0]), + .HWSTRB(LSUHWSTRB), .BusRW, .BusAtomic(AtomicM[1]), .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM[P.XLEN-1:0]), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); // Mux between the 2 sources of read data, 0: Bus, 1: DTIM From bb3a7850c421c79eb8c2fdc470f506b597ad6e15 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 15 Jan 2024 17:48:41 -0800 Subject: [PATCH 08/37] Simplified floating-point parameters in config-shared --- config/shared/config-shared.vh | 46 ++++++++++++++++------------------ 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 55bca569f..86f9a0a9e 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -65,33 +65,29 @@ localparam H_NF = 32'd10; localparam H_BIAS = 32'd15; localparam H_FMT = 2'd2; -// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits -localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : S_LEN); -localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE); -localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF); -localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0); -localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS); -/* Delete once tested dh 10/10/22 - -localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : F_SUPPORTED ? S_LEN : H_LEN); -localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : F_SUPPORTED ? S_NE : H_NE); -localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : F_SUPPORTED ? S_NF : H_NF); -localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : F_SUPPORTED ? 2'd0 : 2'd2); -localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : F_SUPPORTED ? 
S_BIAS : H_BIAS);*/ +// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits (for longest format supported) +localparam FLEN = Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : S_LEN; +localparam NE = Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE; +localparam NF = Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF; +localparam FMT = Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0; +localparam BIAS = Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS; // Floating point constants needed for FPU paramerterization -localparam FPSIZES = ((32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED)); -localparam FMTBITS = ((32)'(FPSIZES>=3)+1); -localparam LEN1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_LEN : (F_SUPPORTED & (FLEN != S_LEN)) ? S_LEN : H_LEN); -localparam NE1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NE : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NE : H_NE); -localparam NF1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NF : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NF : H_NF); -localparam FMT1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? 2'd1 : (F_SUPPORTED & (FLEN != S_LEN)) ? 2'd0 : 2'd2); -localparam BIAS1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_BIAS : (F_SUPPORTED & (FLEN != S_LEN)) ? S_BIAS : H_BIAS); -localparam LEN2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_LEN : H_LEN); -localparam NE2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NE : H_NE); -localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF); -localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); -localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); +// LEN1/NE1/NF1/FMT1 is the size of the second longest supported format +localparam FPSIZES = (32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED); +localparam FMTBITS = (32)'(FPSIZES>=3)+1; +localparam LEN1 = (FLEN > D_LEN) ? D_LEN : (FLEN > S_LEN) ? S_LEN : H_LEN; +localparam NE1 = (FLEN > D_LEN) ? D_NE : (FLEN > S_LEN) ? S_NE : H_NE; +localparam NF1 = (FLEN > D_LEN) ? 
D_NF : (FLEN > S_LEN) ? S_NF : H_NF; +localparam FMT1 = (FLEN > D_LEN) ? 2'd1 : (FLEN > S_LEN) ? 2'd0 : 2'd2; +localparam BIAS1 = (FLEN > D_LEN) ? D_BIAS : (FLEN > S_LEN) ? S_BIAS : H_BIAS; + +// LEN2 etc is the size of the third longest supported format +localparam LEN2 = (LEN1 > S_LEN) ? S_LEN : H_LEN; +localparam NE2 = (LEN1 > S_LEN) ? S_NE : H_NE; +localparam NF2 = (LEN1 > S_LEN) ? S_NF : H_NF; +localparam FMT2 = (LEN1 > S_LEN) ? 2'd0 : 2'd2; +localparam BIAS2 = (LEN1 > S_LEN) ? S_BIAS : H_BIAS; // divider r and rk (bits per digit, bits per cycle) localparam LOGR = $clog2(RADIX); // r = log(R) bits per digit From ff5554ca61d22087c6cd7e882652d8285a02f73f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 16 Jan 2024 10:43:20 -0600 Subject: [PATCH 09/37] Atomics work correctly without a d cache. --- src/ebu/buscachefsm.sv | 24 ++++++++++++++---------- src/ebu/busfsm.sv | 12 ++++++++---- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 0368164ed..8d434c678 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -66,7 +66,7 @@ module buscachefsm #( output logic [2:0] HBURST // AHB burst length ); - typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, ATOMIC_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; + typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, ATOMIC_READ_DATA_PHASE, ATOMIC_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype; busstatetype CurrState, NextState; @@ -87,13 +87,15 @@ module buscachefsm #( always_comb begin case(CurrState) - ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; - else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; - else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; - else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_PHASE; - else if(HREADY & ~BusAtomic) NextState = MEM3; + 
ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; + else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; + else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; + else NextState = ADR_PHASE; + DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_READ_DATA_PHASE; + else if(HREADY & ~BusAtomic) NextState = MEM3; else NextState = DATA_PHASE; + ATOMIC_READ_DATA_PHASE: if(HREADY) NextState = ATOMIC_PHASE; + else NextState = ATOMIC_READ_DATA_PHASE; ATOMIC_PHASE: if(HREADY) NextState = MEM3; else NextState = ATOMIC_PHASE; MEM3: if(Stall) NextState = MEM3; @@ -107,7 +109,7 @@ module buscachefsm #( else if(HREADY & FinalBeatCount & BusCMOZero) NextState = MEM3; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_WRITEBACK; - default: NextState = ADR_PHASE; + default: NextState = ADR_PHASE; endcase end @@ -129,6 +131,7 @@ module buscachefsm #( //(CurrState == DATA_PHASE & ~BusRW[0]) | // *** replace the next line with this. Fails uart test but i think it's a test problem not a hardware problem. (CurrState == DATA_PHASE) | (CurrState == ATOMIC_PHASE) | + (CurrState == ATOMIC_READ_DATA_PHASE) | (CurrState == CACHE_FETCH & ~FinalBeatCount) | (CurrState == CACHE_WRITEBACK & ~FinalBeatCount); @@ -136,11 +139,11 @@ module buscachefsm #( // AHB bus interface assign HTRANS = (CurrState == ADR_PHASE & HREADY & ((|BusRW) | (|CacheBusRW) | BusCMOZero) & ~Flush) | - (CurrState == DATA_PHASE & BusAtomic) | + (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic) | (CacheAccess & FinalBeatCount & |CacheBusRW & HREADY & ~Flush) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |BeatCount) ? (`BURST_EN ? 
AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; - assign HWRITE = ((BusRW[0] & ~BusAtomic) | BusWrite & ~Flush) | (CurrState == DATA_PHASE & BusAtomic) | + assign HWRITE = ((BusRW[0] & ~BusAtomic) | BusWrite & ~Flush) | (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic) | (CurrState == CACHE_WRITEBACK & |BeatCount); assign HBURST = `BURST_EN & ((|CacheBusRW & ~Flush) | (CacheAccess & |BeatCount)) ? LocalBurstType : 3'b0; @@ -159,6 +162,7 @@ module buscachefsm #( assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | BusWrite)) | (CurrState == DATA_PHASE & BusRW[0]) | (CurrState == ATOMIC_PHASE & BusRW[0]) | + (CurrState == ATOMIC_READ_DATA_PHASE & BusRW[0]) | (CurrState == CACHE_WRITEBACK) | (CurrState == CACHE_FETCH); diff --git a/src/ebu/busfsm.sv b/src/ebu/busfsm.sv index 126759b0d..81d11715e 100644 --- a/src/ebu/busfsm.sv +++ b/src/ebu/busfsm.sv @@ -48,7 +48,7 @@ module busfsm #( output logic HWRITE // AHB 0: Read operation 1: Write operation ); - typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, ATOMIC_PHASE} busstatetype; + typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, ATOMIC_READ_DATA_PHASE, ATOMIC_PHASE} busstatetype; typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype; busstatetype CurrState, NextState; @@ -61,9 +61,11 @@ module busfsm #( case(CurrState) ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_PHASE; + DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_READ_DATA_PHASE; else if(HREADY & ~BusAtomic) NextState = MEM3; else NextState = DATA_PHASE; + ATOMIC_READ_DATA_PHASE: if(HREADY) NextState = ATOMIC_PHASE; + else NextState = ATOMIC_READ_DATA_PHASE; ATOMIC_PHASE: if(HREADY) NextState = MEM3; else NextState = ATOMIC_PHASE; MEM3: if(Stall) NextState = MEM3; @@ -74,13 +76,15 @@ module busfsm #( assign BusStall = (CurrState == ADR_PHASE & |BusRW) | // (CurrState == DATA_PHASE & ~BusRW[0]); // possible 
optimization here. fails uart test, but i'm not sure the failure is valid. + (CurrState == ATOMIC_PHASE) | + (CurrState == ATOMIC_READ_DATA_PHASE) | (CurrState == DATA_PHASE); assign BusCommitted = (CurrState != ADR_PHASE) & ~(READ_ONLY & CurrState == MEM3); assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) | - (CurrState == DATA_PHASE & BusAtomic) ? AHB_NONSEQ : AHB_IDLE; - assign HWRITE = (BusRW[0] & ~BusAtomic) | (CurrState == DATA_PHASE & BusAtomic); + (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic) ? AHB_NONSEQ : AHB_IDLE; + assign HWRITE = (BusRW[0] & ~BusAtomic) | (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic); assign CaptureEn = CurrState == DATA_PHASE; From abecc98563ccb7e4dfaa82bdbe009e58e54b322c Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 10:26:00 -0800 Subject: [PATCH 10/37] Fixed spelling of precision --- src/fpu/fctrl.sv | 2 +- src/fpu/fcvt.sv | 4 ++-- src/fpu/postproc/fmashiftcalc.sv | 2 +- src/fpu/postproc/postprocess.sv | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 3d1a7bedd..d1e933142 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -38,7 +38,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( input logic FDivBusyE, // is the divider busy // instruction input logic [31:0] InstrD, // the full instruction - input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision + input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain precision input logic [6:0] OpD, // bits 6:0 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode diff --git a/src/fpu/fcvt.sv b/src/fpu/fcvt.sv index d396fee95..d721dbc2f 100644 --- a/src/fpu/fcvt.sv +++ b/src/fpu/fcvt.sv @@ -70,8 +70,8 @@ module fcvt import cvw::*; #(parameter cvw_t P) ( assign IntToFp = OpCtrl[2]; // choose the ouptut format depending on the opperation - // - 
fp -> fp: OpCtrl contains the percision of the output - // - int -> fp: Fmt contains the percision of the output + // - fp -> fp: OpCtrl contains the precision of the output + // - int -> fp: Fmt contains the precision of the output if (P.FPSIZES == 2) assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == P.FMT); else if (P.FPSIZES == 3 | P.FPSIZES == 4) diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index c80748061..0a8ac3035 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -50,7 +50,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // calculate the sum's exponent assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3); - //convert the sum's exponent into the proper percision + //convert the sum's exponent into the proper precision if (P.FPSIZES == 1) begin assign NormSumExp = PreNormSumExp; end else if (P.FPSIZES == 2) begin diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index 05db352cd..ba897a5fd 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -128,8 +128,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( assign NaNIn = XNaN|YNaN|ZNaN; // choose the ouptut format depending on the opperation - // - fp -> fp: OpCtrl contains the percision of the output - // - otherwise: Fmt contains the percision of the output + // - fp -> fp: OpCtrl contains the precision of the output + // - otherwise: Fmt contains the precision of the output if (P.FPSIZES == 2) assign OutFmt = IntToFp|~CvtOp ? 
Fmt : (OpCtrl[1:0] == P.FMT); else if (P.FPSIZES == 3 | P.FPSIZES == 4) From dcd40c6be702c274770a4d1fea488f152fa3fcf5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 10:27:31 -0800 Subject: [PATCH 11/37] Fixed spelling of output --- src/fpu/fcvt.sv | 2 +- src/fpu/postproc/postprocess.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fpu/fcvt.sv b/src/fpu/fcvt.sv index d721dbc2f..ad767d2ef 100644 --- a/src/fpu/fcvt.sv +++ b/src/fpu/fcvt.sv @@ -69,7 +69,7 @@ module fcvt import cvw::*; #(parameter cvw_t P) ( assign Int64 = OpCtrl[1]; assign IntToFp = OpCtrl[2]; - // choose the ouptut format depending on the opperation + // choose the output format depending on the opperation // - fp -> fp: OpCtrl contains the precision of the output // - int -> fp: Fmt contains the precision of the output if (P.FPSIZES == 2) diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index ba897a5fd..c2de8644e 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -127,7 +127,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( assign InfIn = XInf|YInf|ZInf; assign NaNIn = XNaN|YNaN|ZNaN; - // choose the ouptut format depending on the opperation + // choose the output format depending on the opperation // - fp -> fp: OpCtrl contains the precision of the output // - otherwise: Fmt contains the precision of the output if (P.FPSIZES == 2) From 1a77c08f6e614627c1570ab80662859b918049b6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 10:46:44 -0800 Subject: [PATCH 12/37] Fixed issues 575 and 477 about FPU tests failing when Zfh = 1. 
--- src/fpu/postproc/cvtshiftcalc.sv | 2 +- src/fpu/postproc/round.sv | 8 +-- testbench/tests.vh | 97 -------------------------------- 3 files changed, 5 insertions(+), 102 deletions(-) diff --git a/src/fpu/postproc/cvtshiftcalc.sv b/src/fpu/postproc/cvtshiftcalc.sv index 1150d4ecc..ff3d29b90 100644 --- a/src/fpu/postproc/cvtshiftcalc.sv +++ b/src/fpu/postproc/cvtshiftcalc.sv @@ -82,7 +82,7 @@ module cvtshiftcalc import cvw::*; #(parameter cvw_t P) ( P.FMT: ResNegNF = -($clog2(P.NF)+1)'(P.NF); P.FMT1: ResNegNF = -($clog2(P.NF)+1)'(P.NF1); P.FMT2: ResNegNF = -($clog2(P.NF)+1)'(P.NF2); - default: ResNegNF = 'x; + default: ResNegNF = 0; // Not used for floating-point so don't care, but convert to unsigned long has OutFmt = 11. endcase end else if (P.FPSIZES == 4) begin diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index e01ff376b..460786135 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -145,18 +145,18 @@ module round import cvw::*; #(parameter cvw_t P) ( end else if (P.FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) | + if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) | + if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (XLENPOS == 3) assign NormSticky = 
(|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT1)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1)|IntRes)) | + if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) | (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); diff --git a/testbench/tests.vh b/testbench/tests.vh index 2eef6fc04..86f65eb14 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1293,7 +1293,6 @@ string imperas32f[] = '{ string arch64zfh[] = '{ `RISCVARCHTEST, - "rv64i_m/Zfh/src/fmv.x.h_b1-01.S", "rv64i_m/Zfh/src/fadd_b10-01.S", "rv64i_m/Zfh/src/fadd_b1-01.S", "rv64i_m/Zfh/src/fadd_b11-01.S", @@ -1360,34 +1359,10 @@ string imperas32f[] = '{ "rv64i_m/Zfh/src/flt_b1-01.S", "rv64i_m/Zfh/src/flt_b19-01.S", "rv64i_m/Zfh/src/flh-align-01.S", -/* "rv64i_m/Zfh/src/fmadd_b1-01.S", - "rv64i_m/Zfh/src/fmadd_b14-01.S", - "rv64i_m/Zfh/src/fmadd_b16-01.S", - "rv64i_m/Zfh/src/fmadd_b17-01.S", - "rv64i_m/Zfh/src/fmadd_b18-01.S", - "rv64i_m/Zfh/src/fmadd_b2-01.S", - "rv64i_m/Zfh/src/fmadd_b3-01.S", - "rv64i_m/Zfh/src/fmadd_b4-01.S", - "rv64i_m/Zfh/src/fmadd_b5-01.S", - "rv64i_m/Zfh/src/fmadd_b6-01.S", - "rv64i_m/Zfh/src/fmadd_b7-01.S", - "rv64i_m/Zfh/src/fmadd_b8-01.S", */ "rv64i_m/Zfh/src/fmax_b1-01.S", "rv64i_m/Zfh/src/fmax_b19-01.S", "rv64i_m/Zfh/src/fmin_b1-01.S", "rv64i_m/Zfh/src/fmin_b19-01.S", -/* "rv64i_m/Zfh/src/fmsub_b1-01.S", - "rv64i_m/Zfh/src/fmsub_b14-01.S", - "rv64i_m/Zfh/src/fmsub_b16-01.S", - "rv64i_m/Zfh/src/fmsub_b17-01.S", - "rv64i_m/Zfh/src/fmsub_b18-01.S", - "rv64i_m/Zfh/src/fmsub_b2-01.S", - "rv64i_m/Zfh/src/fmsub_b3-01.S", - "rv64i_m/Zfh/src/fmsub_b4-01.S", - "rv64i_m/Zfh/src/fmsub_b5-01.S", - "rv64i_m/Zfh/src/fmsub_b6-01.S", - "rv64i_m/Zfh/src/fmsub_b7-01.S", - "rv64i_m/Zfh/src/fmsub_b8-01.S", */ "rv64i_m/Zfh/src/fmul_b1-01.S", 
"rv64i_m/Zfh/src/fmul_b2-01.S", "rv64i_m/Zfh/src/fmul_b3-01.S", @@ -1406,30 +1381,6 @@ string imperas32f[] = '{ "rv64i_m/Zfh/src/fmv.x.h_b27-01.S", "rv64i_m/Zfh/src/fmv.x.h_b28-01.S", "rv64i_m/Zfh/src/fmv.x.h_b29-01.S", -/* "rv64i_m/Zfh/src/fnmadd_b1-01.S", - "rv64i_m/Zfh/src/fnmadd_b14-01.S", - "rv64i_m/Zfh/src/fnmadd_b16-01.S", - "rv64i_m/Zfh/src/fnmadd_b17-01.S", - "rv64i_m/Zfh/src/fnmadd_b18-01.S", - "rv64i_m/Zfh/src/fnmadd_b2-01.S", - "rv64i_m/Zfh/src/fnmadd_b3-01.S", - "rv64i_m/Zfh/src/fnmadd_b4-01.S", - "rv64i_m/Zfh/src/fnmadd_b5-01.S", - "rv64i_m/Zfh/src/fnmadd_b6-01.S", - "rv64i_m/Zfh/src/fnmadd_b7-01.S", - "rv64i_m/Zfh/src/fnmadd_b8-01.S", - "rv64i_m/Zfh/src/fnmsub_b1-01.S", - "rv64i_m/Zfh/src/fnmsub_b14-01.S", - "rv64i_m/Zfh/src/fnmsub_b16-01.S", - "rv64i_m/Zfh/src/fnmsub_b17-01.S", - "rv64i_m/Zfh/src/fnmsub_b18-01.S", - "rv64i_m/Zfh/src/fnmsub_b2-01.S", - "rv64i_m/Zfh/src/fnmsub_b3-01.S", - "rv64i_m/Zfh/src/fnmsub_b4-01.S", - "rv64i_m/Zfh/src/fnmsub_b5-01.S", - "rv64i_m/Zfh/src/fnmsub_b6-01.S", - "rv64i_m/Zfh/src/fnmsub_b7-01.S", - "rv64i_m/Zfh/src/fnmsub_b8-01.S", */ "rv64i_m/Zfh/src/fsgnj_b1-01.S", "rv64i_m/Zfh/src/fsgnjn_b1-01.S", "rv64i_m/Zfh/src/fsgnjx_b1-01.S", @@ -1998,34 +1949,10 @@ string arch64zbs[] = '{ "rv32i_m/Zfh/src/flt_b1-01.S", "rv32i_m/Zfh/src/flt_b19-01.S", "rv32i_m/Zfh/src/flh-align-01.S", -/* "rv32i_m/Zfh/src/fmadd_b1-01.S", - "rv32i_m/Zfh/src/fmadd_b14-01.S", - "rv32i_m/Zfh/src/fmadd_b16-01.S", - "rv32i_m/Zfh/src/fmadd_b17-01.S", - "rv32i_m/Zfh/src/fmadd_b18-01.S", - "rv32i_m/Zfh/src/fmadd_b2-01.S", - "rv32i_m/Zfh/src/fmadd_b3-01.S", - "rv32i_m/Zfh/src/fmadd_b4-01.S", - "rv32i_m/Zfh/src/fmadd_b5-01.S", - "rv32i_m/Zfh/src/fmadd_b6-01.S", - "rv32i_m/Zfh/src/fmadd_b7-01.S", - "rv32i_m/Zfh/src/fmadd_b8-01.S", */ "rv32i_m/Zfh/src/fmax_b1-01.S", "rv32i_m/Zfh/src/fmax_b19-01.S", "rv32i_m/Zfh/src/fmin_b1-01.S", "rv32i_m/Zfh/src/fmin_b19-01.S", -/* "rv32i_m/Zfh/src/fmsub_b1-01.S", - "rv32i_m/Zfh/src/fmsub_b14-01.S", - 
"rv32i_m/Zfh/src/fmsub_b16-01.S", - "rv32i_m/Zfh/src/fmsub_b17-01.S", - "rv32i_m/Zfh/src/fmsub_b18-01.S", - "rv32i_m/Zfh/src/fmsub_b2-01.S", - "rv32i_m/Zfh/src/fmsub_b3-01.S", - "rv32i_m/Zfh/src/fmsub_b4-01.S", - "rv32i_m/Zfh/src/fmsub_b5-01.S", - "rv32i_m/Zfh/src/fmsub_b6-01.S", - "rv32i_m/Zfh/src/fmsub_b7-01.S", - "rv32i_m/Zfh/src/fmsub_b8-01.S", */ "rv32i_m/Zfh/src/fmul_b1-01.S", "rv32i_m/Zfh/src/fmul_b2-01.S", "rv32i_m/Zfh/src/fmul_b3-01.S", @@ -2044,30 +1971,6 @@ string arch64zbs[] = '{ "rv32i_m/Zfh/src/fmv.x.h_b27-01.S", "rv32i_m/Zfh/src/fmv.x.h_b28-01.S", "rv32i_m/Zfh/src/fmv.x.h_b29-01.S", -/* "rv32i_m/Zfh/src/fnmadd_b1-01.S", - "rv32i_m/Zfh/src/fnmadd_b14-01.S", - "rv32i_m/Zfh/src/fnmadd_b16-01.S", - "rv32i_m/Zfh/src/fnmadd_b17-01.S", - "rv32i_m/Zfh/src/fnmadd_b18-01.S", - "rv32i_m/Zfh/src/fnmadd_b2-01.S", - "rv32i_m/Zfh/src/fnmadd_b3-01.S", - "rv32i_m/Zfh/src/fnmadd_b4-01.S", - "rv32i_m/Zfh/src/fnmadd_b5-01.S", - "rv32i_m/Zfh/src/fnmadd_b6-01.S", - "rv32i_m/Zfh/src/fnmadd_b7-01.S", - "rv32i_m/Zfh/src/fnmadd_b8-01.S", - "rv32i_m/Zfh/src/fnmsub_b1-01.S", - "rv32i_m/Zfh/src/fnmsub_b14-01.S", - "rv32i_m/Zfh/src/fnmsub_b16-01.S", - "rv32i_m/Zfh/src/fnmsub_b17-01.S", - "rv32i_m/Zfh/src/fnmsub_b18-01.S", - "rv32i_m/Zfh/src/fnmsub_b2-01.S", - "rv32i_m/Zfh/src/fnmsub_b3-01.S", - "rv32i_m/Zfh/src/fnmsub_b4-01.S", - "rv32i_m/Zfh/src/fnmsub_b5-01.S", - "rv32i_m/Zfh/src/fnmsub_b6-01.S", - "rv32i_m/Zfh/src/fnmsub_b7-01.S", - "rv32i_m/Zfh/src/fnmsub_b8-01.S", */ "rv32i_m/Zfh/src/fsgnj_b1-01.S", "rv32i_m/Zfh/src/fsgnjn_b1-01.S", "rv32i_m/Zfh/src/fsgnjx_b1-01.S", From 846a0c4d50c1032ca787dddd9441f810267e725f Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 11:12:06 -0800 Subject: [PATCH 13/37] Check fma operations don't support H precision --- src/fpu/fctrl.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index d1e933142..999837889 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -84,8 
+84,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( assign Fmt = Funct7D[1:0]; assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp - assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) | - (Fmt == 2'b10 & P.ZFH_SUPPORTED) | (Fmt == 2'b11 & P.Q_SUPPORTED)); + assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) | + (Fmt == 2'b10 & P.ZFH_SUPPORTED & {OpD[6:4], OpD[1:0]} != 5'b10011) | // fma not supported for Zfh + (Fmt == 2'b11 & P.Q_SUPPORTED)); assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & P.D_SUPPORTED) | (Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED)); From 60e09965d5484219e21a2be68dba720b46935c72 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 11:14:43 -0800 Subject: [PATCH 14/37] Enabled Zfh support in rv64gc --- config/rv64gc/config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index af828589d..bb3e79659 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -40,7 +40,7 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; From 29eba93bfaf6f69c924a17fe24eccdea281fe4dc Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 17:26:46 -0800 Subject: [PATCH 15/37] Path to new Questa --- setup.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.sh b/setup.sh index b1ecbd84f..e1d4e6cd3 100755 --- a/setup.sh +++ b/setup.sh @@ -16,8 +16,7 @@ echo \$WALLY set to ${WALLY} # Must edit these based on your local environment. Ask your sysadmin. 
export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu # Change this to your Siemens license server export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Synopsys license server -export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_2/questasim # Change this for your path to Questa, excluding bin -#export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_3/questasim # Change this for your path to Questa, excluding bin +export QUESTA_HOME=/cad/mentor/questa_sim-2023.4/questasim # Change this for your path to Questa, excluding bin export SNPS_HOME=/cad/synopsys/SYN # Change this for your path to Design Compiler, excluding bin # Path to RISC-V Tools From 0588d611ead1deb5379be81a93b1f7ebb5859dfa Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 17:27:40 -0800 Subject: [PATCH 16/37] Zfa fli support working for F and D --- src/fpu/fctrl.sv | 7 +++++-- src/fpu/fpu.sv | 11 ++++++++++- testbench/testbench.sv | 4 +++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 999837889..d4cc60e87 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -143,14 +143,16 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg + else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0; // fli 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q) // coverage off - // Not covered in testing because rv64gc does not support half or quad precision 7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10) ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; 
// fcvt.h.(s/d/q) + // Not covered in testing because rv64gc does not support quad precision 7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11) ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d) // coverage on @@ -179,7 +181,6 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d d->lu endcase // coverage off - // Not covered in testing because rv64gc does not support half or quad precision 7'b1101010: case(Rs2D) 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w w->h 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h @@ -192,6 +193,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h h->l 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h h->lu endcase + // Not covered in testing because rv64gc does not support quad precision 7'b1101011: case(Rs2D) 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w w->q 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q @@ -274,6 +276,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( // 011 - mv to fp 01 // 110 - min 10 // 101 - max 10 + // 111 - fli 11 // OpCtrl: // Fma: {not multiply-add?, negate prod?, negate Z?} diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 45af38c0c..8be0e4488 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -160,6 +160,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer logic mvsgn; // sign bit for extending move + logic [P.FLEN-1:0] FliResE; // Floating-point load immediate value ////////////////////////////////////////////////////////////////////////////////////////// // Decode Stage: fctrl decoder, read register file @@ -263,6 +264,14 @@ module fpu import cvw::*; #(parameter cvw_t P) ( 
.ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); + // floating-point load immediate: fli + if (P.ZFA_SUPPORTED) begin + logic [4:0] Rs1E; + + flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E); + fli #(P) fli(.Rs1(Rs1E), .Fmt(FmtE), .Imm(FliResE)); + end else assign FliResE = '0; + // NaN Box SrcA to convert integer to requested FP size for fmv.*.x if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; else if(P.FPSIZES == 2) @@ -276,7 +285,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( end // select a result that may be written to the FP register - mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + mux4 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register with fmv.x.* diff --git a/testbench/testbench.sv b/testbench/testbench.sv index efd4ea637..87b603288 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -128,7 +128,8 @@ module testbench; "arch64zicboz": if (P.ZICBOZ_SUPPORTED) tests = arch64zicboz; "arch64zcb": if (P.ZCB_SUPPORTED) tests = arch64zcb; "arch64zfh": if (P.ZFH_SUPPORTED) tests = arch64zfh; -// "arch64zfa": if (P.ZFA_SUPPORTED) tests = arch64zfa; + "arch64zfaf": if (P.ZFA_SUPPORTED) tests = arch64zfaf; + "arch64zfad": if (P.ZFA_SUPPORTED & P.D_SUPPORTED) tests = arch64zfad; endcase end else begin // RV32 case (TEST) @@ -165,6 +166,7 @@ module testbench; "arch32zcb": if (P.ZCB_SUPPORTED) tests = arch32zcb; "arch32zfh": if (P.ZFH_SUPPORTED) tests = arch32zfh; "arch32zfaf": if (P.ZFA_SUPPORTED) tests = arch32zfaf; + "arch32zfad": if (P.ZFA_SUPPORTED & P.D_SUPPORTED) tests = arch32zfad; endcase end if (tests.size() == 0) begin From 
9d57002c070461ec9518fa811936d4a975846fc6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 17:27:59 -0800 Subject: [PATCH 17/37] Zfa fli support working for F and D (add fli.sv module) --- src/fpu/fli.sv | 219 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 src/fpu/fli.sv diff --git a/src/fpu/fli.sv b/src/fpu/fli.sv new file mode 100644 index 000000000..e61415388 --- /dev/null +++ b/src/fpu/fli.sv @@ -0,0 +1,219 @@ +/////////////////////////////////////////// +// fli.sv +// +// Written: David_Harris@hmc.edu +// Modified: 1/16/2024 +// +// Purpose: Floating-point float immediate +// +// Documentation: RISC-V System on Chip Design Chapter 16 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module fli import cvw::*; #(parameter cvw_t P) ( + input logic [4:0] Rs1, // Index of immediate to select + input logic [1:0] Fmt, // 00 = single, 01 = double, 10 = half, 11 = quad + output logic [P.FLEN-1:0] Imm // Immediate output +); + + logic [P.FLEN-1:0] HImmBox, SImmBox, DImmBox, QImmBox; + + // select constant for each immediate size supported + + //////////////////////////// + // half + //////////////////////////// + + if (P.ZFH_SUPPORTED) begin + logic [15:0] HImm; + always_comb begin + case(Rs1) + 0: HImm = 16'hBC00; + 1: HImm = 16'h0400; + 2: HImm = 16'h0100; + 3: HImm = 16'h0200; + 4: HImm = 16'h1C00; + 5: HImm = 16'h2000; + 6: HImm = 16'h2C00; + 7: HImm = 16'h3000; + 8: HImm = 16'h3400; + 9: HImm = 16'h3500; + 10: HImm = 16'h3600; + 11: HImm = 16'h3700; + 12: HImm = 16'h3800; + 13: HImm = 16'h3900; + 14: HImm = 16'h3A00; + 15: HImm = 16'h3B00; + 16: HImm = 16'h3C00; + 17: HImm = 16'h3D00; + 18: HImm = 16'h3E00; + 19: HImm = 16'h3F00; + 20: HImm = 16'h4000; + 21: HImm = 16'h4100; + 22: HImm = 16'h4200; + 23: HImm = 16'h4400; + 24: HImm = 16'h4800; + 25: HImm = 16'h4C00; + 26: HImm = 16'h5800; + 27: HImm = 16'h5C00; + 28: HImm = 16'h7800; + 29: HImm = 16'h7C00; + 30: HImm = 16'h7C00; + 31: HImm = 16'h7E00; + endcase + end + assign HImmBox = {{(P.FLEN-16){1'b1}}, HImm}; // NaN-box HImm + end else assign HImmBox = '0; + + //////////////////////////// + // single + //////////////////////////// + + logic [31:0] SImm; + always_comb begin + case(Rs1) + 0: SImm = 32'hBF800000; + 1: SImm = 32'h00800000; + 2: SImm = 32'h37800000; + 3: SImm = 32'h38000000; + 4: SImm = 32'h3B800000; + 5: SImm = 32'h3C000000; + 6: SImm = 32'h3D800000; + 7: SImm = 32'h3E000000; + 8: SImm = 32'h3E800000; + 9: SImm = 32'h3EA00000; + 10: SImm = 32'h3EC00000; + 11: SImm = 32'h3EE00000; + 12: SImm = 32'h3F000000; + 13: SImm = 32'h3F200000; + 14: SImm = 32'h3F400000; + 15: SImm = 
32'h3F600000; + 16: SImm = 32'h3F800000; + 17: SImm = 32'h3FA00000; + 18: SImm = 32'h3FC00000; + 19: SImm = 32'h3FE00000; + 20: SImm = 32'h40000000; + 21: SImm = 32'h40200000; + 22: SImm = 32'h40400000; + 23: SImm = 32'h40800000; + 24: SImm = 32'h41000000; + 25: SImm = 32'h41800000; + 26: SImm = 32'h43000000; + 27: SImm = 32'h43800000; + 28: SImm = 32'h47000000; + 29: SImm = 32'h47800000; + 30: SImm = 32'h7F800000; + 31: SImm = 32'h7FC00000; + endcase + end + assign SImmBox = {{(P.FLEN-32){1'b1}}, SImm}; // NaN-box SImm + + //////////////////////////// + // double + //////////////////////////// + + if (P.D_SUPPORTED) begin + logic [63:0] DImm; + always_comb begin + case(Rs1) + 0: DImm = 64'hBFF0000000000000; + 1: DImm = 64'h0010000000000000; + 2: DImm = 64'h3EF0000000000000; + 3: DImm = 64'h3F00000000000000; + 4: DImm = 64'h3F70000000000000; + 5: DImm = 64'h3F80000000000000; + 6: DImm = 64'h3FB0000000000000; + 7: DImm = 64'h3FC0000000000000; + 8: DImm = 64'h3FD0000000000000; + 9: DImm = 64'h3FD4000000000000; + 10: DImm = 64'h3FD8000000000000; + 11: DImm = 64'h3FDC000000000000; + 12: DImm = 64'h3FE0000000000000; + 13: DImm = 64'h3FE4000000000000; + 14: DImm = 64'h3FE8000000000000; + 15: DImm = 64'h3FEC000000000000; + 16: DImm = 64'h3FF0000000000000; + 17: DImm = 64'h3FF4000000000000; + 18: DImm = 64'h3FF8000000000000; + 19: DImm = 64'h3FFC000000000000; + 20: DImm = 64'h4000000000000000; + 21: DImm = 64'h4004000000000000; + 22: DImm = 64'h4008000000000000; + 23: DImm = 64'h4010000000000000; + 24: DImm = 64'h4020000000000000; + 25: DImm = 64'h4030000000000000; + 26: DImm = 64'h4060000000000000; + 27: DImm = 64'h4070000000000000; + 28: DImm = 64'h40E0000000000000; + 29: DImm = 64'h40F0000000000000; + 30: DImm = 64'h7FF0000000000000; + 31: DImm = 64'h7FF8000000000000; + endcase + end + assign DImmBox = {{(P.FLEN-64){1'b1}}, DImm}; // NaN-box DImm + end else assign DImmBox = '0; + + //////////////////////////// + // quad + //////////////////////////// + + if
(P.Q_SUPPORTED) begin + logic [127:0] QImm; // quad immediates are 128 bits wide; a 64-bit declaration would truncate sign/exponent + always_comb begin + case(Rs1) + 0: QImm = 128'hBFFF0000000000000000000000000000; + 1: QImm = 128'h00010000000000000000000000000000; + 2: QImm = 128'h3FEF0000000000000000000000000000; + 3: QImm = 128'h3FF00000000000000000000000000000; + 4: QImm = 128'h3FF70000000000000000000000000000; + 5: QImm = 128'h3FF80000000000000000000000000000; + 6: QImm = 128'h3FFB0000000000000000000000000000; + 7: QImm = 128'h3FFC0000000000000000000000000000; + 8: QImm = 128'h3FFD0000000000000000000000000000; + 9: QImm = 128'h3FFD4000000000000000000000000000; + 10: QImm = 128'h3FFD8000000000000000000000000000; + 11: QImm = 128'h3FFDC000000000000000000000000000; + 12: QImm = 128'h3FFE0000000000000000000000000000; + 13: QImm = 128'h3FFE4000000000000000000000000000; + 14: QImm = 128'h3FFE8000000000000000000000000000; + 15: QImm = 128'h3FFEC000000000000000000000000000; + 16: QImm = 128'h3FFF0000000000000000000000000000; + 17: QImm = 128'h3FFF4000000000000000000000000000; + 18: QImm = 128'h3FFF8000000000000000000000000000; + 19: QImm = 128'h3FFFC000000000000000000000000000; + 20: QImm = 128'h40000000000000000000000000000000; + 21: QImm = 128'h40004000000000000000000000000000; + 22: QImm = 128'h40008000000000000000000000000000; + 23: QImm = 128'h40010000000000000000000000000000; + 24: QImm = 128'h40020000000000000000000000000000; + 25: QImm = 128'h40030000000000000000000000000000; + 26: QImm = 128'h40060000000000000000000000000000; + 27: QImm = 128'h40070000000000000000000000000000; + 28: QImm = 128'h400E0000000000000000000000000000; + 29: QImm = 128'h400F0000000000000000000000000000; + 30: QImm = 128'h7FFF0000000000000000000000000000; + 31: QImm = 128'h7FFF8000000000000000000000000000; + endcase + end + assign QImmBox = QImm; // NaN-box QImm trivial because Q is longest format + end else assign QImmBox = '0; + + mux4 #(P.FLEN) flimux(SImmBox, DImmBox, HImmBox, QImmBox, Fmt, Imm); // select immediate based on format + +endmodule From
8654375f26504d4efe1b772421f00e87d49a444b Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 20:03:54 -0800 Subject: [PATCH 18/37] Zfa fminm/fmaxm/fltq/fleq implemented and tested --- src/fpu/fcmp.sv | 49 +++++++----- src/fpu/fctrl.sv | 191 +++++++++++++++++++++++++++------------------ src/fpu/fpu.sv | 5 +- testbench/tests.vh | 58 ++++++++++++-- 4 files changed, 198 insertions(+), 105 deletions(-) diff --git a/src/fpu/fcmp.sv b/src/fpu/fcmp.sv index 9d0d582b5..e330f1fda 100755 --- a/src/fpu/fcmp.sv +++ b/src/fpu/fcmp.sv @@ -36,6 +36,7 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, // format of fp number input logic [2:0] OpCtrl, // see above table + input logic Zfa, // Zfa variants: fminm, fmaxm, fleq, fltq input logic Xs, Ys, // input signs input logic [P.NE-1:0] Xe, Ye, // input exponents input logic [P.NF:0] Xm, Ym, // input mantissa @@ -70,8 +71,8 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( 3'b110: CmpNV = EitherSNaN; //min 3'b101: CmpNV = EitherSNaN; //max 3'b010: CmpNV = EitherSNaN; //equal - 3'b001: CmpNV = EitherNaN; //less than - 3'b011: CmpNV = EitherNaN; //less than or equal + 3'b001: CmpNV = Zfa ? EitherSNaN : EitherNaN; // fltq / flt perform CompareQuietLess / CompareSignalingLess differing on when to set invalid + 3'b011: CmpNV = Zfa ? 
EitherSNaN : EitherNaN; // fleq / fle differ on when to set invalid default: CmpNV = 1'bx; endcase end @@ -128,23 +129,35 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( // - if one is a NaN output the non-NaN always_comb if(OpCtrl[0]) // MAX - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = Y; // X < Y - else CmpFpRes = X; // X > Y + if (Zfa & P.ZFA_SUPPORTED) // fmaxm perform IEEE754 maxNum that produce NaN if either input is NaN + if (XNaN | YNaN) CmpFpRes = NaNRes; // either input is NaN + else + if (LT) CmpFpRes = Y; // X < Y + else CmpFpRes = X; // X > Y + else // fmax performs IEEE754 maxNumber that produces NaN if both inputs are NaN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = Y; // X < Y + else CmpFpRes = X; // X > Y else // MIN - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = X; // X < Y - else CmpFpRes = Y; // X > Y + if (Zfa & P.ZFA_SUPPORTED) // fminm perform IEEE754 minNum that produce NaN if either input is NaN + if (XNaN | YNaN) CmpFpRes = NaNRes; // either input is NaN + else + if (LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y + else // fmin performs IEEE754 minNumber that produces NaN if both inputs are NaN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y // LT/LE/EQ // - -0 = 0 diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index d4cc60e87..9f60a692f 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -54,6 
+54,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic FPUActiveE, // FP instruction being executed + output logic ZfaE, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) // register control signals output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable output logic FWriteIntE, FWriteIntM, // Write to integer register @@ -64,7 +65,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic FDivStartE, IDivStartE // Start division or squareroot ); - `define FCTRLW 12 + `define FCTRLW 13 logic [`FCTRLW-1:0] ControlsD; // control signals logic FRegWriteD; // FP register write enable @@ -79,6 +80,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( logic SupportedFmt; // is the format supported logic SupportedFmt2; // is the source format supported for fp -> fp logic FCvtIntD, FCvtIntM; // convert to integer opperation + logic ZfaD; // Zfa variants of instructions // FPU Instruction Decoder assign Fmt = Funct7D[1:0]; @@ -91,129 +93,164 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( (Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED)); // decode the instruction - // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt + // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa always_comb if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // for anything other than loads and stores, check for supported format + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // for 
anything other than loads and stores, check for supported format else begin - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // default: non-implemented instruction + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // default: non-implemented instruction /* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed case(OpD) 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh + 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // fld + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flh endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh + 3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsd + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsh endcase - 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd - 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub - 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub - 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd + 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0; // fmadd + 7'b1000111: ControlsD = 
`FCTRLW'b1_0_01_10_001_0_0_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0; // fnmadd 7'b1010011: casez(Funct7D) - 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd - 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub - 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul - 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv - 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt + 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0; // fdiv + 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0; // fsqrt 7'b00100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0; // fsgnj - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0; // fsgnjn - 3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0; // fsgnjx + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0; // fsgnjx endcase 7'b00101??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0; // fmin - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0; // fmax + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0; // fmax + 3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1; // fminm (Zfa) + 3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1; // fmaxm (Zfa) endcase 7'b10100??: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0; // feq - 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0; // flt - 3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0; // fle + 3'b000: ControlsD = 
`FCTRLW'b0_1_00_00_011_0_0_0_0; // fle + 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0; // flt + 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0; // feq + 3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1; // fleq (Zfa) + 3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1; // fltq (Zfa) endcase 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass + ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0; // fclass else if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register + else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = '0; // fmvh.x.d (Zfa) *** needs values for all moves + // coverage off Q not supported in RV64GC + else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = '0; // fmvh.x.q (Zfa) + // coverage on 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg + ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001) - ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0; // fli + ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1; // fli (Zfa) 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0; // fcvt.s.(d/q/h) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.s (Zfa) *** needs ctrl for all rounds + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.s (Zfa) *** needs ctrl for all rounds 
7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) - ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q) - // coverage off + ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0; // fcvt.d.(s/h/q) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.d (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.d (Zfa) 7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10) - ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; // fcvt.h.(s/d/q) + ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0; // fcvt.h.(s/d/q) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.h (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.h (Zfa) + // coverage off // Not covered in testing because rv64gc does not support quad precision 7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11) - ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d) + ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0; // fcvt.q.(s/h/d) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.q (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.q (Zfa) // coverage on 7'b1101000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w w->s - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l l->s - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.s.w w->s + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.s.wu wu->s + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.s.l l->s + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.s.lu 
lu->s endcase 7'b1100000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s s->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s s->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s s->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s s->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.s s->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.s s->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.s s->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.s s->lu endcase 7'b1101001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w w->d - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l l->d - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.d.w w->d + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.d.wu wu->d + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.d.l l->d + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.d.lu lu->d endcase 7'b1100001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d d->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d d->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d d->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d d->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.d d->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.d d->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.d d->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.d d->lu + 5'b01000: if (P.ZFA_SUPPORTED & P.D_SUPPORTED & Funct3D == 3'b001) + ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1; // fcvtmod.w.d (Zfa) endcase - // 
coverage off 7'b1101010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w w->h - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.h.l l->h - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.h.lu lu->h + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.h.w w->h + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.h.wu wu->h + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.h.l l->h + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.h.lu lu->h endcase 7'b1100010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.h h->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.h h->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h h->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h h->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.h h->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.h h->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.h h->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.h h->lu endcase // Not covered in testing because rv64gc does not support quad precision + // coverage off 7'b1101011: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w w->q - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.q.l l->q - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.q.lu lu->q + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.q.w w->q + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.q.wu wu->q + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.q.l l->q + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.q.lu lu->q endcase 7'b1100011: case(Rs2D) - 5'b00000: ControlsD = 
`FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.q q->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.q q->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.q q->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.q q->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.q q->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.q q->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.q q->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.q q->lu endcase // coverage on - endcase + 7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000) + ControlsD = '0; // fmvp.d.x (Zfa) + // Not covered in testing because rv64gc does not support quad precision + // coverage off + 7'b1011011: if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000) + ControlsD = '0; // fmvp.q.x (Zfa) + // coverage on + endcase endcase end /* verilator lint_on CASEINCOMPLETE */ // unswizzle control bits - assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD} = ControlsD; + assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -313,9 +350,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( assign Adr3D = InstrD[31:27]; // D/E pipleine register - flopenrc #(14+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ~IllegalFPUInstrD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, FPUActiveE}); + flopenrc #(15+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ~IllegalFPUInstrD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, 
FPUActiveE}); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE}); diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 8be0e4488..bd387f5d7 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -83,6 +83,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation logic FRegWriteE; // Write floating-point register logic FPUActiveE; // FP instruction being executed + logic ZfaE; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) // regfile signals logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -170,7 +171,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .IntDivE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, + .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .FrmM, .FmtE, .FmtM, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, @@ -247,7 +248,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .UmM, .FIntDivResultM); // compare: fmin/fmax, flt/fle/feq - fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Zfa(ZfaE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), 
.YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); diff --git a/testbench/tests.vh b/testbench/tests.vh index 86f65eb14..43cbc91a0 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1999,16 +1999,58 @@ string arch64zbs[] = '{ string arch32zfaf[] = '{ `RISCVARCHTEST, - "rv32i_m/F_Zfa/src/fle_b1-01.S", - "rv32i_m/F_Zfa/src/fle_b19-01.S", - "rv32i_m/F_Zfa/src/fli_b1-01.S", + "rv32i_m/F_Zfa/src/fleq_b1-01.S", + "rv32i_m/F_Zfa/src/fleq_b19-01.S", + "rv32i_m/F_Zfa/src/fli.s-01.S", "rv32i_m/F_Zfa/src/fltq_b1-01.S", "rv32i_m/F_Zfa/src/fltq_b19-01.S", - "rv32i_m/F_Zfa/src/fmin_b1-01.S", - "rv32i_m/F_Zfa/src/fmin_b19-01.S", - "rv32i_m/F_Zfa/src/fmax_b1-01.S", - "rv32i_m/F_Zfa/src/fmax_b19-01.S", - "rv32i_m/F_Zfa/src/fround_b1-01.S" + "rv32i_m/F_Zfa/src/fminm_b1-01.S", + "rv32i_m/F_Zfa/src/fminm_b19-01.S", + "rv32i_m/F_Zfa/src/fmaxm_b1-01.S", + "rv32i_m/F_Zfa/src/fmaxm_b19-01.S" +/* "rv32i_m/F_Zfa/src/fround_b1-01.S" */ + }; + + string arch32zfad[] = '{ + `RISCVARCHTEST, + "rv32i_m/D_Zfa/src/fleq_b1-01.S", + "rv32i_m/D_Zfa/src/fleq_b19-01.S", + "rv32i_m/D_Zfa/src/fli.d-01.S", + "rv32i_m/D_Zfa/src/fltq_b1-01.S", + "rv32i_m/D_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fminm_b1-01.S", + "rv32i_m/D_Zfa/src/fminm_b19-01.S", + "rv32i_m/D_Zfa/src/fmaxm_b1-01.S", + "rv32i_m/D_Zfa/src/fmaxm_b19-01.S" +/* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ + }; + + string arch64zfaf[] = '{ + `RISCVARCHTEST, + "rv64i_m/F_Zfa/src/fleq_b1-01.S", + "rv64i_m/F_Zfa/src/fleq_b19-01.S", + "rv64i_m/F_Zfa/src/fli.s-01.S", + "rv64i_m/F_Zfa/src/fltq_b1-01.S", + "rv64i_m/F_Zfa/src/fltq_b19-01.S", + "rv64i_m/F_Zfa/src/fminm_b1-01.S", + "rv64i_m/F_Zfa/src/fminm_b19-01.S", + "rv64i_m/F_Zfa/src/fmaxm_b1-01.S", + "rv64i_m/F_Zfa/src/fmaxm_b19-01.S" +/* "rv64i_m/F_Zfa/src/fround_b1-01.S" */ + }; + + string arch64zfad[] = '{ + `RISCVARCHTEST, + "rv64i_m/D_Zfa/src/fleq_b1-01.S", + "rv64i_m/D_Zfa/src/fleq_b19-01.S", + "rv64i_m/D_Zfa/src/fli.d-01.S", + 
"rv64i_m/D_Zfa/src/fltq_b1-01.S", + "rv64i_m/D_Zfa/src/fltq_b19-01.S", + "rv64i_m/D_Zfa/src/fminm_b1-01.S", + "rv64i_m/D_Zfa/src/fminm_b19-01.S", + "rv64i_m/D_Zfa/src/fmaxm_b1-01.S", + "rv64i_m/D_Zfa/src/fmaxm_b19-01.S" +/* "rv64i_m/D_Zfa/src/fround_b1-01.S" */ }; string arch32d_fma[] = '{ From 07e7e022415a66a9cc89c5c53b4f5fe72f71558d Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 21:26:42 -0800 Subject: [PATCH 19/37] Coded Zfa fmvp but no tests exist --- src/fpu/fpu.sv | 18 +++++++++++------- testbench/tests.vh | 12 +++++++++++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index bd387f5d7..430750c71 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -155,7 +155,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register // other signals - logic [P.FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv + logic [P.FLEN-1:0] PreIntSrcE, IntSrcE; // align SrcA from IEU to the floating point format for fmv / fmvp logic [P.FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [P.FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt @@ -273,23 +273,27 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fli #(P) fli(.Rs1(Rs1E), .Fmt(FmtE), .Imm(FliResE)); end else assign FliResE = '0; - // NaN Box SrcA to convert integer to requested FP size for fmv.*.x - if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; + // fmv.*.x: NaN Box SrcA to extend integer to requested FP size + if(P.FPSIZES == 1) assign PreIntSrcE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; else if(P.FPSIZES == 2) - mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); + mux2 
#(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, PreIntSrcE); else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin localparam XD_LEN = P.D_LEN < P.XLEN ? P.D_LEN : P.XLEN; // shorter of D_LEN and XLEN mux3 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, {{P.FLEN-XD_LEN{1'b1}}, ForwardedSrcAE[XD_LEN-1:0]}, {{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]}, - FmtE, AlignedSrcAE); // NaN boxing zeroes + FmtE, PreIntSrcE); // NaN boxing zeroes end + // fmvp.*.x: Select pair of registers; Zfa must gate both the RV32/D and RV64/Q cases, so the OR is parenthesized (& binds tighter than |) + if (P.ZFA_SUPPORTED & ((P.XLEN==32 & P.D_SUPPORTED) | (P.XLEN==64 & P.Q_SUPPORTED))) + assign IntSrcE = ZfaE ? {ForwardedSrcBE, ForwardedSrcAE} : PreIntSrcE; // choose pair of integer registers for fmvp.d.x / fmvp.q.x + else assign IntSrcE = PreIntSrcE; // select a result that may be written to the FP register - mux4 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); - // select the result that may be written to the integer register with fmv.x.* + // fmv.x.*: select the result that may be written to the integer register if(P.FPSIZES == 1) begin assign mvsgn = XE[P.FLEN-1]; assign SgnExtXE = XE; diff --git a/testbench/tests.vh b/testbench/tests.vh index 43cbc91a0..0c60228dd 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -2004,6 +2004,8 @@ string arch64zbs[] = '{ "rv32i_m/F_Zfa/src/fli.s-01.S", "rv32i_m/F_Zfa/src/fltq_b1-01.S", "rv32i_m/F_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fltq_b1-01.S", // these D tests are more comprehensive and seem they should replace the F tests.
Applies to all F tests duplicated in D + "rv32i_m/D_Zfa/src/fltq_b19-01.S", "rv32i_m/F_Zfa/src/fminm_b1-01.S", "rv32i_m/F_Zfa/src/fminm_b19-01.S", "rv32i_m/F_Zfa/src/fmaxm_b1-01.S", @@ -2015,13 +2017,21 @@ string arch64zbs[] = '{ `RISCVARCHTEST, "rv32i_m/D_Zfa/src/fleq_b1-01.S", "rv32i_m/D_Zfa/src/fleq_b19-01.S", + "rv32i_m/D_Zfa/src/fleq.d_b1-01.S", + "rv32i_m/D_Zfa/src/fleq.d_b19-01.S", "rv32i_m/D_Zfa/src/fli.d-01.S", "rv32i_m/D_Zfa/src/fltq_b1-01.S", "rv32i_m/D_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fltq.d_b1-01.S", + "rv32i_m/D_Zfa/src/fltq.d_b19-01.S", "rv32i_m/D_Zfa/src/fminm_b1-01.S", "rv32i_m/D_Zfa/src/fminm_b19-01.S", + "rv32i_m/D_Zfa/src/fminm.d_b1-01.S", + "rv32i_m/D_Zfa/src/fminm.d_b19-01.S", "rv32i_m/D_Zfa/src/fmaxm_b1-01.S", - "rv32i_m/D_Zfa/src/fmaxm_b19-01.S" + "rv32i_m/D_Zfa/src/fmaxm_b19-01.S", + "rv32i_m/D_Zfa/src/fmaxm.d_b1-01.S", + "rv32i_m/D_Zfa/src/fmaxm.d_b19-01.S" /* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ }; From 4cfc86140c5c31740d5fd2a1272c59ebe5d36a53 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Jan 2024 06:18:00 -0800 Subject: [PATCH 20/37] Zfa fmvh complete and passing tests: --- src/fpu/fctrl.sv | 4 ++-- src/fpu/fpu.sv | 3 ++- testbench/tests.vh | 9 ++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 9f60a692f..8dae34f89 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -148,10 +148,10 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( else if (Funct3D == 3'b000 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) - ControlsD = '0; // fmvh.x.d (Zfa) *** needs values for all moves + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa) // coverage off Q not supported in RV64GC else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & 
Rs2D == 5'b00001) - ControlsD = '0; // fmvh.x.q (Zfa) + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa) // coverage on 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 430750c71..85ea9dba6 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -310,7 +310,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // sign extend to XLEN if necessary if (P.FLEN>P.XLEN) - assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; + if (P.ZFA_SUPPORTED) assign IntSrcXE = ZfaE ? XE[P.FLEN-1:P.FLEN/2] : SgnExtXE[P.XLEN-1:0]; // either fmvh.x.* or fmv.x.* + else assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; else assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); diff --git a/testbench/tests.vh b/testbench/tests.vh index 0c60228dd..7d2d320a8 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -2031,7 +2031,14 @@ string arch64zbs[] = '{ "rv32i_m/D_Zfa/src/fmaxm_b1-01.S", "rv32i_m/D_Zfa/src/fmaxm_b19-01.S", "rv32i_m/D_Zfa/src/fmaxm.d_b1-01.S", - "rv32i_m/D_Zfa/src/fmaxm.d_b19-01.S" + "rv32i_m/D_Zfa/src/fmaxm.d_b19-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b1-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b22-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b23-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b24-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b27-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b28-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b29-01.S" /* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ }; From 2d3dc55986095dd0de45009139de1f40fd3c1215 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 17 Jan 2024 12:19:10 -0600 Subject: [PATCH 21/37] Fixed bug. After I$ invalidated. If the pipelined wasn't stalled the I$ still output the old instruction on the next cycle. Now the I$ ensure that invalidation leads to the next cycle not hitting. 
--- src/cache/cacheway.sv | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 96762dbde..3445067a0 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -77,6 +77,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic ClearDirtyWay; logic SelNonHit; logic SelData; + logic InvalidateCacheDelay; if (!READ_ONLY_CACHE) begin:flushlogic logic FlushWayEn; @@ -121,7 +122,9 @@ module cacheway import cvw::*; #(parameter cvw_t P, assign TagWay = SelData ? ReadTag : '0; // AND part of AOMux assign HitDirtyWay = Dirty & ValidWay; assign DirtyWay = SelDirty & HitDirtyWay; - assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]); + assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]) & ~InvalidateCacheDelay; + + flop #(1) InvalidateCacheReg(clk, InvalidateCache, InvalidateCacheDelay); ///////////////////////////////////////////////////////////////////////////////////////////// // Data Array From 74b242ce5ca85b4f19587094ad4739c85372ed0a Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Jan 2024 12:25:06 -0800 Subject: [PATCH 22/37] Partial implementation of fcvtmod.w.d; flags disagree in one case where Sail might be wrong, and result 134 is wrong because of overflow --- src/fpu/fctrl.sv | 15 ++++++++------- src/fpu/fpu.sv | 6 +++--- src/fpu/postproc/postprocess.sv | 5 +++-- src/fpu/postproc/specialcase.sv | 30 ++++++++++++++++++++++++++---- testbench/common/instrNameDecTB.sv | 12 ++++++++++++ testbench/tests.vh | 7 +++++++ 6 files changed, 59 insertions(+), 16 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 8dae34f89..6d5a91aa6 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -54,7 +54,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that 
finish in the memory stage output logic FPUActiveE, // FP instruction being executed - output logic ZfaE, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) + output logic ZfaE, ZfaM, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) // register control signals output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable output logic FWriteIntE, FWriteIntM, // Write to integer register @@ -149,7 +149,8 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa) - // coverage off Q not supported in RV64GC + // Q not supported in RV64GC + // coverage off else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D == 5'b00001) ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa) // coverage on @@ -238,11 +239,11 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( endcase // coverage on 7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000) - ControlsD = '0; // fmvp.d.x (Zfa) + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong // Not covered in testing because rv64gc does not support quad precision // coverage off 7'b1011011: if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000) - ControlsD = '0; // fmvp.q.x (Zfa) + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.q.x (Zfa) // coverage on endcase endcase @@ -362,9 +363,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( else assign IDivStartE = 0; // E/M pipleine register - flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResSelE, PostProcSelE, FrmE, 
FmtE, OpCtrlE, FWriteIntE, FCvtIntE}, - {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM}); + flopenrc #(14+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE}, + {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM, ZfaM}); // renameing for readability assign FpLoadStoreM = FResSelM[1]; diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 85ea9dba6..c304219aa 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -83,7 +83,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation logic FRegWriteE; // Write floating-point register logic FPUActiveE; // FP instruction being executed - logic ZfaE; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) + logic ZfaE, ZfaM; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) // regfile signals logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -171,7 +171,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .IntDivE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .FrmM, .FmtE, .FmtM, + .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .FrmM, .FmtE, .FmtM, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, @@ -348,7 +348,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), 
.YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), - .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), + .ToInt(FWriteIntM), .Zfa(ZfaM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index c2de8644e..516752a78 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -56,6 +56,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic CvtResSubnormUf, // the convert result is subnormal or underflows input logic [P.LOGCVTLEN-1:0] CvtShiftAmt, // how much to shift by input logic ToInt, // is fp->int (since it's writting to the integer register) + input logic Zfa, // Zfa operation (fcvtmod.w.d) input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) input logic IntZero, // is the integer input zero // final results @@ -216,9 +217,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( negateintres #(P) negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); - specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, - .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, + .NaNIn, .IntToFp, .Int64, .Signed, .Zfa, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/src/fpu/postproc/specialcase.sv 
b/src/fpu/postproc/specialcase.sv index 677ccce16..76784e4a1 100644 --- a/src/fpu/postproc/specialcase.sv +++ b/src/fpu/postproc/specialcase.sv @@ -53,6 +53,7 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( input logic IntToFp, // is cvt int -> fp opperation input logic Int64, // is the integer 64 bits input logic Signed, // is the integer signed + input logic Zfa, // Zfa conversion operation: fcvtmod.w.d input logic [P.NE:0] CvtCe, // the calculated expoent for cvt input logic IntInvalid, // integer invalid flag to choose the result input logic CvtResUf, // does the convert result underflow @@ -70,10 +71,12 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( logic [P.FLEN-1:0] OfRes; // overflowed result result logic [P.FLEN-1:0] NormRes; // normal result logic [P.XLEN-1:0] OfIntRes; // the overflow result for integer output + logic [P.XLEN-1:0] OfIntRes2; // the overflow result for integer output after accounting for fcvtmod.w.d + logic [P.XLEN-1:0] Int64Res; // Result for conversion to 64-bit int after accounting for fcvtmod.w.d logic OfResMax; // does the of result output maximum norm fp number logic KillRes; // kill the result for underflow - logic SelOfRes; // should the overflow result be selected - + logic SelOfRes; // should the overflow result be selected (excluding convert) + logic SelCvtOfRes; // select overflow result for convert instruction // does the overflow result output the maximum normalized floating point number // output infinity if the input is infinity @@ -329,6 +332,25 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( else OfIntRes = {P.XLEN{1'b1}}; // unsigned positive end + // fcvtmod.w.d logic + // fcvtmod.w.d is like fcvt.w.d excep thtat it takes bits [31:0] and sign extends the rest, + // and converts +/-inf and NaN to zero. 
+ + if (P.ZFA_SUPPORTED & P.D_SUPPORTED) // fcvtmod.w.d support + always_comb begin + if (Zfa) OfIntRes2 = '0; + else OfIntRes2 = OfIntRes; + if (Zfa) Int64Res = {{(P.XLEN-32){CvtNegRes[P.XLEN-1]}}, CvtNegRes[31:0]}; + else Int64Res = CvtNegRes[P.XLEN-1:0]; + if (Zfa) SelCvtOfRes = InfIn | NaNIn; // fcvtmod.w.d only overflows to 0 on NaN or Infinity + else SelCvtOfRes = IntInvalid; // regular fcvt gives an overflow if out of range + end + else + always_comb begin // no fcvtmod.w.d support + OfIntRes2 = OfIntRes; + Int64Res = CvtNegRes[P.XLEN-1:0]; + SelCvtOfRes = IntInvalid; + end // select the integer output // - if the input is invalid (out of bounds NaN or Inf) then output overflow res @@ -337,10 +359,10 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( // - otherwise output a rounded 0 // - otherwise output the normal res (trmined and sign extended if nessisary) always_comb - if(IntInvalid) FCvtIntRes = OfIntRes; + if(SelCvtOfRes) FCvtIntRes = OfIntRes2; else if(CvtCe[P.NE]) if(Xs&Signed&Plus1) FCvtIntRes = {{P.XLEN{1'b1}}}; else FCvtIntRes = {{P.XLEN-1{1'b0}}, Plus1}; - else if(Int64) FCvtIntRes = CvtNegRes[P.XLEN-1:0]; + else if(Int64) FCvtIntRes = Int64Res; else FCvtIntRes = {{P.XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; endmodule diff --git a/testbench/common/instrNameDecTB.sv b/testbench/common/instrNameDecTB.sv index a3b5ef58e..ee6cd6900 100644 --- a/testbench/common/instrNameDecTB.sv +++ b/testbench/common/instrNameDecTB.sv @@ -298,6 +298,18 @@ module instrNameDecTB( else if (funct7[6:2] == 5'b11100 & funct3 == 3'b001) name = "FCLASS"; else if (funct7[6:2] == 5'b00100 & funct3 == 3'b010) name = "FSGNJX"; else if (funct7[6:2] == 5'b10100 & funct3 == 3'b010) name = "FEQ"; + else if (funct7[6:2] == 5'b11110 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FLI"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b010) name = "FMINM"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b011) name = "FMAXM"; + else if (funct7[6:2] == 5'b01000 & rs2 == 
5'b00100) name = "FROUND"; + else if (funct7[6:2] == 5'b01000 & rs2 == 5'b00101) name = "FROUNDNX"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b100) name = "FLEQ"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b101) name = "FLTQ"; + else if (funct7 == 7'b1110001 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FMVH.X.D"; + else if (funct7 == 7'b1110011 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FMVH.X.Q"; + else if (funct7 == 7'b1011001 & funct3 == 3'b000) name = "FMVP.D.X"; + else if (funct7 == 7'b1011011 & funct3 == 3'b000) name = "FMVP.Q.X"; + else if (funct7 == 7'b1100001 & funct3 == 3'b001 & rs2 == 5'b01000) name = "FCVTMOD.W.D"; else name = "ILLEGAL"; 10'b0000111_010: name = "FLW"; 10'b0100111_010: name = "FSW"; diff --git a/testbench/tests.vh b/testbench/tests.vh index 7d2d320a8..fecf4ebc9 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -2015,6 +2015,13 @@ string arch64zbs[] = '{ string arch32zfad[] = '{ `RISCVARCHTEST, + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b24-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b27-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b28-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b29-01.S", "rv32i_m/D_Zfa/src/fleq_b1-01.S", "rv32i_m/D_Zfa/src/fleq_b19-01.S", "rv32i_m/D_Zfa/src/fleq.d_b1-01.S", From 8b60992e72c629add0b2d090b83ecf4711b5a3ab Mon Sep 17 00:00:00 2001 From: naichewa Date: Wed, 17 Jan 2024 14:38:11 -0800 Subject: [PATCH 23/37] fixed SPI tests failing when no icache --- .../riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S | 1 + .../riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S index b9c82c92d..3d0abc6a0 100644 --- 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S @@ -607,6 +607,7 @@ SETUP_PLIC .4byte delay1, 0x0000001, write32_test # reset delay1 register .4byte cs_mode, 0x00000000, write32_test # reset cs_mode .4byte tx_mark, 0x00000001, write32_test # set transmit watermark to 1 (any entry turns mark off) +.4byte sck_div, 0x00000100, write32_test # lower SPI clock rate so read32_tests trigger at correct times #.4byte ie, 0x00000000, write32_test # enable transmit interrupt .4byte ip, 0x00000001, read32_test # tx watermark interupt should be pending .4byte 0x0, 0x00000000, readmip_test diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S index 266b0e74f..11aebe333 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S @@ -608,6 +608,7 @@ SETUP_PLIC .8byte delay1, 0x0000001, write32_test # reset delay1 register .8byte cs_mode, 0x00000000, write32_test # reset cs_mode +.8byte sck_div, 0x00000100, write32_test # lower SPI clock rate so reads are done at correct time when ICACHE not supported .8byte tx_mark, 0x00000001, write32_test # set transmit watermark to 1 (any entry turns mark off) #.8byte ie, 0x00000000, write32_test # enable transmit interrupt .8byte ip, 0x00000001, read32_test # tx watermark interupt should be pending From e8474373e4b97637d87f8e11a7136502a3ba21d1 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 18 Jan 2024 09:18:17 -0600 Subject: [PATCH 24/37] Fixed it so Virtual Memory work without a D$. 
--- src/lsu/lsu.sv | 6 ++++-- src/mmu/hptw.sv | 16 ++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index f86c62aea..360b4a103 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -112,6 +112,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation + logic DCacheBusStallM; // Cache or bus stall logic CacheBusHPWTStall; // Cache, bus, or hptw is requesting a stall logic SelSpillE; // Align logic detected a spill and needs to stall @@ -194,7 +195,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(P.VIRTMEM_SUPPORTED) begin : hptw hptw #(P) hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, - .FlushW, .DCacheStallM, .SATP_REGW, .PCSpillF, + .FlushW, .DCacheBusStallM, .SATP_REGW, .PCSpillF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_ADUE, .PrivilegeModeW, .ReadDataM(ReadDataM[P.XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN .WriteDataM(WriteDataZM), .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, @@ -225,7 +226,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // the trap module. 
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; + assign DCacheBusStallM = DCacheStallM | BusStall; + assign CacheBusHPWTStall = DCacheBusStallM | HPTWStall; assign LSUStallM = CacheBusHPWTStall | SpillStallM; ///////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index 7ca4a007a..82eeaef87 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -42,7 +42,7 @@ module hptw import cvw::*; #(parameter cvw_t P) ( input logic [1:0] PrivilegeModeW, input logic [P.XLEN-1:0] ReadDataM, // page table entry from LSU input logic [P.XLEN-1:0] WriteDataM, - input logic DCacheStallM, // stall from LSU + input logic DCacheBusStallM, // stall from LSU input logic [2:0] Funct3M, input logic [6:0] Funct7M, input logic ITLBMissF, @@ -145,7 +145,7 @@ module hptw import cvw::*; #(parameter cvw_t P) ( // State flops flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrUpdateDAM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) - assign PRegEn = HPTWRW[1] & ~DCacheStallM | UpdatePTE; + assign PRegEn = HPTWRW[1] & ~DCacheBusStallM | UpdatePTE; flopenr #(P.XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache // Assign PTE descriptors common across all XLEN values @@ -283,30 +283,30 @@ module hptw import cvw::*; #(parameter cvw_t P) ( flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset | FlushW, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb case (WalkerState) - IDLE: if (TLBMiss & ~DCacheStallM) NextWalkerState = InitialWalkerState; + IDLE: if (TLBMiss & ~DCacheBusStallM) NextWalkerState = InitialWalkerState; else NextWalkerState = IDLE; L3_ADR: NextWalkerState = L3_RD; // first access in SV48 - L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; + L3_RD: if (DCacheBusStallM) NextWalkerState = L3_RD; 
else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = L2_ADR; L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39 else NextWalkerState = LEAF; - L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; + L2_RD: if (DCacheBusStallM) NextWalkerState = L2_RD; else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = L1_ADR; L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32 else NextWalkerState = LEAF; - L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; + L1_RD: if (DCacheBusStallM) NextWalkerState = L1_RD; else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = L0_ADR; L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; + L0_RD: if (DCacheBusStallM) NextWalkerState = L0_RD; else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = LEAF; LEAF: if (P.SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; - UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; + UPDATE_PTE: if(DCacheBusStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; FAULT: NextWalkerState = IDLE; default: NextWalkerState = IDLE; // should never be reached From ff6bb3be0c1562aad94f159f88e0f2648c148c27 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 18 Jan 2024 09:29:52 -0600 Subject: [PATCH 25/37] Fixed another bug with virtual memory and no caches. 
--- src/lsu/lsu.sv | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 360b4a103..220a42eef 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -111,6 +111,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1 logic BusStall; // Bus interface busy with multicycle operation + logic LSUBusStallM; // Bus interface busy with multicycle operation masked by IgnoreRequestTLB logic HPTWStall; // HPTW busy with multicycle operation logic DCacheBusStallM; // Cache or bus stall logic CacheBusHPWTStall; // Cache, bus, or hptw is requesting a stall @@ -226,7 +227,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // the trap module. assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign DCacheBusStallM = DCacheStallM | BusStall; + assign DCacheBusStallM = DCacheStallM | LSUBusStallM; assign CacheBusHPWTStall = DCacheBusStallM | HPTWStall; assign LSUStallM = CacheBusHPWTStall | SpillStallM; @@ -354,6 +355,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM)); + // Mux between the 3 sources of read data, 0: cache, 1: Bus, 2: DTIM // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. 
@@ -388,6 +390,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign {DCacheStallM, DCacheCommittedM} = '0; end + assign LSUBusStallM = BusStall & ~IgnoreRequestTLB; + ///////////////////////////////////////////////////////////////////////////////////////////// // Atomic operations ///////////////////////////////////////////////////////////////////////////////////////////// From 911b400af2cfd4ce47e6d48a748d7983684ba184 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 13:13:56 -0800 Subject: [PATCH 26/37] Fault on misaligned AMO --- src/mmu/mmu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index c26ee2a44..dffa7be76 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -140,7 +140,7 @@ module mmu import cvw::*; #(parameter cvw_t P, 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); - assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & (~(P.ZICCLSM_SUPPORTED & Cacheable) | ReadAccessM); // Misaligned AMO faults even if ZICCLSM supported // Specify which type of page fault is occurring assign InstrPageFaultF = TLBPageFault & ExecuteAccessF; From 12b2baff827707f6934e71b6f1623f423fe08e1d Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Thu, 18 Jan 2024 17:33:59 -0800 Subject: [PATCH 27/37] add coverage of sfence.inval.ir instruction and fix sret coverage --- tests/coverage/priv.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index aa9c8b50b..dcf56e14a 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -297,6 +297,16 @@ sretdone: wfi + + + # Test uncovered privdec instructions + # exercise sfence.inval.ir instruction + .word 0x18100073 + + # exercise sret with rs1 not 0 + .word 
0x102F8073 + + j done From 82d9467eeaf10135548b491e4224b5d5e0b638ec Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Thu, 18 Jan 2024 19:29:16 -0800 Subject: [PATCH 28/37] Add coverage of FIOM in different privilege modes --- tests/coverage/csrwrites.S | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/coverage/csrwrites.S b/tests/coverage/csrwrites.S index 63ee00c38..ce5639bd7 100644 --- a/tests/coverage/csrwrites.S +++ b/tests/coverage/csrwrites.S @@ -37,4 +37,31 @@ main: csrrw t1, menvcfg, t0 csrrw t2, senvcfg, t0 + # testing FIOM with different privilege modes + # setting environment config (to both 1 and 0) in each privilege mode + csrsi menvcfg, 1 + li a0, 1 + ecall # enter supervisor mode + + li a0, 0 + ecall # enter user mode + + li a0, 1 + ecall # enter supervisor mode + + csrsi senvcfg, 1 + li a0, 0 + ecall # enter user mode + + li a0, 3 + ecall # enter machine mode + csrci menvcfg, 1 + + li a0, 1 + ecall # enter supervisor mode + + li a0, 0 + ecall # enter user mode + + j done From f06f681dbd9491fb876261e8d2cd96382ae6628f Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 21:30:39 -0800 Subject: [PATCH 29/37] CoreMark displays StoreStalls --- benchmarks/coremark/Makefile | 22 ++----------------- .../coremark/riscv64-baremetal/syscalls.c | 1 + 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index db8a6e1d6..6e466291e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -11,8 +11,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc_zbs -#ARCH := rv$(XLEN)gc_zba_zbb_zbc_zbs +ARCH := rv$(XLEN)gc_zba_zbb_zbc +#ARCH := 
rv$(XLEN)im_zicsr_zba_zbb_zbc #ARCH := rv$(XLEN)gc #ARCH := rv$(XLEN)imc_zicsr #ARCH := rv$(XLEN)im_zicsr @@ -25,24 +25,6 @@ PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) -# Black Parrott -#PORT_CFLAGS = -O2 -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 \ - -DITERATIONS=10 -DPERFORMANCE_RUN=1 -#OPTIMIZE := -O2 -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 -#override CFLAGS += $(OPTIMIZE) -DFLAGS_STR=\""$(OPTIMIZE)"\" -#override CFLAGS += -DITERATIONS=10 -DPERFORMANCE_RUN=1 - -# try adding the new fields from muntjac coremark build -#PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ - -fno-common -flto -funswitch-loops -mcmodel=medany \ - -falign-functions=4 -falign-jumps=4 -falign-loops=4 \ - -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions --param max-inline-insns-auto=20 -falign-jumps=4 \ - -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ - -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ - -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) - - all: $(work_dir)/coremark.bare.riscv.elf.memfile run: diff --git a/benchmarks/coremark/riscv64-baremetal/syscalls.c b/benchmarks/coremark/riscv64-baremetal/syscalls.c index 29cd5f24a..25c47b797 100644 --- a/benchmarks/coremark/riscv64-baremetal/syscalls.c +++ b/benchmarks/coremark/riscv64-baremetal/syscalls.c @@ -177,6 +177,7 @@ void _init(int cid, int nc) counters[17] = read_csr(mhpmcounter17) - counters[17]; ee_printf("Load Stalls 
%d\n", counters[11]); + ee_printf("Store Stalls %d\n", counters[12]); ee_printf("D-Cache Accesses %d\n", counters[13]); ee_printf("D-Cache Misses %d\n", counters[14]); ee_printf("I-Cache Accesses %d\n", counters[16]); From eb8ab3fae252211d1e600c9a641a04f1e71ab464 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 21:30:59 -0800 Subject: [PATCH 30/37] EBU coverage exclusion --- sim/coverage-exclusions-rv64gc.do | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 76e18e30e..b9c20eead 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -253,3 +253,10 @@ coverage exclude -srcfile priorityonehot.sv # Excluding pmpadrdecs[0] coverage case for PAgePMPAdrIn being hardwired to 1 coverage exclude -scope /dut/core/ifu/immu/immu/pmp/pmpchecker/pmp/pmpadrdecs[0] -linerange [GetLineNum ../src/mmu/pmpadrdec.sv "exclusion-tag: PAgePMPAdrIn"] -item e 1 -fecexprrow 1 coverage exclude -scope /dut/core/lsu/dmmu/dmmu/pmp/pmpchecker/pmp/pmpadrdecs[0] -linerange [GetLineNum ../src/mmu/pmpadrdec.sv "exclusion-tag: PAgePMPAdrIn"] -item e 1 -fecexprrow 1 + +#################### +# EBU +#################### + +# Exclude EBU Beat Counter because it is only idle when bus has multicycle latency, but rv64gc has single cycle latency +coverage exclude -scope /core/ebu/ebu/ebufsmarb/BeatCounter From 17c9be7695b6e7a65541c7e727c63d202c40bf65 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 21:36:52 -0800 Subject: [PATCH 31/37] Cleanup typos, remove Zicond from riscof until it is working --- config/buildroot/config.vh | 12 ++++++------ src/fpu/postproc/flags.sv | 2 +- src/fpu/postproc/postprocess.sv | 2 +- testbench/testbench.sv | 4 ++-- tests/riscof/spike/riscof_spike.py | 4 ++++ tests/riscof/spike/spike_rv32gc_isa.yaml | 3 ++- tests/riscof/spike/spike_rv64gc_isa.yaml | 3 ++- 7 files changed, 18 insertions(+), 12 deletions(-) diff --git 
a/config/buildroot/config.vh b/config/buildroot/config.vh index d36fcf6e3..de6e4800d 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -40,7 +40,7 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam COUNTERS = 12'd32; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; @@ -57,7 +57,7 @@ localparam BUS_SUPPORTED = 1; localparam DCACHE_SUPPORTED = 1; localparam ICACHE_SUPPORTED = 1; localparam VIRTMEM_SUPPORTED = 1; -localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; localparam BIGENDIAN_SUPPORTED = 1; // TLB configuration. Entries should be a power of 2 @@ -163,10 +163,10 @@ localparam RADIX = 32'h4; localparam DIVCOPIES = 32'h4; // bit manipulation -localparam ZBA_SUPPORTED = 0; -localparam ZBB_SUPPORTED = 0; -localparam ZBC_SUPPORTED = 0; -localparam ZBS_SUPPORTED = 0; +localparam ZBA_SUPPORTED = 1; +localparam ZBB_SUPPORTED = 1; +localparam ZBC_SUPPORTED = 1; +localparam ZBS_SUPPORTED = 1; // New compressed instructions localparam ZCB_SUPPORTED = 1; diff --git a/src/fpu/postproc/flags.sv b/src/fpu/postproc/flags.sv index 98ed0a34d..50d9bf229 100644 --- a/src/fpu/postproc/flags.sv +++ b/src/fpu/postproc/flags.sv @@ -70,7 +70,7 @@ module flags import cvw::*; #(parameter cvw_t P) ( logic DivInvalid; // integer invalid flag logic Underflow; // Underflow flag logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent - logic ShiftGtIntSz; // is the shift greater than the the integer size (use Re to account for possible roundning "shift") + logic ShiftGtIntSz; // is the shift greater than the integer size (use Re to account for possible rounding "shift") /////////////////////////////////////////////////////////////////////////////// // Overflow diff --git a/src/fpu/postproc/postprocess.sv 
b/src/fpu/postproc/postprocess.sv index 516752a78..1d51fdf85 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -89,7 +89,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma - // division singals + // division signals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 87b603288..b20c6a993 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -632,8 +632,8 @@ task automatic updateProgramAddrLabelArray; end end - if(ProgramAddrLabelArray["begin_signature"] == 0) $display("Couldn't find begin_signature in %s", ProgramLabelMapFile); - if(ProgramAddrLabelArray["sig_end_canary"] == 0) $display("Couldn't find sig_end_canary in %s", ProgramLabelMapFile); +// if(ProgramAddrLabelArray["begin_signature"] == 0) $display("Couldn't find begin_signature in %s", ProgramLabelMapFile); +// if(ProgramAddrLabelArray["sig_end_canary"] == 0) $display("Couldn't find sig_end_canary in %s", ProgramLabelMapFile); $fclose(ProgramLabelMapFP); $fclose(ProgramAddrMapFP); diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index 61b556932..5450f64df 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -115,6 +115,10 @@ class spike(pluginTemplate): self.isa += '_Zicond' if "Zicboz" in ispec["ISA"]: self.isa += '_Zicboz' + if "Zfa" in ispec["ISA"]: + self.isa += '_Zfa' + if "Zfh" in ispec["ISA"]: + self.isa += '_Zfh' if "Zca" in ispec["ISA"]: self.isa += '_Zca' if "Zcb" in 
ispec["ISA"]: diff --git a/tests/riscof/spike/spike_rv32gc_isa.yaml b/tests/riscof/spike/spike_rv32gc_isa.yaml index ae314fa76..7d97edb6a 100644 --- a/tests/riscof/spike/spike_rv32gc_isa.yaml +++ b/tests/riscof/spike/spike_rv32gc_isa.yaml @@ -1,6 +1,7 @@ hart_ids: [0] hart0: - ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zba_Zbb_Zbc_Zbs + ISA: RV32IMAFDCZicsr_Zifencei_Zba_Zbb_Zbc_Zbs +# ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # ISA: RV32IMAFDCZicsr_Zicboz_Zifencei_Zca_Zba_Zbb_Zbc_Zbs # _Zbkb_Zcb physical_addr_sz: 32 User_Spec_Version: '2.3' diff --git a/tests/riscof/spike/spike_rv64gc_isa.yaml b/tests/riscof/spike/spike_rv64gc_isa.yaml index df5e7cb2b..471fbbb13 100644 --- a/tests/riscof/spike/spike_rv64gc_isa.yaml +++ b/tests/riscof/spike/spike_rv64gc_isa.yaml @@ -2,7 +2,8 @@ hart_ids: [0] hart0: # ISA: RV64IMAFDCSUZicsr_Zicboz_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb # ISA: RV64IMAFDCSUZicsr_Zifencei_Zca_Zcb_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb - ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb +# ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb + ISA: RV64IMAFDCSUZicsr_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb physical_addr_sz: 56 User_Spec_Version: '2.3' supported_xlen: [64] From 9614913e8f54285096d2c162b2cc9e130e914e9a Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 22:10:20 -0800 Subject: [PATCH 32/37] Changed CoreMark Makefile to rv64im --- benchmarks/coremark/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 6e466291e..a73dc6eea 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -11,8 +11,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring 
"64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)gc_zba_zbb_zbc -#ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc +#ARCH := rv$(XLEN)gc_zba_zbb_zbc +ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc #ARCH := rv$(XLEN)gc #ARCH := rv$(XLEN)imc_zicsr #ARCH := rv$(XLEN)im_zicsr From 9260d3c424092d3cd660fb5f3055f600b6ee0f6e Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 22:46:07 -0800 Subject: [PATCH 33/37] Add Zfh support to imperas.ic, use Zicond in riscof now that it is fixed in riscv-arch-test --- config/rv32gc/config.vh | 4 ++-- sim/imperas.ic | 1 + tests/riscof/spike/spike_rv32gc_isa.yaml | 3 +-- tests/riscof/spike/spike_rv64gc_isa.yaml | 3 +-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index a59bb1ab3..4baef0075 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -41,8 +41,8 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; -localparam ZFA_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; +localparam ZFA_SUPPORTED = 1; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/sim/imperas.ic b/sim/imperas.ic index f3c620b96..5de5935c6 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -20,6 +20,7 @@ # More extensions --override cpu/Zcb=T --override cpu/Zicond=T +--override cpu/Zfh=T # Cache block operations --override cpu/Zicbom=T diff --git a/tests/riscof/spike/spike_rv32gc_isa.yaml b/tests/riscof/spike/spike_rv32gc_isa.yaml index 7d97edb6a..c2c95fbf4 100644 --- a/tests/riscof/spike/spike_rv32gc_isa.yaml +++ b/tests/riscof/spike/spike_rv32gc_isa.yaml @@ -1,7 +1,6 @@ hart_ids: [0] hart0: - ISA: RV32IMAFDCZicsr_Zifencei_Zba_Zbb_Zbc_Zbs -# ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs + ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # ISA: RV32IMAFDCZicsr_Zicboz_Zifencei_Zca_Zba_Zbb_Zbc_Zbs # 
_Zbkb_Zcb physical_addr_sz: 32 User_Spec_Version: '2.3' diff --git a/tests/riscof/spike/spike_rv64gc_isa.yaml b/tests/riscof/spike/spike_rv64gc_isa.yaml index 471fbbb13..4374ad07c 100644 --- a/tests/riscof/spike/spike_rv64gc_isa.yaml +++ b/tests/riscof/spike/spike_rv64gc_isa.yaml @@ -2,8 +2,7 @@ hart_ids: [0] hart0: # ISA: RV64IMAFDCSUZicsr_Zicboz_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb # ISA: RV64IMAFDCSUZicsr_Zifencei_Zca_Zcb_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb -# ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb - ISA: RV64IMAFDCSUZicsr_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb + ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb physical_addr_sz: 56 User_Spec_Version: '2.3' supported_xlen: [64] From 324180244178e85510cbecd953cfe181c20a9893 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 21 Jan 2024 08:25:17 -0800 Subject: [PATCH 34/37] fixed bug in CORRSHIFTSZ param --- config/shared/config-shared.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 86f9a0a9e..ba215785c 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -111,7 +111,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6))); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); -localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? (DIVMINb+1+NF) : (3*NF+4))); +localparam CORRSHIFTSZ = (((DIVMINb+1+NF) > (3*NF+4) ? 
(DIVMINb+1+NF) : (3*NF+4))); // max(DIVMINb+NF+1, 3*NF+4) // Disable spurious Verilator warnings From 1459943a7537ff184ab00d27a476129d2c1d2c91 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 21 Jan 2024 10:08:48 -0800 Subject: [PATCH 35/37] more shiftcorrection bug fixes --- config/shared/config-shared.vh | 2 +- src/fpu/postproc/shiftcorrection.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index ba215785c..5dfb4b1ba 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -111,7 +111,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6))); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); -localparam CORRSHIFTSZ = (((DIVMINb+1+NF) > (3*NF+4) ? 
(DIVMINb+1+NF) : (3*NF+4))); // max(DIVMINb+NF+1, 3*NF+4) +localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Disable spurious Verilator warnings diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index f5860b42d..1da3556d8 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -44,7 +44,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( output logic [P.NE+1:0] Ue // corrected exponent for divider ); - logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction + logic [P.CORRSHIFTSZ-1:0] CorrSumShifted; // the shifted sum after LZA correction logic [P.CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted logic [P.CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift logic ResSubnorm; // is the result Subnormal @@ -68,7 +68,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits always_comb - if(FmaOp) Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}}; + if(FmaOp) Mf = {CorrSumShifted}; else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted; else Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ]; From 4936496bb9852f08db33fe3904a7d258be73d0f7 Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Mon, 22 Jan 2024 08:58:31 -0800 Subject: [PATCH 36/37] fix sfence.inval.ir and sret coverage from previous PR --- tests/coverage/priv.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index dcf56e14a..6b5260259 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -300,6 +300,8 @@ sretdone: # Test uncovered privdec instructions + li a0, 3 + ecall # exercise sfence.inval.ir instruction .word 0x18100073 From 0c13e14bbf922db0306d9b72285620c565554a34 Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Mon, 22 Jan 
2024 09:52:58 -0800 Subject: [PATCH 37/37] coverage improvements for mret when mpp = 3; update imperas config --- sim/imperas.ic | 2 ++ tests/coverage/priv.S | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sim/imperas.ic b/sim/imperas.ic index 5de5935c6..b35166429 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -37,6 +37,8 @@ # SV39 and SV48 supported --override cpu/Sv_modes=768 +--override cpu/Svinval=T + # clarify #--override refRoot/cpu/mtvec_sext=F diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index 6b5260259..39b3a8aeb 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -309,7 +309,20 @@ sretdone: .word 0x102F8073 - j done + # cover mret when mpp = 3 and mprv = 1 + li a0, 3 + ecall # enter machine mode + bseti t0, zero, 17 + csrs mstatus, t0 # set MPRV + li t1, 0x00001800 + csrs mstatus, t1 # set MPP=3 + la t1, finished + csrr t0, mepc + csrw mepc, t1 # set mepc for mret to jump to + mret + + +finished: j done