diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index db8a6e1d6..a73dc6eea 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -11,8 +11,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc_zbs -#ARCH := rv$(XLEN)gc_zba_zbb_zbc_zbs +#ARCH := rv$(XLEN)gc_zba_zbb_zbc +ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc #ARCH := rv$(XLEN)gc #ARCH := rv$(XLEN)imc_zicsr #ARCH := rv$(XLEN)im_zicsr @@ -25,24 +25,6 @@ PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) -# Black Parrott -#PORT_CFLAGS = -O2 -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 \ - -DITERATIONS=10 -DPERFORMANCE_RUN=1 -#OPTIMIZE := -O2 -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 -#override CFLAGS += $(OPTIMIZE) -DFLAGS_STR=\""$(OPTIMIZE)"\" -#override CFLAGS += -DITERATIONS=10 -DPERFORMANCE_RUN=1 - -# try adding the new fields from muntjac coremark build -#PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ - -fno-common -flto -funswitch-loops -mcmodel=medany \ - -falign-functions=4 -falign-jumps=4 -falign-loops=4 \ - -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions --param max-inline-insns-auto=20 -falign-jumps=4 \ - -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ - -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp 
-fwrapv -fipa-pta \ - -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) - - all: $(work_dir)/coremark.bare.riscv.elf.memfile run: diff --git a/benchmarks/coremark/riscv64-baremetal/syscalls.c b/benchmarks/coremark/riscv64-baremetal/syscalls.c index 29cd5f24a..25c47b797 100644 --- a/benchmarks/coremark/riscv64-baremetal/syscalls.c +++ b/benchmarks/coremark/riscv64-baremetal/syscalls.c @@ -177,6 +177,7 @@ void _init(int cid, int nc) counters[17] = read_csr(mhpmcounter17) - counters[17]; ee_printf("Load Stalls %d\n", counters[11]); + ee_printf("Store Stalls %d\n", counters[12]); ee_printf("D-Cache Accesses %d\n", counters[13]); ee_printf("D-Cache Misses %d\n", counters[14]); ee_printf("I-Cache Accesses %d\n", counters[16]); diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index e183d9cbd..de6e4800d 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -40,7 +40,8 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam COUNTERS = 12'd32; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; @@ -56,7 +57,7 @@ localparam BUS_SUPPORTED = 1; localparam DCACHE_SUPPORTED = 1; localparam ICACHE_SUPPORTED = 1; localparam VIRTMEM_SUPPORTED = 1; -localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; localparam BIGENDIAN_SUPPORTED = 1; // TLB configuration. 
Entries should be a power of 2 @@ -162,10 +163,10 @@ localparam RADIX = 32'h4; localparam DIVCOPIES = 32'h4; // bit manipulation -localparam ZBA_SUPPORTED = 0; -localparam ZBB_SUPPORTED = 0; -localparam ZBC_SUPPORTED = 0; -localparam ZBS_SUPPORTED = 0; +localparam ZBA_SUPPORTED = 1; +localparam ZBB_SUPPORTED = 1; +localparam ZBC_SUPPORTED = 1; +localparam ZBS_SUPPORTED = 1; // New compressed instructions localparam ZCB_SUPPORTED = 1; diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index 8906bb571..70d455b4e 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -41,6 +41,7 @@ localparam COUNTERS = 12'd0; localparam ZICNTR_SUPPORTED = 0; localparam ZIHPM_SUPPORTED = 0; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 67855c817..4baef0075 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -41,7 +41,8 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; +localparam ZFA_SUPPORTED = 1; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index 2f90656f2..6e5d08803 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -41,6 +41,7 @@ localparam COUNTERS = 0; localparam ZICNTR_SUPPORTED = 0; localparam ZIHPM_SUPPORTED = 0; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index ecb7b8f78..a32dc3bd6 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -40,6 +40,7 @@ localparam COUNTERS = 12'd32; localparam 
ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index 11feba734..09885808f 100644 --- a/config/rv64fpquad/config.vh +++ b/config/rv64fpquad/config.vh @@ -31,7 +31,7 @@ localparam XLEN = 32'd64; // IEEE 754 compliance -localparam IEEE754 = 0; +localparam IEEE754 = 1; // MISA RISC-V configuration per specification localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ); @@ -41,6 +41,7 @@ localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 1; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index af6e4aebd..bb3e79659 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -40,7 +40,8 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index 1908f900f..609a50f97 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -41,6 +41,7 @@ localparam COUNTERS = 0; localparam ZICNTR_SUPPORTED = 0; localparam ZIHPM_SUPPORTED = 0; localparam ZFH_SUPPORTED = 0; +localparam ZFA_SUPPORTED = 0; localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 55bca569f..5dfb4b1ba 100644 --- a/config/shared/config-shared.vh +++ 
b/config/shared/config-shared.vh @@ -65,33 +65,29 @@ localparam H_NF = 32'd10; localparam H_BIAS = 32'd15; localparam H_FMT = 2'd2; -// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits -localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : S_LEN); -localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE); -localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF); -localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0); -localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS); -/* Delete once tested dh 10/10/22 - -localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : F_SUPPORTED ? S_LEN : H_LEN); -localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : F_SUPPORTED ? S_NE : H_NE); -localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : F_SUPPORTED ? S_NF : H_NF); -localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : F_SUPPORTED ? 2'd0 : 2'd2); -localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : F_SUPPORTED ? S_BIAS : H_BIAS);*/ +// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits (for longest format supported) +localparam FLEN = Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : S_LEN; +localparam NE = Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE; +localparam NF = Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF; +localparam FMT = Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0; +localparam BIAS = Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS; // Floating point constants needed for FPU paramerterization -localparam FPSIZES = ((32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED)); -localparam FMTBITS = ((32)'(FPSIZES>=3)+1); -localparam LEN1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_LEN : (F_SUPPORTED & (FLEN != S_LEN)) ? S_LEN : H_LEN); -localparam NE1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NE : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NE : H_NE); -localparam NF1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? 
D_NF : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NF : H_NF); -localparam FMT1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? 2'd1 : (F_SUPPORTED & (FLEN != S_LEN)) ? 2'd0 : 2'd2); -localparam BIAS1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_BIAS : (F_SUPPORTED & (FLEN != S_LEN)) ? S_BIAS : H_BIAS); -localparam LEN2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_LEN : H_LEN); -localparam NE2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NE : H_NE); -localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF); -localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); -localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); +// LEN1/NE1/NF1/FMT1 is the size of the second longest supported format +localparam FPSIZES = (32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED); +localparam FMTBITS = (32)'(FPSIZES>=3)+1; +localparam LEN1 = (FLEN > D_LEN) ? D_LEN : (FLEN > S_LEN) ? S_LEN : H_LEN; +localparam NE1 = (FLEN > D_LEN) ? D_NE : (FLEN > S_LEN) ? S_NE : H_NE; +localparam NF1 = (FLEN > D_LEN) ? D_NF : (FLEN > S_LEN) ? S_NF : H_NF; +localparam FMT1 = (FLEN > D_LEN) ? 2'd1 : (FLEN > S_LEN) ? 2'd0 : 2'd2; +localparam BIAS1 = (FLEN > D_LEN) ? D_BIAS : (FLEN > S_LEN) ? S_BIAS : H_BIAS; + +// LEN2 etc is the size of the third longest supported format +localparam LEN2 = (LEN1 > S_LEN) ? S_LEN : H_LEN; +localparam NE2 = (LEN1 > S_LEN) ? S_NE : H_NE; +localparam NF2 = (LEN1 > S_LEN) ? S_NF : H_NF; +localparam FMT2 = (LEN1 > S_LEN) ? 2'd0 : 2'd2; +localparam BIAS2 = (LEN1 > S_LEN) ? S_BIAS : H_BIAS; // divider r and rk (bits per digit, bits per cycle) localparam LOGR = $clog2(RADIX); // r = log(R) bits per digit @@ -115,7 +111,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? 
(DIVb + 1 +NF+1) : (3*NF+6))); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); -localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? (DIVMINb+1+NF) : (3*NF+4))); +localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Disable spurious Verilator warnings diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index ec6fc7ec5..7dc0a0bcf 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -14,6 +14,7 @@ localparam cvw_t P = '{ ZICNTR_SUPPORTED : ZICNTR_SUPPORTED, ZIHPM_SUPPORTED : ZIHPM_SUPPORTED, ZFH_SUPPORTED : ZFH_SUPPORTED, + ZFA_SUPPORTED : ZFA_SUPPORTED, SSTC_SUPPORTED : SSTC_SUPPORTED, VIRTMEM_SUPPORTED : VIRTMEM_SUPPORTED, VECTORED_INTERRUPTS_SUPPORTED : VECTORED_INTERRUPTS_SUPPORTED, diff --git a/setup.sh b/setup.sh index b1ecbd84f..e1d4e6cd3 100755 --- a/setup.sh +++ b/setup.sh @@ -16,8 +16,7 @@ echo \$WALLY set to ${WALLY} # Must edit these based on your local environment. Ask your sysadmin. 
export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu # Change this to your Siemens license server export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Synopsys license server -export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_2/questasim # Change this for your path to Questa, excluding bin -#export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_3/questasim # Change this for your path to Questa, excluding bin +export QUESTA_HOME=/cad/mentor/questa_sim-2023.4/questasim # Change this for your path to Questa, excluding bin export SNPS_HOME=/cad/synopsys/SYN # Change this for your path to Design Compiler, excluding bin # Path to RISC-V Tools diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 76e18e30e..b9c20eead 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -253,3 +253,10 @@ coverage exclude -srcfile priorityonehot.sv # Excluding pmpadrdecs[0] coverage case for PAgePMPAdrIn being hardwired to 1 coverage exclude -scope /dut/core/ifu/immu/immu/pmp/pmpchecker/pmp/pmpadrdecs[0] -linerange [GetLineNum ../src/mmu/pmpadrdec.sv "exclusion-tag: PAgePMPAdrIn"] -item e 1 -fecexprrow 1 coverage exclude -scope /dut/core/lsu/dmmu/dmmu/pmp/pmpchecker/pmp/pmpadrdecs[0] -linerange [GetLineNum ../src/mmu/pmpadrdec.sv "exclusion-tag: PAgePMPAdrIn"] -item e 1 -fecexprrow 1 + +#################### +# EBU +#################### + +# Exclude EBU Beat Counter because it is only idle when bus has multicycle latency, but rv64gc has single cycle latency +coverage exclude -scope /core/ebu/ebu/ebufsmarb/BeatCounter diff --git a/sim/imperas.ic b/sim/imperas.ic index f3c620b96..b35166429 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -20,6 +20,7 @@ # More extensions --override cpu/Zcb=T --override cpu/Zicond=T +--override cpu/Zfh=T # Cache block operations --override cpu/Zicbom=T @@ -36,6 +37,8 @@ # SV39 and SV48 supported --override cpu/Sv_modes=768 +--override cpu/Svinval=T + # clarify #--override 
refRoot/cpu/mtvec_sext=F diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 96762dbde..3445067a0 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -77,6 +77,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic ClearDirtyWay; logic SelNonHit; logic SelData; + logic InvalidateCacheDelay; if (!READ_ONLY_CACHE) begin:flushlogic logic FlushWayEn; @@ -121,7 +122,9 @@ module cacheway import cvw::*; #(parameter cvw_t P, assign TagWay = SelData ? ReadTag : '0; // AND part of AOMux assign HitDirtyWay = Dirty & ValidWay; assign DirtyWay = SelDirty & HitDirtyWay; - assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]); + assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]) & ~InvalidateCacheDelay; + + flop #(1) InvalidateCacheReg(clk, InvalidateCache, InvalidateCacheDelay); ///////////////////////////////////////////////////////////////////////////////////////////// // Data Array diff --git a/src/cvw.sv b/src/cvw.sv index 53cbb5a70..a9ee9d093 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -49,6 +49,7 @@ typedef struct packed { logic ZICNTR_SUPPORTED; logic ZIHPM_SUPPORTED; logic ZFH_SUPPORTED; + logic ZFA_SUPPORTED; logic SSTC_SUPPORTED; logic VIRTMEM_SUPPORTED; logic VECTORED_INTERRUPTS_SUPPORTED; diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index de17f3553..df84175f0 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -29,7 +29,7 @@ module ahbinterface #( parameter XLEN, - parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits + parameter logic LSU = 1'b0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits )( input logic HCLK, HRESETn, // bus interface @@ -44,6 +44,7 @@ module ahbinterface #( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. 
Prevents bus transaction from starting input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write + input logic BusAtomic, // Uncached atomic memory operation input logic [XLEN/8-1:0] ByteMask, // Bytes enables within a word input logic [XLEN-1:0] WriteData, // IEU write data for a store output logic BusStall, // Bus is busy with an in flight memory operation @@ -64,7 +65,7 @@ module ahbinterface #( assign HWSTRB = '0; end - busfsm busfsm(.HCLK, .HRESETn, .Flush, .BusRW, + busfsm #(~LSU) busfsm(.HCLK, .HRESETn, .Flush, .BusRW, .BusAtomic, .BusCommitted, .Stall, .BusStall, .CaptureEn, .HREADY, .HTRANS, .HWRITE); diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 0368164ed..8d434c678 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -66,7 +66,7 @@ module buscachefsm #( output logic [2:0] HBURST // AHB burst length ); - typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, ATOMIC_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; + typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, ATOMIC_READ_DATA_PHASE, ATOMIC_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype; busstatetype CurrState, NextState; @@ -87,13 +87,15 @@ module buscachefsm #( always_comb begin case(CurrState) - ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; - else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; - else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; - else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_PHASE; - else if(HREADY & ~BusAtomic) NextState = MEM3; + ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; + else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; + else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; + else NextState = ADR_PHASE; + DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_READ_DATA_PHASE; + else if(HREADY & ~BusAtomic) 
NextState = MEM3; else NextState = DATA_PHASE; + ATOMIC_READ_DATA_PHASE: if(HREADY) NextState = ATOMIC_PHASE; + else NextState = ATOMIC_READ_DATA_PHASE; ATOMIC_PHASE: if(HREADY) NextState = MEM3; else NextState = ATOMIC_PHASE; MEM3: if(Stall) NextState = MEM3; @@ -107,7 +109,7 @@ module buscachefsm #( else if(HREADY & FinalBeatCount & BusCMOZero) NextState = MEM3; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_WRITEBACK; - default: NextState = ADR_PHASE; + default: NextState = ADR_PHASE; endcase end @@ -129,6 +131,7 @@ module buscachefsm #( //(CurrState == DATA_PHASE & ~BusRW[0]) | // *** replace the next line with this. Fails uart test but i think it's a test problem not a hardware problem. (CurrState == DATA_PHASE) | (CurrState == ATOMIC_PHASE) | + (CurrState == ATOMIC_READ_DATA_PHASE) | (CurrState == CACHE_FETCH & ~FinalBeatCount) | (CurrState == CACHE_WRITEBACK & ~FinalBeatCount); @@ -136,11 +139,11 @@ module buscachefsm #( // AHB bus interface assign HTRANS = (CurrState == ADR_PHASE & HREADY & ((|BusRW) | (|CacheBusRW) | BusCMOZero) & ~Flush) | - (CurrState == DATA_PHASE & BusAtomic) | + (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic) | (CacheAccess & FinalBeatCount & |CacheBusRW & HREADY & ~Flush) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |BeatCount) ? (`BURST_EN ? AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; - assign HWRITE = ((BusRW[0] & ~BusAtomic) | BusWrite & ~Flush) | (CurrState == DATA_PHASE & BusAtomic) | + assign HWRITE = ((BusRW[0] & ~BusAtomic) | BusWrite & ~Flush) | (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic) | (CurrState == CACHE_WRITEBACK & |BeatCount); assign HBURST = `BURST_EN & ((|CacheBusRW & ~Flush) | (CacheAccess & |BeatCount)) ? 
LocalBurstType : 3'b0; @@ -159,6 +162,7 @@ module buscachefsm #( assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | BusWrite)) | (CurrState == DATA_PHASE & BusRW[0]) | (CurrState == ATOMIC_PHASE & BusRW[0]) | + (CurrState == ATOMIC_READ_DATA_PHASE & BusRW[0]) | (CurrState == CACHE_WRITEBACK) | (CurrState == CACHE_FETCH); diff --git a/src/ebu/busfsm.sv b/src/ebu/busfsm.sv index 108cd546d..81d11715e 100644 --- a/src/ebu/busfsm.sv +++ b/src/ebu/busfsm.sv @@ -28,7 +28,9 @@ //////////////////////////////////////////////////////////////////////////////////////////////// // HCLK and clk must be the same clock! -module busfsm ( +module busfsm #( + parameter logic READ_ONLY +)( input logic HCLK, input logic HRESETn, @@ -36,6 +38,7 @@ module busfsm ( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write + input logic BusAtomic, // Uncached atomic memory operation output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA output logic BusStall, // Bus is busy with an in flight memory operation output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt @@ -45,7 +48,7 @@ module busfsm ( output logic HWRITE // AHB 0: Read operation 1: Write operation ); - typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3} busstatetype; + typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, ATOMIC_READ_DATA_PHASE, ATOMIC_PHASE} busstatetype; typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype; busstatetype CurrState, NextState; @@ -56,24 +59,33 @@ module busfsm ( always_comb begin case(CurrState) - ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; - else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY) NextState = MEM3; - else NextState = DATA_PHASE; - MEM3: if(Stall) NextState 
= MEM3; - else NextState = ADR_PHASE; - default: NextState = ADR_PHASE; + ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; + else NextState = ADR_PHASE; + DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_READ_DATA_PHASE; + else if(HREADY & ~BusAtomic) NextState = MEM3; + else NextState = DATA_PHASE; + ATOMIC_READ_DATA_PHASE: if(HREADY) NextState = ATOMIC_PHASE; + else NextState = ATOMIC_READ_DATA_PHASE; + ATOMIC_PHASE: if(HREADY) NextState = MEM3; + else NextState = ATOMIC_PHASE; + MEM3: if(Stall) NextState = MEM3; + else NextState = ADR_PHASE; + default: NextState = ADR_PHASE; endcase end assign BusStall = (CurrState == ADR_PHASE & |BusRW) | // (CurrState == DATA_PHASE & ~BusRW[0]); // possible optimization here. fails uart test, but i'm not sure the failure is valid. + (CurrState == ATOMIC_PHASE) | + (CurrState == ATOMIC_READ_DATA_PHASE) | (CurrState == DATA_PHASE); - assign BusCommitted = CurrState != ADR_PHASE; + assign BusCommitted = (CurrState != ADR_PHASE) & ~(READ_ONLY & CurrState == MEM3); + + assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) | + (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic) ? AHB_NONSEQ : AHB_IDLE; + assign HWRITE = (BusRW[0] & ~BusAtomic) | (CurrState == ATOMIC_READ_DATA_PHASE & BusAtomic); - assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) ? 
AHB_NONSEQ : AHB_IDLE; - assign HWRITE = BusRW[0]; assign CaptureEn = CurrState == DATA_PHASE; endmodule diff --git a/src/fpu/fcmp.sv b/src/fpu/fcmp.sv index 9d0d582b5..e330f1fda 100755 --- a/src/fpu/fcmp.sv +++ b/src/fpu/fcmp.sv @@ -36,6 +36,7 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, // format of fp number input logic [2:0] OpCtrl, // see above table + input logic Zfa, // Zfa variants: fminm, fmaxm, fleq, fltq input logic Xs, Ys, // input signs input logic [P.NE-1:0] Xe, Ye, // input exponents input logic [P.NF:0] Xm, Ym, // input mantissa @@ -70,8 +71,8 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( 3'b110: CmpNV = EitherSNaN; //min 3'b101: CmpNV = EitherSNaN; //max 3'b010: CmpNV = EitherSNaN; //equal - 3'b001: CmpNV = EitherNaN; //less than - 3'b011: CmpNV = EitherNaN; //less than or equal + 3'b001: CmpNV = Zfa ? EitherSNaN : EitherNaN; // fltq / flt perform CompareQuietLess / CompareSignalingLess differing on when to set invalid + 3'b011: CmpNV = Zfa ? 
EitherSNaN : EitherNaN; // fleq / fle differ on when to set invalid default: CmpNV = 1'bx; endcase end @@ -128,23 +129,35 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( // - if one is a NaN output the non-NaN always_comb if(OpCtrl[0]) // MAX - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = Y; // X < Y - else CmpFpRes = X; // X > Y + if (Zfa & P.ZFA_SUPPORTED) // fmaxm performs IEEE 754-2019 maximum, which produces NaN if either input is NaN + if (XNaN | YNaN) CmpFpRes = NaNRes; // either input is NaN + else + if (LT) CmpFpRes = Y; // X < Y + else CmpFpRes = X; // X > Y + else // fmax performs IEEE 754-2019 maximumNumber, which produces NaN only if both inputs are NaN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = Y; // X < Y + else CmpFpRes = X; // X > Y else // MIN - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = X; // X < Y - else CmpFpRes = Y; // X > Y + if (Zfa & P.ZFA_SUPPORTED) // fminm performs IEEE 754-2019 minimum, which produces NaN if either input is NaN + if (XNaN | YNaN) CmpFpRes = NaNRes; // either input is NaN + else + if (LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y + else // fmin performs IEEE 754-2019 minimumNumber, which produces NaN only if both inputs are NaN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y // LT/LE/EQ // - -0 = 0 diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 3d1a7bedd..6d5a91aa6 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -38,7 
+38,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( input logic FDivBusyE, // is the divider busy // instruction input logic [31:0] InstrD, // the full instruction - input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision + input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain precision input logic [6:0] OpD, // bits 6:0 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode @@ -54,6 +54,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic FPUActiveE, // FP instruction being executed + output logic ZfaE, ZfaM, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) // register control signals output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable output logic FWriteIntE, FWriteIntM, // Write to integer register @@ -64,7 +65,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic FDivStartE, IDivStartE // Start division or squareroot ); - `define FCTRLW 12 + `define FCTRLW 13 logic [`FCTRLW-1:0] ControlsD; // control signals logic FRegWriteD; // FP register write enable @@ -79,138 +80,178 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( logic SupportedFmt; // is the format supported logic SupportedFmt2; // is the source format supported for fp -> fp logic FCvtIntD, FCvtIntM; // convert to integer opperation + logic ZfaD; // Zfa variants of instructions // FPU Instruction Decoder assign Fmt = Funct7D[1:0]; assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp - assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) | - (Fmt == 2'b10 & P.ZFH_SUPPORTED) | (Fmt == 2'b11 & P.Q_SUPPORTED)); + assign SupportedFmt 
= (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) | + (Fmt == 2'b10 & P.ZFH_SUPPORTED & {OpD[6:4], OpD[1:0]} != 5'b10011) | // fma not supported for Zfh + (Fmt == 2'b11 & P.Q_SUPPORTED)); assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & P.D_SUPPORTED) | (Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED)); // decode the instruction - // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt + // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa always_comb if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // for anything other than loads and stores, check for supported format + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // for anything other than loads and stores, check for supported format else begin - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // default: non-implemented instruction + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // default: non-implemented instruction /* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed case(OpD) 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh + 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // fld + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flh endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = 
`FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh + 3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsd + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsh endcase - 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd - 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub - 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub - 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd + 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0; // fnmadd 7'b1010011: casez(Funct7D) - 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd - 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub - 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul - 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv - 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt + 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0; // fdiv + 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0; // fsqrt 7'b00100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0; // fsgnj - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0; // fsgnjn - 3'b010: 
ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0; // fsgnjx + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0; // fsgnjx endcase 7'b00101??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0; // fmin - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0; // fmax + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0; // fmax + 3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1; // fminm (Zfa) + 3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1; // fmaxm (Zfa) endcase 7'b10100??: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0; // feq - 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0; // flt - 3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0; // fle + 3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_0; // fle + 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0; // flt + 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0; // feq + 3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1; // fleq (Zfa) + 3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1; // fltq (Zfa) endcase 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass + ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0; // fclass else if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register + else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa) + // Q not supported in RV64GC + // coverage off + else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D 
== 5'b00001) + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa) + // coverage on 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg + ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg + else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1; // fli (Zfa) 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0; // fcvt.s.(d/q/h) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.s (Zfa) *** needs ctrl for all rounds + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.s (Zfa) *** needs ctrl for all rounds 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) - ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q) - // coverage off - // Not covered in testing because rv64gc does not support half or quad precision + ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0; // fcvt.d.(s/h/q) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.d (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.d (Zfa) 7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10) - ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; // fcvt.h.(s/d/q) + ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0; // fcvt.h.(s/d/q) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.h (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.h (Zfa) + // coverage off + // Not covered in testing because rv64gc does not support quad precision 7'b0100011: if (Rs2D[4:2] == 3'b000 & 
SupportedFmt2 & Rs2D[1:0] != 2'b11) - ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d) + ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0; // fcvt.q.(s/h/d) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.q (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.q (Zfa) // coverage on 7'b1101000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w w->s - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l l->s - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.s.w w->s + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.s.wu wu->s + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.s.l l->s + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.s.lu lu->s endcase 7'b1100000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s s->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s s->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s s->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s s->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.s s->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.s s->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.s s->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.s s->lu endcase 7'b1101001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w w->d - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l l->d - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; 
// fcvt.d.w w->d + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.d.wu wu->d + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.d.l l->d + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.d.lu lu->d endcase 7'b1100001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d d->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d d->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d d->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d d->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.d d->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.d d->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.d d->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.d d->lu + 5'b01000: if (P.ZFA_SUPPORTED & P.D_SUPPORTED & Funct3D == 3'b001) + ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1; // fcvtmod.w.d (Zfa) endcase - // coverage off - // Not covered in testing because rv64gc does not support half or quad precision 7'b1101010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w w->h - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.h.l l->h - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.h.lu lu->h + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.h.w w->h + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.h.wu wu->h + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.h.l l->h + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.h.lu lu->h endcase 7'b1100010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.h h->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.h h->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h h->l - 5'b00011: ControlsD = 
`FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h h->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.h h->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.h h->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.h h->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.h h->lu endcase + // Not covered in testing because rv64gc does not support quad precision + // coverage off 7'b1101011: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w w->q - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.q.l l->q - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.q.lu lu->q + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.q.w w->q + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.q.wu wu->q + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.q.l l->q + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.q.lu lu->q endcase 7'b1100011: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.q q->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.q q->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.q q->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.q q->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.q q->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.q q->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.q q->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.q q->lu endcase // coverage on - endcase + 7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000) + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong + // Not covered in testing because rv64gc does not support quad precision + // coverage off + 7'b1011011: if 
(P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000) + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.q.x (Zfa) + // coverage on + endcase endcase end /* verilator lint_on CASEINCOMPLETE */ // unswizzle control bits - assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD} = ControlsD; + assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -273,6 +314,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( // 011 - mv to fp 01 // 110 - min 10 // 101 - max 10 + // 111 - fli 11 // OpCtrl: // Fma: {not multiply-add?, negate prod?, negate Z?} @@ -309,9 +351,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( assign Adr3D = InstrD[31:27]; // D/E pipleine register - flopenrc #(14+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ~IllegalFPUInstrD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, FPUActiveE}); + flopenrc #(15+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ~IllegalFPUInstrD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, FPUActiveE}); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE}); @@ -321,9 +363,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( else assign IDivStartE = 0; // E/M pipleine register - flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE}, - 
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM}); + flopenrc #(14+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE}, + {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM, ZfaM}); // renameing for readability assign FpLoadStoreM = FResSelM[1]; diff --git a/src/fpu/fcvt.sv b/src/fpu/fcvt.sv index d396fee95..ad767d2ef 100644 --- a/src/fpu/fcvt.sv +++ b/src/fpu/fcvt.sv @@ -69,9 +69,9 @@ module fcvt import cvw::*; #(parameter cvw_t P) ( assign Int64 = OpCtrl[1]; assign IntToFp = OpCtrl[2]; - // choose the ouptut format depending on the opperation - // - fp -> fp: OpCtrl contains the percision of the output - // - int -> fp: Fmt contains the percision of the output + // choose the output format depending on the opperation + // - fp -> fp: OpCtrl contains the precision of the output + // - int -> fp: Fmt contains the precision of the output if (P.FPSIZES == 2) assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == P.FMT); else if (P.FPSIZES == 3 | P.FPSIZES == 4) diff --git a/src/fpu/fli.sv b/src/fpu/fli.sv new file mode 100644 index 000000000..e61415388 --- /dev/null +++ b/src/fpu/fli.sv @@ -0,0 +1,219 @@ +/////////////////////////////////////////// +// fli.sv +// +// Written: David_Harris@hmc.edu +// Modified: 1/16/2024 +// +// Purpose: Floating-point float immediate +// +// Documentation: RISC-V System on Chip Design Chapter 16 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module fli import cvw::*; #(parameter cvw_t P) ( + input logic [4:0] Rs1, // Index of immediate to select + input logic [1:0] Fmt, // 00 = single, 01 = double, 10 = half, 11 = quad + output logic [P.FLEN-1:0] Imm // Immediate output +); + + logic [P.FLEN-1:0] HImmBox, SImmBox, DImmBox, QImmBox; + + // select constant for each immediate size supported + + //////////////////////////// + // half + //////////////////////////// + + if (P.ZFH_SUPPORTED) begin + logic [15:0] HImm; + always_comb begin + case(Rs1) + 0: HImm = 16'hBC00; + 1: HImm = 16'h0400; + 2: HImm = 16'h0100; + 3: HImm = 16'h0200; + 4: HImm = 16'h1C00; + 5: HImm = 16'h2000; + 6: HImm = 16'h2C00; + 7: HImm = 16'h3000; + 8: HImm = 16'h3400; + 9: HImm = 16'h3500; + 10: HImm = 16'h3600; + 11: HImm = 16'h3700; + 12: HImm = 16'h3800; + 13: HImm = 16'h3900; + 14: HImm = 16'h3A00; + 15: HImm = 16'h3B00; + 16: HImm = 16'h3C00; + 17: HImm = 16'h3D00; + 18: HImm = 16'h3E00; + 19: HImm = 16'h3F00; + 20: HImm = 16'h4000; + 21: HImm = 16'h4100; + 22: HImm = 16'h4200; + 23: HImm = 16'h4400; + 24: HImm = 16'h4800; + 25: HImm = 16'h4C00; + 26: HImm = 16'h5800; + 27: HImm = 16'h5C00; + 28: HImm = 16'h7800; + 29: HImm = 16'h7C00; + 30: HImm = 16'h7C00; + 31: HImm = 16'h7E00; + endcase + end + assign HImmBox = {{(P.FLEN-16){1'b1}}, HImm}; // NaN-box HImm + end else assign HImmBox = '0; + + //////////////////////////// + // single + //////////////////////////// + + logic [31:0] SImm; + always_comb begin + case(Rs1) 
+ 0: SImm = 32'hBF800000; + 1: SImm = 32'h00800000; + 2: SImm = 32'h37800000; + 3: SImm = 32'h38000000; + 4: SImm = 32'h3B800000; + 5: SImm = 32'h3C000000; + 6: SImm = 32'h3D800000; + 7: SImm = 32'h3E000000; + 8: SImm = 32'h3E800000; + 9: SImm = 32'h3EA00000; + 10: SImm = 32'h3EC00000; + 11: SImm = 32'h3EE00000; + 12: SImm = 32'h3F000000; + 13: SImm = 32'h3F200000; + 14: SImm = 32'h3F400000; + 15: SImm = 32'h3F600000; + 16: SImm = 32'h3F800000; + 17: SImm = 32'h3FA00000; + 18: SImm = 32'h3FC00000; + 19: SImm = 32'h3FE00000; + 20: SImm = 32'h40000000; + 21: SImm = 32'h40200000; + 22: SImm = 32'h40400000; + 23: SImm = 32'h40800000; + 24: SImm = 32'h41000000; + 25: SImm = 32'h41800000; + 26: SImm = 32'h43000000; + 27: SImm = 32'h43800000; + 28: SImm = 32'h47000000; + 29: SImm = 32'h47800000; + 30: SImm = 32'h7F800000; + 31: SImm = 32'h7FC00000; + endcase + end + assign SImmBox = {{(P.FLEN-32){1'b1}}, SImm}; // NaN-box SImm + + //////////////////////////// + // double + //////////////////////////// + + if (P.D_SUPPORTED) begin + logic [63:0] DImm; + always_comb begin + case(Rs1) + 0: DImm = 64'hBFF0000000000000; + 1: DImm = 64'h0010000000000000; + 2: DImm = 64'h3EF0000000000000; + 3: DImm = 64'h3F00000000000000; + 4: DImm = 64'h3F70000000000000; + 5: DImm = 64'h3F80000000000000; + 6: DImm = 64'h3FB0000000000000; + 7: DImm = 64'h3FC0000000000000; + 8: DImm = 64'h3FD0000000000000; + 9: DImm = 64'h3FD4000000000000; + 10: DImm = 64'h3FD8000000000000; + 11: DImm = 64'h3FDC000000000000; + 12: DImm = 64'h3FE0000000000000; + 13: DImm = 64'h3FE4000000000000; + 14: DImm = 64'h3FE8000000000000; + 15: DImm = 64'h3FEC000000000000; + 16: DImm = 64'h3FF0000000000000; + 17: DImm = 64'h3FF4000000000000; + 18: DImm = 64'h3FF8000000000000; + 19: DImm = 64'h3FFC000000000000; + 20: DImm = 64'h4000000000000000; + 21: DImm = 64'h4004000000000000; + 22: DImm = 64'h4008000000000000; + 23: DImm = 64'h4010000000000000; + 24: DImm = 64'h4020000000000000; + 25: DImm = 64'h4030000000000000; + 26: 
DImm = 64'h4060000000000000; + 27: DImm = 64'h4070000000000000; + 28: DImm = 64'h40E0000000000000; + 29: DImm = 64'h40F0000000000000; + 30: DImm = 64'h7FF0000000000000; + 31: DImm = 64'h7FF8000000000000; + endcase + end + assign DImmBox = {{(P.FLEN-64){1'b1}}, DImm}; // NaN-box DImm + end else assign DImmBox = '0; + + //////////////////////////// + // quad + //////////////////////////// + + if (P.Q_SUPPORTED) begin + logic [127:0] QImm; // 128 bits wide: must hold the full quad constants below (a 64-bit variable would keep only their all-zero low halves) + always_comb begin + case(Rs1) + 0: QImm = 128'hBFFF0000000000000000000000000000; + 1: QImm = 128'h00010000000000000000000000000000; + 2: QImm = 128'h3FEF0000000000000000000000000000; + 3: QImm = 128'h3FF00000000000000000000000000000; + 4: QImm = 128'h3FF70000000000000000000000000000; + 5: QImm = 128'h3FF80000000000000000000000000000; + 6: QImm = 128'h3FFB0000000000000000000000000000; + 7: QImm = 128'h3FFC0000000000000000000000000000; + 8: QImm = 128'h3FFD0000000000000000000000000000; + 9: QImm = 128'h3FFD4000000000000000000000000000; + 10: QImm = 128'h3FFD8000000000000000000000000000; + 11: QImm = 128'h3FFDC000000000000000000000000000; + 12: QImm = 128'h3FFE0000000000000000000000000000; + 13: QImm = 128'h3FFE4000000000000000000000000000; + 14: QImm = 128'h3FFE8000000000000000000000000000; + 15: QImm = 128'h3FFEC000000000000000000000000000; + 16: QImm = 128'h3FFF0000000000000000000000000000; + 17: QImm = 128'h3FFF4000000000000000000000000000; + 18: QImm = 128'h3FFF8000000000000000000000000000; + 19: QImm = 128'h3FFFC000000000000000000000000000; + 20: QImm = 128'h40000000000000000000000000000000; + 21: QImm = 128'h40004000000000000000000000000000; + 22: QImm = 128'h40008000000000000000000000000000; + 23: QImm = 128'h40010000000000000000000000000000; + 24: QImm = 128'h40020000000000000000000000000000; + 25: QImm = 128'h40030000000000000000000000000000; + 26: QImm = 128'h40060000000000000000000000000000; + 27: QImm = 128'h40070000000000000000000000000000; + 28: QImm = 128'h400E0000000000000000000000000000; + 29: QImm =
128'h400F0000000000000000000000000000; + 30: QImm = 128'h7FFF0000000000000000000000000000; + 31: QImm = 128'h7FFF8000000000000000000000000000; + endcase + end + assign QImmBox = QImm; // NaN-box QImm trivial because Q is longest format + end else assign QImmBox = '0; + + mux4 #(P.FLEN) flimux(SImmBox, DImmBox, HImmBox, QImmBox, Fmt, Imm); // select immediate based on format + +endmodule diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 7d7574a45..c304219aa 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -83,6 +83,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation logic FRegWriteE; // Write floating-point register logic FPUActiveE; // FP instruction being executed + logic ZfaE, ZfaM; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) // regfile signals logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -154,12 +155,13 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register // other signals - logic [P.FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv + logic [P.FLEN-1:0] PreIntSrcE, IntSrcE; // align SrcA from IEU to the floating point format for fmv / fmvp logic [P.FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [P.FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer logic mvsgn; // sign bit for extending move + logic [P.FLEN-1:0] FliResE; // Floating-point load immediate value ////////////////////////////////////////////////////////////////////////////////////////// // Decode Stage: fctrl decoder, read register file @@ -169,7 +171,7 @@ module fpu import cvw::*; 
#(parameter cvw_t P) ( fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .IntDivE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, + .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .FrmM, .FmtE, .FmtM, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, @@ -246,7 +248,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .UmM, .FIntDivResultM); // compare: fmin/fmax, flt/fle/feq - fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Zfa(ZfaE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); @@ -263,37 +265,53 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); - // NaN Box SrcA to convert integer to requested FP size - if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; + // floating-point load immediate: fli + if (P.ZFA_SUPPORTED) begin + logic [4:0] Rs1E; + + flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E); + fli #(P) fli(.Rs1(Rs1E), .Fmt(FmtE), .Imm(FliResE)); + end else assign FliResE = '0; + + // fmv.*.x: NaN Box SrcA to extend integer to requested FP size + if(P.FPSIZES == 1) assign PreIntSrcE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; else if(P.FPSIZES == 2) - mux2 #(P.FLEN) SrcAMux 
({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); - else if(P.FPSIZES == 3 | P.FPSIZES == 4) - mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, - {{P.FLEN-P.D_LEN{1'b1}}, ForwardedSrcAE[P.D_LEN-1:0]}, + mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, PreIntSrcE); + else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin + localparam XD_LEN = P.D_LEN < P.XLEN ? P.D_LEN : P.XLEN; // shorter of D_LEN and XLEN + mux3 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, + {{P.FLEN-XD_LEN{1'b1}}, ForwardedSrcAE[XD_LEN-1:0]}, {{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]}, - {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes + FmtE, PreIntSrcE); // NaN boxing zeroes + end + // fmvp.*.x: Select pair of registers; Zfa must gate both the RV32+D and RV64+Q cases, so the OR is parenthesized (& binds tighter than |) + if (P.ZFA_SUPPORTED & ((P.XLEN==32 & P.D_SUPPORTED) | (P.XLEN==64 & P.Q_SUPPORTED))) + assign IntSrcE = ZfaE ?
{ForwardedSrcBE, ForwardedSrcAE} : PreIntSrcE; // choose pair of integer registers for fmvp.d.x / fmvp.q.x + else assign IntSrcE = PreIntSrcE; // select a result that may be written to the FP register - mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); - // select the result that may be written to the integer register with fmv - to IEU + // fmv.x.*: select the result that may be written to the integer register if(P.FPSIZES == 1) begin assign mvsgn = XE[P.FLEN-1]; assign SgnExtXE = XE; end else if(P.FPSIZES == 2) begin - mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn); + mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn); mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE); end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin - mux4 #(1) sgnmux (XE[P.H_LEN-1], XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.LLEN-1], FmtE, mvsgn); - mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]}, - {{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, - {{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]}, - XE, FmtE, SgnExtXE); + mux4 #(1) sgnmux (XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.H_LEN-1], XE[P.LLEN-1], FmtE, mvsgn); + mux3 #(P.FLEN) sgnextmux ({{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, + {{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]}, + {{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]}, + FmtE, SgnExtXE); // Q not needed because there is no fmv.x.q end + // sign extend to XLEN if necessary if (P.FLEN>P.XLEN) - assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; + if (P.ZFA_SUPPORTED) assign IntSrcXE = ZfaE ? 
XE[P.FLEN-1:P.FLEN/2] : SgnExtXE[P.XLEN-1:0]; // either fmvh.x.* or fmv.x.* + else assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; else assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); @@ -330,7 +348,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), - .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), + .ToInt(FWriteIntM), .Zfa(ZfaM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged diff --git a/src/fpu/postproc/cvtshiftcalc.sv b/src/fpu/postproc/cvtshiftcalc.sv index 1150d4ecc..ff3d29b90 100644 --- a/src/fpu/postproc/cvtshiftcalc.sv +++ b/src/fpu/postproc/cvtshiftcalc.sv @@ -82,7 +82,7 @@ module cvtshiftcalc import cvw::*; #(parameter cvw_t P) ( P.FMT: ResNegNF = -($clog2(P.NF)+1)'(P.NF); P.FMT1: ResNegNF = -($clog2(P.NF)+1)'(P.NF1); P.FMT2: ResNegNF = -($clog2(P.NF)+1)'(P.NF2); - default: ResNegNF = 'x; + default: ResNegNF = 0; // Not used for floating-point so don't care, but convert to unsigned long has OutFmt = 11. 
endcase end else if (P.FPSIZES == 4) begin diff --git a/src/fpu/postproc/flags.sv b/src/fpu/postproc/flags.sv index 98ed0a34d..50d9bf229 100644 --- a/src/fpu/postproc/flags.sv +++ b/src/fpu/postproc/flags.sv @@ -70,7 +70,7 @@ module flags import cvw::*; #(parameter cvw_t P) ( logic DivInvalid; // integer invalid flag logic Underflow; // Underflow flag logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent - logic ShiftGtIntSz; // is the shift greater than the the integer size (use Re to account for possible roundning "shift") + logic ShiftGtIntSz; // is the shift greater than the the integer size (use Re to account for possible rounding "shift") /////////////////////////////////////////////////////////////////////////////// // Overflow diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index c80748061..0a8ac3035 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -50,7 +50,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // calculate the sum's exponent assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3); - //convert the sum's exponent into the proper percision + //convert the sum's exponent into the proper precision if (P.FPSIZES == 1) begin assign NormSumExp = PreNormSumExp; end else if (P.FPSIZES == 2) begin diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index 05db352cd..1d51fdf85 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -56,6 +56,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic CvtResSubnormUf, // the convert result is subnormal or underflows input logic [P.LOGCVTLEN-1:0] CvtShiftAmt, // how much to shift by input logic ToInt, // is fp->int (since it's writting to the integer register) + input logic Zfa, // Zfa operation (fcvtmod.w.d) input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the 
Leading Zero Counter (without msb) input logic IntZero, // is the integer input zero // final results @@ -88,7 +89,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma - // division singals + // division signals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift @@ -127,9 +128,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( assign InfIn = XInf|YInf|ZInf; assign NaNIn = XNaN|YNaN|ZNaN; - // choose the ouptut format depending on the opperation - // - fp -> fp: OpCtrl contains the percision of the output - // - otherwise: Fmt contains the percision of the output + // choose the output format depending on the opperation + // - fp -> fp: OpCtrl contains the precision of the output + // - otherwise: Fmt contains the precision of the output if (P.FPSIZES == 2) assign OutFmt = IntToFp|~CvtOp ? 
Fmt : (OpCtrl[1:0] == P.FMT); else if (P.FPSIZES == 3 | P.FPSIZES == 4) @@ -216,9 +217,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( negateintres #(P) negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); - specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, - .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, + .NaNIn, .IntToFp, .Int64, .Signed, .Zfa, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index e01ff376b..460786135 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -145,18 +145,18 @@ module round import cvw::*; #(parameter cvw_t P) ( end else if (P.FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) | + if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) | + if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (XLENPOS == 3) assign NormSticky = 
(|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT1)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1)|IntRes)) | + if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) | (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index f5860b42d..1da3556d8 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -44,7 +44,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( output logic [P.NE+1:0] Ue // corrected exponent for divider ); - logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction + logic [P.CORRSHIFTSZ-1:0] CorrSumShifted; // the shifted sum after LZA correction logic [P.CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted logic [P.CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift logic ResSubnorm; // is the result Subnormal @@ -68,7 +68,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits always_comb - if(FmaOp) Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}}; + if(FmaOp) Mf = {CorrSumShifted}; else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted; else Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ]; diff --git a/src/fpu/postproc/specialcase.sv b/src/fpu/postproc/specialcase.sv index 677ccce16..76784e4a1 100644 --- a/src/fpu/postproc/specialcase.sv +++ b/src/fpu/postproc/specialcase.sv @@ -53,6 +53,7 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( input logic IntToFp, // is cvt int -> fp opperation input logic Int64, // is the 
integer 64 bits input logic Signed, // is the integer signed + input logic Zfa, // Zfa conversion operation: fcvtmod.w.d input logic [P.NE:0] CvtCe, // the calculated expoent for cvt input logic IntInvalid, // integer invalid flag to choose the result input logic CvtResUf, // does the convert result underflow @@ -70,10 +71,12 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( logic [P.FLEN-1:0] OfRes; // overflowed result result logic [P.FLEN-1:0] NormRes; // normal result logic [P.XLEN-1:0] OfIntRes; // the overflow result for integer output + logic [P.XLEN-1:0] OfIntRes2; // the overflow result for integer output after accounting for fcvtmod.w.d + logic [P.XLEN-1:0] Int64Res; // Result for conversion to 64-bit int after accounting for fcvtmod.w.d logic OfResMax; // does the of result output maximum norm fp number logic KillRes; // kill the result for underflow - logic SelOfRes; // should the overflow result be selected - + logic SelOfRes; // should the overflow result be selected (excluding convert) + logic SelCvtOfRes; // select overflow result for convert instruction // does the overflow result output the maximum normalized floating point number // output infinity if the input is infinity @@ -329,6 +332,25 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( else OfIntRes = {P.XLEN{1'b1}}; // unsigned positive end + // fcvtmod.w.d logic + // fcvtmod.w.d is like fcvt.w.d except that it takes bits [31:0] and sign extends the rest, + // and converts +/-inf and NaN to zero. 
+ + if (P.ZFA_SUPPORTED & P.D_SUPPORTED) // fcvtmod.w.d support + always_comb begin + if (Zfa) OfIntRes2 = '0; + else OfIntRes2 = OfIntRes; + if (Zfa) Int64Res = {{(P.XLEN-32){CvtNegRes[P.XLEN-1]}}, CvtNegRes[31:0]}; + else Int64Res = CvtNegRes[P.XLEN-1:0]; + if (Zfa) SelCvtOfRes = InfIn | NaNIn; // fcvtmod.w.d only overflows to 0 on NaN or Infinity + else SelCvtOfRes = IntInvalid; // regular fcvt gives an overflow if out of range + end + else + always_comb begin // no fcvtmod.w.d support + OfIntRes2 = OfIntRes; + Int64Res = CvtNegRes[P.XLEN-1:0]; + SelCvtOfRes = IntInvalid; + end // select the integer output // - if the input is invalid (out of bounds NaN or Inf) then output overflow res @@ -337,10 +359,10 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( // - otherwise output a rounded 0 // - otherwise output the normal res (trmined and sign extended if nessisary) always_comb - if(IntInvalid) FCvtIntRes = OfIntRes; + if(SelCvtOfRes) FCvtIntRes = OfIntRes2; else if(CvtCe[P.NE]) if(Xs&Signed&Plus1) FCvtIntRes = {{P.XLEN{1'b1}}}; else FCvtIntRes = {{P.XLEN-1{1'b0}}, Plus1}; - else if(Int64) FCvtIntRes = CvtNegRes[P.XLEN-1:0]; + else if(Int64) FCvtIntRes = Int64Res; else FCvtIntRes = {{P.XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; endmodule diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 3507ec3e9..e5cfff9ed 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -370,7 +370,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( // Fences // Ordinary fence is presently a nop // fence.i flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented - if (P.ZIFENCEI_SUPPORTED & P.ICACHE_SUPPORTED) begin:fencei + if (P.ZIFENCEI_SUPPORTED & (P.ICACHE_SUPPORTED | P.DCACHE_SUPPORTED)) begin:fencei logic FenceID; assign FenceID = FenceXD & (Funct3D == 3'b001); // is it a FENCE.I instruction? 
assign InvalidateICacheD = FenceID; diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 107a4af8b..0bd899306 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -273,9 +273,9 @@ module ifu import cvw::*; #(parameter cvw_t P) ( assign BusRW = ~ITLBMissF & ~SelIROM ? IFURWF : '0; assign IFUHSIZE = 3'b010; - ahbinterface #(P.XLEN, 0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), + ahbinterface #(P.XLEN, 1'b0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), - .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), + .HWSTRB(), .BusRW, .BusAtomic('0), .ByteMask(), .WriteData('0), .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); assign CacheCommittedF = '0; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e7d6707d6..220a42eef 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -111,7 +111,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1 logic BusStall; // Bus interface busy with multicycle operation + logic LSUBusStallM; // Bus interface busy with multicycle operation masked by IgnoreRequestTLB logic HPTWStall; // HPTW busy with multicycle operation + logic DCacheBusStallM; // Cache or bus stall logic CacheBusHPWTStall; // Cache, bus, or hptw is requesting a stall logic SelSpillE; // Align logic detected a spill and needs to stall @@ -194,7 +196,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(P.VIRTMEM_SUPPORTED) begin : hptw hptw #(P) hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, - .FlushW, .DCacheStallM, .SATP_REGW, .PCSpillF, + .FlushW, .DCacheBusStallM, .SATP_REGW, .PCSpillF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_ADUE, .PrivilegeModeW, .ReadDataM(ReadDataM[P.XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN 
.WriteDataM(WriteDataZM), .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, @@ -225,7 +227,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // the trap module. assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; + assign DCacheBusStallM = DCacheStallM | LSUBusStallM; + assign CacheBusHPWTStall = DCacheBusStallM | HPTWStall; assign LSUStallM = CacheBusHPWTStall | SpillStallM; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -342,7 +345,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DCacheStallM = CacheStall & ~IgnoreRequestTLB; assign CacheBusRW = CacheBusRWTemp; - // *** add support for cboz ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), @@ -353,6 +355,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM)); + // Mux between the 3 sources of read data, 0: cache, 1: Bus, 2: DTIM // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. 
@@ -368,9 +371,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; - ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), + ahbinterface #(P.XLEN, 1'b1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), - .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM[P.XLEN-1:0]), + .HWSTRB(LSUHWSTRB), .BusRW, .BusAtomic(AtomicM[1]), .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM[P.XLEN-1:0]), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); // Mux between the 2 sources of read data, 0: Bus, 1: DTIM @@ -387,6 +390,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign {DCacheStallM, DCacheCommittedM} = '0; end + assign LSUBusStallM = BusStall & ~IgnoreRequestTLB; + ///////////////////////////////////////////////////////////////////////////////////////////// // Atomic operations ///////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index 7ca4a007a..82eeaef87 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -42,7 +42,7 @@ module hptw import cvw::*; #(parameter cvw_t P) ( input logic [1:0] PrivilegeModeW, input logic [P.XLEN-1:0] ReadDataM, // page table entry from LSU input logic [P.XLEN-1:0] WriteDataM, - input logic DCacheStallM, // stall from LSU + input logic DCacheBusStallM, // stall from LSU input logic [2:0] Funct3M, input logic [6:0] Funct7M, input logic ITLBMissF, @@ -145,7 +145,7 @@ module hptw import cvw::*; #(parameter cvw_t P) ( // State flops flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrUpdateDAM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) - assign PRegEn = HPTWRW[1] & ~DCacheStallM | UpdatePTE; + assign PRegEn = HPTWRW[1] & 
~DCacheBusStallM | UpdatePTE; flopenr #(P.XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache // Assign PTE descriptors common across all XLEN values @@ -283,30 +283,30 @@ module hptw import cvw::*; #(parameter cvw_t P) ( flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset | FlushW, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb case (WalkerState) - IDLE: if (TLBMiss & ~DCacheStallM) NextWalkerState = InitialWalkerState; + IDLE: if (TLBMiss & ~DCacheBusStallM) NextWalkerState = InitialWalkerState; else NextWalkerState = IDLE; L3_ADR: NextWalkerState = L3_RD; // first access in SV48 - L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; + L3_RD: if (DCacheBusStallM) NextWalkerState = L3_RD; else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = L2_ADR; L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39 else NextWalkerState = LEAF; - L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; + L2_RD: if (DCacheBusStallM) NextWalkerState = L2_RD; else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = L1_ADR; L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32 else NextWalkerState = LEAF; - L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; + L1_RD: if (DCacheBusStallM) NextWalkerState = L1_RD; else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = L0_ADR; L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; + L0_RD: if (DCacheBusStallM) NextWalkerState = L0_RD; else if(HPTWFaultM) NextWalkerState = FAULT; else NextWalkerState = LEAF; LEAF: if (P.SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; - UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; + UPDATE_PTE: if(DCacheBusStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; FAULT: 
NextWalkerState = IDLE; default: NextWalkerState = IDLE; // should never be reached diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index c26ee2a44..dffa7be76 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -140,7 +140,7 @@ module mmu import cvw::*; #(parameter cvw_t P, 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); - assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & (~(P.ZICCLSM_SUPPORTED & Cacheable) | ReadAccessM); // Misaligned AMO faults even if ZICCLSM supported // Specify which type of page fault is occurring assign InstrPageFaultF = TLBPageFault & ExecuteAccessF; diff --git a/testbench/common/instrNameDecTB.sv b/testbench/common/instrNameDecTB.sv index 96ef6d67f..ee6cd6900 100644 --- a/testbench/common/instrNameDecTB.sv +++ b/testbench/common/instrNameDecTB.sv @@ -232,95 +232,7 @@ module instrNameDecTB( 10'b1000111_???: name = "FMSUB"; 10'b1001011_???: name = "FNMSUB"; 10'b1001111_???: name = "FNMADD"; - 10'b1010011_000: if (funct7[6:2] == 5'b00000) name = "FADD"; - else if (funct7[6:2] == 5'b00001) name = "FSUB"; - else if (funct7[6:2] == 5'b00010) name = "FMUL"; - else if (funct7[6:2] == 5'b00011) name = "FDIV"; - else if (funct7[6:2] == 5'b01011) name = "FSQRT"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00000) name = "FCVT.W.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00001) name = "FCVT.WU.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00010) name = "FCVT.L.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00011) name = "FCVT.LU.S"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00000) name = "FCVT.S.W"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00001) name = "FCVT.S.WU"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00010) name = "FCVT.S.L"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00011) name = "FCVT.S.LU"; 
- else if (funct7 == 7'b1100001 & rs2 == 5'b00000) name = "FCVT.W.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00001) name = "FCVT.WU.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00010) name = "FCVT.L.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00011) name = "FCVT.LU.D"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00000) name = "FCVT.D.W"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00001) name = "FCVT.D.WU"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00010) name = "FCVT.D.L"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; - else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; - else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; - else if (funct7 == 7'b1110000 & rs2 == 5'b00000) name = "FMV.X.W"; - else if (funct7 == 7'b1111000 & rs2 == 5'b00000) name = "FMV.W.X"; - else if (funct7 == 7'b1110001 & rs2 == 5'b00000) name = "FMV.X.D"; // DOUBLE - else if (funct7 == 7'b1111001 & rs2 == 5'b00000) name = "FMV.D.X"; // DOUBLE - else if (funct7[6:2] == 5'b00100) name = "FSGNJ"; - else if (funct7[6:2] == 5'b00101) name = "FMIN"; - else if (funct7[6:2] == 5'b10100) name = "FLE"; - else name = "ILLEGAL"; - 10'b1010011_001: if (funct7[6:2] == 5'b00000) name = "FADD"; - else if (funct7[6:2] == 5'b00001) name = "FSUB"; - else if (funct7[6:2] == 5'b00010) name = "FMUL"; - else if (funct7[6:2] == 5'b00011) name = "FDIV"; - else if (funct7[6:2] == 5'b01011) name = "FSQRT"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00000) name = "FCVT.W.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00001) name = "FCVT.WU.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00010) name = "FCVT.L.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00011) name = "FCVT.LU.S"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00000) name = "FCVT.S.W"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00001) name = "FCVT.S.WU"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00010) name = "FCVT.S.L"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00011) name = 
"FCVT.S.LU"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00000) name = "FCVT.W.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00001) name = "FCVT.WU.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00010) name = "FCVT.L.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00011) name = "FCVT.LU.D"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00000) name = "FCVT.D.W"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00001) name = "FCVT.D.WU"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00010) name = "FCVT.D.L"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; - else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; - else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; - else if (funct7[6:2] == 5'b00100) name = "FSGNJN"; - else if (funct7[6:2] == 5'b00101) name = "FMAX"; - else if (funct7[6:2] == 5'b10100) name = "FLT"; - else if (funct7[6:2] == 5'b11100) name = "FCLASS"; - else name = "ILLEGAL"; - 10'b1010011_010: if (funct7[6:2] == 5'b00000) name = "FADD"; - else if (funct7[6:2] == 5'b00001) name = "FSUB"; - else if (funct7[6:2] == 5'b00010) name = "FMUL"; - else if (funct7[6:2] == 5'b00011) name = "FDIV"; - else if (funct7[6:2] == 5'b01011) name = "FSQRT"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00000) name = "FCVT.W.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00001) name = "FCVT.WU.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00010) name = "FCVT.L.S"; - else if (funct7 == 7'b1100000 & rs2 == 5'b00011) name = "FCVT.LU.S"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00000) name = "FCVT.S.W"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00001) name = "FCVT.S.WU"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00010) name = "FCVT.S.L"; - else if (funct7 == 7'b1101000 & rs2 == 5'b00011) name = "FCVT.S.LU"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00000) name = "FCVT.W.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00001) name = "FCVT.WU.D"; - else if (funct7 == 7'b1100001 & rs2 == 5'b00010) name = "FCVT.L.D"; - else 
if (funct7 == 7'b1100001 & rs2 == 5'b00011) name = "FCVT.LU.D"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00000) name = "FCVT.D.W"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00001) name = "FCVT.D.WU"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00010) name = "FCVT.D.L"; - else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; - else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; - else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; - else if (funct7[6:2] == 5'b00100) name = "FSGNJX"; - else if (funct7[6:2] == 5'b10100) name = "FEQ"; - else name = "ILLEGAL"; - /* verilator lint_off CASEOVERLAP */ - // *** RT: definitely take a look at this. This overlaps with 10'b1010011_000 10'b1010011_???: if (funct7[6:2] == 5'b00000) name = "FADD"; - /* verilator lint_on CASEOVERLAP */ else if (funct7[6:2] == 5'b00001) name = "FSUB"; else if (funct7[6:2] == 5'b00010) name = "FMUL"; else if (funct7[6:2] == 5'b00011) name = "FDIV"; @@ -343,6 +255,61 @@ module instrNameDecTB( else if (funct7 == 7'b1101001 & rs2 == 5'b00011) name = "FCVT.D.LU"; else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00000) name = "FCVT.W.H"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00001) name = "FCVT.WU.H"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00010) name = "FCVT.L.H"; + else if (funct7 == 7'b1100010 & rs2 == 5'b00011) name = "FCVT.LU.H"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00000) name = "FCVT.H.W"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00001) name = "FCVT.H.WU"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00010) name = "FCVT.H.L"; + else if (funct7 == 7'b1101010 & rs2 == 5'b00011) name = "FCVT.H.LU"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00000) name = "FCVT.W.Q"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00001) name = "FCVT.WU.Q"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00010) name = 
"FCVT.L.Q"; + else if (funct7 == 7'b1100011 & rs2 == 5'b00011) name = "FCVT.LU.Q"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00000) name = "FCVT.Q.W"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00001) name = "FCVT.Q.WU"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00010) name = "FCVT.Q.L"; + else if (funct7 == 7'b1101011 & rs2 == 5'b00011) name = "FCVT.Q.LU"; + else if (funct7 == 7'b0100000 & rs2 == 5'b00001) name = "FCVT.S.D"; + else if (funct7 == 7'b0100000 & rs2 == 5'b00010) name = "FCVT.S.H"; + else if (funct7 == 7'b0100000 & rs2 == 5'b00011) name = "FCVT.S.Q"; + else if (funct7 == 7'b0100001 & rs2 == 5'b00000) name = "FCVT.D.S"; + else if (funct7 == 7'b0100001 & rs2 == 5'b00010) name = "FCVT.D.H"; + else if (funct7 == 7'b0100001 & rs2 == 5'b00011) name = "FCVT.D.Q"; + else if (funct7 == 7'b0100010 & rs2 == 5'b00000) name = "FCVT.H.S"; + else if (funct7 == 7'b0100010 & rs2 == 5'b00001) name = "FCVT.H.D"; + else if (funct7 == 7'b0100010 & rs2 == 5'b00011) name = "FCVT.H.Q"; + else if (funct7 == 7'b0100011 & rs2 == 5'b00000) name = "FCVT.Q.S"; + else if (funct7 == 7'b0100011 & rs2 == 5'b00001) name = "FCVT.Q.D"; + else if (funct7 == 7'b0100011 & rs2 == 5'b00010) name = "FCVT.Q.H"; + else if (funct7 == 7'b1110000 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.X.W"; + else if (funct7 == 7'b1111000 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.W.X"; + else if (funct7 == 7'b1110001 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.X.D"; + else if (funct7 == 7'b1111001 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.D.X"; + else if (funct7 == 7'b1110010 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.X.H"; + else if (funct7 == 7'b1111010 & rs2 == 5'b00000 & funct3 == 3'b000) name = "FMV.H.X"; + else if (funct7[6:2] == 5'b00100 & funct3 == 3'b000) name = "FSGNJ"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b000) name = "FMIN"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b000) name = "FLE"; + else if (funct7[6:2] == 5'b00100 & funct3 == 3'b001) 
name = "FSGNJN"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b001) name = "FMAX"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b001) name = "FLT"; + else if (funct7[6:2] == 5'b11100 & funct3 == 3'b001) name = "FCLASS"; + else if (funct7[6:2] == 5'b00100 & funct3 == 3'b010) name = "FSGNJX"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b010) name = "FEQ"; + else if (funct7[6:2] == 5'b11110 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FLI"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b010) name = "FMINM"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b011) name = "FMAXM"; + else if (funct7[6:2] == 5'b01000 & rs2 == 5'b00100) name = "FROUND"; + else if (funct7[6:2] == 5'b01000 & rs2 == 5'b00101) name = "FROUNDNX"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b100) name = "FLEQ"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b101) name = "FLTQ"; + else if (funct7 == 7'b1110001 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FMVH.X.D"; + else if (funct7 == 7'b1110011 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FMVH.X.Q"; + else if (funct7 == 7'b1011001 & funct3 == 3'b000) name = "FMVP.D.X"; + else if (funct7 == 7'b1011011 & funct3 == 3'b000) name = "FMVP.Q.X"; + else if (funct7 == 7'b1100001 & funct3 == 3'b001 & rs2 == 5'b01000) name = "FCVTMOD.W.D"; else name = "ILLEGAL"; 10'b0000111_010: name = "FLW"; 10'b0100111_010: name = "FSW"; diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv index ce10c2be2..b19542d62 100644 --- a/testbench/testbench-fp.sv +++ b/testbench/testbench-fp.sv @@ -882,7 +882,7 @@ module testbenchfp; // - the sign of the NaN does not matter for the opperations being tested // - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT) - case (FmtVal) + case (FmtVal) 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | 
(XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | @@ -903,9 +903,9 @@ module testbenchfp; (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) | (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]}))); - endcase - else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format - case (OpCtrlVal[1:0]) + endcase + else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format + case (OpCtrlVal[1:0]) 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | @@ -926,72 +926,70 @@ module testbenchfp; (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]}))); - endcase - else NaNGood = 1'b0; // integers can't be NaNs + endcase + else NaNGood = 1'b0; // integers can't be NaNs - /////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////// - // ||||||| ||| ||| ||||||| ||||||| ||| ||| - // ||| ||| ||| ||| ||| ||| ||| - // ||| |||||||||| ||||||| ||| |||||| - // ||| ||| ||| ||| ||| ||| ||| - // ||||||| ||| ||| ||||||| ||||||| ||| ||| + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||||| ||||||| ||| |||||| + // ||| ||| ||| ||| ||| ||| ||| + // ||||||| ||| ||| ||||||| ||||||| ||| ||| - /////////////////////////////////////////////////////////////////////////////////////////////// + 
/////////////////////////////////////////////////////////////////////////////////////////////// - // check if result is correct - // wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx)); - assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx)); - assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL); - assign FMAop = (OpCtrlVal == `FMAUNIT); - assign DivDone = OldFDivBusyE & ~FDivBusyE; + // check if result is correct + // wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) + assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx)); + assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx)); + assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL); + assign FMAop = (OpCtrlVal == `FMAUNIT); + assign DivDone = OldFDivBusyE & ~FDivBusyE; - // Maybe change OpCtrl but for now just look at TEST for fma test - assign CheckNow = ((DivDone | ~divsqrtop) | (TEST == "add" | TEST == "fma" | TEST == "sub")) & (UnitVal !== `CVTINTUNIT) & (UnitVal !== `CMPUNIT); - if (~(ResMatch & FlagMatch) & CheckNow) begin + // Maybe change OpCtrl but for now just look at TEST for fma test + assign CheckNow = ((DivDone | ~divsqrtop) | (TEST == "add" | TEST == "fma" | TEST == "sub")) & (UnitVal !== `CVTINTUNIT) & (UnitVal !== `CMPUNIT); + if (~(ResMatch & FlagMatch) & CheckNow) begin + errors += 1; + $display("\nError in %s", Tests[TestNum]); + $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $stop; + end else if (((UnitVal === `CVTINTUNIT) | (UnitVal === `CMPUNIT)) & + ~(ResMatch & FlagMatch) & (Ans[0] !== 1'bx)) begin // Check for conversion and comparisons errors += 1; $display("\nError in %s", Tests[TestNum]); - 
$display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; - end else if (((UnitVal === `CVTINTUNIT) | (UnitVal === `CMPUNIT)) & - ~(ResMatch & FlagMatch) & (Ans[0] !== 1'bx)) begin // Check for conversion and comparisons - errors += 1; - $display("\nError in %s", Tests[TestNum]); - $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); - $stop; - end end if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the eof - // increment the test - TestNum += 1; - // clear the vectors - for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}}; - // read next files - $readmemh({`PATH, Tests[TestNum]}, TestVectors); - // set the vector index back to 0 - VectorNum = 0; - // incemet the operation if all the rounding modes have been tested - if (FrmNum === 4) OpCtrlNum += 1; - // increment the rounding mode or loop back to rne - if (FrmNum < 4) - FrmNum += 1; - else begin - FrmNum = 0; - // Add some time as a buffer between tests at the end of each test - repeat (10) - @(posedge clk); - end - // if no more Tests - finish - if (Tests[TestNum] === "") begin - $display("\nAll Tests completed with %d errors\n", errors); - $stop; - end - $display("Running %s vectors", Tests[TestNum]); + // increment the test + TestNum += 1; + // clear the vectors + for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}}; + // read next files + $readmemh({`PATH, Tests[TestNum]}, TestVectors); + // set the vector index back to 0 + VectorNum = 0; + // incemet the operation if all the rounding modes have been tested + if (FrmNum === 4) OpCtrlNum += 1; + // 
increment the rounding mode or loop back to rne + if (FrmNum < 4) FrmNum += 1; + else begin + FrmNum = 0; + // Add some time as a buffer between tests at the end of each test + repeat (10) + @(posedge clk); + end + // if no more Tests - finish + if (Tests[TestNum] === "") begin + $display("\nAll Tests completed with %d errors\n", errors); + $stop; + end + $display("Running %s vectors", Tests[TestNum]); end end endmodule diff --git a/testbench/testbench.sv b/testbench/testbench.sv index e68b01b48..b20c6a993 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -127,6 +127,9 @@ module testbench; "arch64zbs": if (P.ZBS_SUPPORTED) tests = arch64zbs; "arch64zicboz": if (P.ZICBOZ_SUPPORTED) tests = arch64zicboz; "arch64zcb": if (P.ZCB_SUPPORTED) tests = arch64zcb; + "arch64zfh": if (P.ZFH_SUPPORTED) tests = arch64zfh; + "arch64zfaf": if (P.ZFA_SUPPORTED) tests = arch64zfaf; + "arch64zfad": if (P.ZFA_SUPPORTED & P.D_SUPPORTED) tests = arch64zfad; endcase end else begin // RV32 case (TEST) @@ -161,6 +164,9 @@ module testbench; "arch32zbs": if (P.ZBS_SUPPORTED) tests = arch32zbs; "arch32zicboz": if (P.ZICBOZ_SUPPORTED) tests = arch32zicboz; "arch32zcb": if (P.ZCB_SUPPORTED) tests = arch32zcb; + "arch32zfh": if (P.ZFH_SUPPORTED) tests = arch32zfh; + "arch32zfaf": if (P.ZFA_SUPPORTED) tests = arch32zfaf; + "arch32zfad": if (P.ZFA_SUPPORTED & P.D_SUPPORTED) tests = arch32zfad; endcase end if (tests.size() == 0) begin @@ -626,8 +632,8 @@ task automatic updateProgramAddrLabelArray; end end - if(ProgramAddrLabelArray["begin_signature"] == 0) $display("Couldn't find begin_signature in %s", ProgramLabelMapFile); - if(ProgramAddrLabelArray["sig_end_canary"] == 0) $display("Couldn't find sig_end_canary in %s", ProgramLabelMapFile); +// if(ProgramAddrLabelArray["begin_signature"] == 0) $display("Couldn't find begin_signature in %s", ProgramLabelMapFile); +// if(ProgramAddrLabelArray["sig_end_canary"] == 0) $display("Couldn't find sig_end_canary in %s", 
ProgramLabelMapFile); $fclose(ProgramLabelMapFP); $fclose(ProgramAddrMapFP); diff --git a/testbench/tests.vh b/testbench/tests.vh index 39b4ecc41..fecf4ebc9 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1291,6 +1291,123 @@ string imperas32f[] = '{ "rv64i_m/F/src/fsw-align-01.S" }; + string arch64zfh[] = '{ + `RISCVARCHTEST, + "rv64i_m/Zfh/src/fadd_b10-01.S", + "rv64i_m/Zfh/src/fadd_b1-01.S", + "rv64i_m/Zfh/src/fadd_b11-01.S", + "rv64i_m/Zfh/src/fadd_b12-01.S", + "rv64i_m/Zfh/src/fadd_b13-01.S", + "rv64i_m/Zfh/src/fadd_b2-01.S", + "rv64i_m/Zfh/src/fadd_b3-01.S", + "rv64i_m/Zfh/src/fadd_b4-01.S", + "rv64i_m/Zfh/src/fadd_b5-01.S", + "rv64i_m/Zfh/src/fadd_b7-01.S", + "rv64i_m/Zfh/src/fadd_b8-01.S", + "rv64i_m/Zfh/src/fclass_b1-01.S", + "rv64i_m/Zfh/src/fcvt.h.w_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.w_b26-01.S", + "rv64i_m/Zfh/src/fcvt.h.wu_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.wu_b26-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b22-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b27-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.w.h_b29-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b22-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b27-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.wu.h_b29-01.S", + "rv64i_m/Zfh/src/fcvt.h.l_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.l_b26-01.S", + "rv64i_m/Zfh/src/fcvt.h.lu_b25-01.S", + "rv64i_m/Zfh/src/fcvt.h.lu_b26-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b22-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b27-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.l.h_b29-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b1-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b22-01.S", + 
"rv64i_m/Zfh/src/fcvt.lu.h_b23-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b24-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b27-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b28-01.S", + "rv64i_m/Zfh/src/fcvt.lu.h_b29-01.S", + "rv64i_m/Zfh/src/fdiv_b20-01.S", + "rv64i_m/Zfh/src/fdiv_b1-01.S", + "rv64i_m/Zfh/src/fdiv_b2-01.S", + "rv64i_m/Zfh/src/fdiv_b21-01.S", + "rv64i_m/Zfh/src/fdiv_b3-01.S", + "rv64i_m/Zfh/src/fdiv_b4-01.S", + "rv64i_m/Zfh/src/fdiv_b5-01.S", + "rv64i_m/Zfh/src/fdiv_b6-01.S", + "rv64i_m/Zfh/src/fdiv_b7-01.S", + "rv64i_m/Zfh/src/fdiv_b8-01.S", + "rv64i_m/Zfh/src/fdiv_b9-01.S", + "rv64i_m/Zfh/src/feq_b1-01.S", + "rv64i_m/Zfh/src/feq_b19-01.S", + "rv64i_m/Zfh/src/fle_b1-01.S", + "rv64i_m/Zfh/src/fle_b19-01.S", + "rv64i_m/Zfh/src/flt_b1-01.S", + "rv64i_m/Zfh/src/flt_b19-01.S", + "rv64i_m/Zfh/src/flh-align-01.S", + "rv64i_m/Zfh/src/fmax_b1-01.S", + "rv64i_m/Zfh/src/fmax_b19-01.S", + "rv64i_m/Zfh/src/fmin_b1-01.S", + "rv64i_m/Zfh/src/fmin_b19-01.S", + "rv64i_m/Zfh/src/fmul_b1-01.S", + "rv64i_m/Zfh/src/fmul_b2-01.S", + "rv64i_m/Zfh/src/fmul_b3-01.S", + "rv64i_m/Zfh/src/fmul_b4-01.S", + "rv64i_m/Zfh/src/fmul_b5-01.S", + "rv64i_m/Zfh/src/fmul_b6-01.S", + "rv64i_m/Zfh/src/fmul_b7-01.S", + "rv64i_m/Zfh/src/fmul_b8-01.S", + "rv64i_m/Zfh/src/fmul_b9-01.S", + "rv64i_m/Zfh/src/fmv.h.x_b25-01.S", + "rv64i_m/Zfh/src/fmv.h.x_b26-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b1-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b22-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b23-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b24-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b27-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b28-01.S", + "rv64i_m/Zfh/src/fmv.x.h_b29-01.S", + "rv64i_m/Zfh/src/fsgnj_b1-01.S", + "rv64i_m/Zfh/src/fsgnjn_b1-01.S", + "rv64i_m/Zfh/src/fsgnjx_b1-01.S", + "rv64i_m/Zfh/src/fsqrt_b1-01.S", + "rv64i_m/Zfh/src/fsqrt_b20-01.S", + "rv64i_m/Zfh/src/fsqrt_b2-01.S", + "rv64i_m/Zfh/src/fsqrt_b3-01.S", + "rv64i_m/Zfh/src/fsqrt_b4-01.S", + "rv64i_m/Zfh/src/fsqrt_b5-01.S", + "rv64i_m/Zfh/src/fsqrt_b7-01.S", + "rv64i_m/Zfh/src/fsqrt_b8-01.S", + 
"rv64i_m/Zfh/src/fsqrt_b9-01.S", + "rv64i_m/Zfh/src/fsub_b10-01.S", + "rv64i_m/Zfh/src/fsub_b1-01.S", + "rv64i_m/Zfh/src/fsub_b11-01.S", + "rv64i_m/Zfh/src/fsub_b12-01.S", + "rv64i_m/Zfh/src/fsub_b13-01.S", + "rv64i_m/Zfh/src/fsub_b2-01.S", + "rv64i_m/Zfh/src/fsub_b3-01.S", + "rv64i_m/Zfh/src/fsub_b4-01.S", + "rv64i_m/Zfh/src/fsub_b5-01.S", + "rv64i_m/Zfh/src/fsub_b7-01.S", + "rv64i_m/Zfh/src/fsub_b8-01.S", + "rv64i_m/Zfh/src/fsh-align-01.S" + }; + + string arch64d_fma[] = '{ `RISCVARCHTEST, //"rv64i_m/D/src/fmadd.d_b15-01.S", @@ -1638,7 +1755,6 @@ string arch64zbs[] = '{ string arch32f[] = '{ `RISCVARCHTEST, - "rv32i_m/F/src/fdiv_b20-01.S", "rv32i_m/F/src/fadd_b10-01.S", "rv32i_m/F/src/fadd_b1-01.S", "rv32i_m/F/src/fadd_b11-01.S", @@ -1783,6 +1899,184 @@ string arch64zbs[] = '{ "rv32i_m/F/src/fsw-align-01.S" }; + string arch32zfh[] = '{ + `RISCVARCHTEST, + "rv32i_m/Zfh/src/fadd_b10-01.S", + "rv32i_m/Zfh/src/fadd_b1-01.S", + "rv32i_m/Zfh/src/fadd_b11-01.S", + "rv32i_m/Zfh/src/fadd_b12-01.S", + "rv32i_m/Zfh/src/fadd_b13-01.S", + "rv32i_m/Zfh/src/fadd_b2-01.S", + "rv32i_m/Zfh/src/fadd_b3-01.S", + "rv32i_m/Zfh/src/fadd_b4-01.S", + "rv32i_m/Zfh/src/fadd_b5-01.S", + "rv32i_m/Zfh/src/fadd_b7-01.S", + "rv32i_m/Zfh/src/fadd_b8-01.S", + "rv32i_m/Zfh/src/fclass_b1-01.S", + "rv32i_m/Zfh/src/fcvt.h.w_b25-01.S", + "rv32i_m/Zfh/src/fcvt.h.w_b26-01.S", + "rv32i_m/Zfh/src/fcvt.h.wu_b25-01.S", + "rv32i_m/Zfh/src/fcvt.h.wu_b26-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b1-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b22-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b23-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b24-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b27-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b28-01.S", + "rv32i_m/Zfh/src/fcvt.w.h_b29-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b1-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b22-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b23-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b24-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b27-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b28-01.S", + "rv32i_m/Zfh/src/fcvt.wu.h_b29-01.S", + 
"rv32i_m/Zfh/src/fdiv_b20-01.S", + "rv32i_m/Zfh/src/fdiv_b1-01.S", + "rv32i_m/Zfh/src/fdiv_b2-01.S", + "rv32i_m/Zfh/src/fdiv_b21-01.S", + "rv32i_m/Zfh/src/fdiv_b3-01.S", + "rv32i_m/Zfh/src/fdiv_b4-01.S", + "rv32i_m/Zfh/src/fdiv_b5-01.S", + "rv32i_m/Zfh/src/fdiv_b6-01.S", + "rv32i_m/Zfh/src/fdiv_b7-01.S", + "rv32i_m/Zfh/src/fdiv_b8-01.S", + "rv32i_m/Zfh/src/fdiv_b9-01.S", + "rv32i_m/Zfh/src/feq_b1-01.S", + "rv32i_m/Zfh/src/feq_b19-01.S", + "rv32i_m/Zfh/src/fle_b1-01.S", + "rv32i_m/Zfh/src/fle_b19-01.S", + "rv32i_m/Zfh/src/flt_b1-01.S", + "rv32i_m/Zfh/src/flt_b19-01.S", + "rv32i_m/Zfh/src/flh-align-01.S", + "rv32i_m/Zfh/src/fmax_b1-01.S", + "rv32i_m/Zfh/src/fmax_b19-01.S", + "rv32i_m/Zfh/src/fmin_b1-01.S", + "rv32i_m/Zfh/src/fmin_b19-01.S", + "rv32i_m/Zfh/src/fmul_b1-01.S", + "rv32i_m/Zfh/src/fmul_b2-01.S", + "rv32i_m/Zfh/src/fmul_b3-01.S", + "rv32i_m/Zfh/src/fmul_b4-01.S", + "rv32i_m/Zfh/src/fmul_b5-01.S", + "rv32i_m/Zfh/src/fmul_b6-01.S", + "rv32i_m/Zfh/src/fmul_b7-01.S", + "rv32i_m/Zfh/src/fmul_b8-01.S", + "rv32i_m/Zfh/src/fmul_b9-01.S", + "rv32i_m/Zfh/src/fmv.h.x_b25-01.S", + "rv32i_m/Zfh/src/fmv.h.x_b26-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b1-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b22-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b23-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b24-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b27-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b28-01.S", + "rv32i_m/Zfh/src/fmv.x.h_b29-01.S", + "rv32i_m/Zfh/src/fsgnj_b1-01.S", + "rv32i_m/Zfh/src/fsgnjn_b1-01.S", + "rv32i_m/Zfh/src/fsgnjx_b1-01.S", + "rv32i_m/Zfh/src/fsqrt_b1-01.S", + "rv32i_m/Zfh/src/fsqrt_b20-01.S", + "rv32i_m/Zfh/src/fsqrt_b2-01.S", + "rv32i_m/Zfh/src/fsqrt_b3-01.S", + "rv32i_m/Zfh/src/fsqrt_b4-01.S", + "rv32i_m/Zfh/src/fsqrt_b5-01.S", + "rv32i_m/Zfh/src/fsqrt_b7-01.S", + "rv32i_m/Zfh/src/fsqrt_b8-01.S", + "rv32i_m/Zfh/src/fsqrt_b9-01.S", + "rv32i_m/Zfh/src/fsub_b10-01.S", + "rv32i_m/Zfh/src/fsub_b1-01.S", + "rv32i_m/Zfh/src/fsub_b11-01.S", + "rv32i_m/Zfh/src/fsub_b12-01.S", + "rv32i_m/Zfh/src/fsub_b13-01.S", + 
"rv32i_m/Zfh/src/fsub_b2-01.S", + "rv32i_m/Zfh/src/fsub_b3-01.S", + "rv32i_m/Zfh/src/fsub_b4-01.S", + "rv32i_m/Zfh/src/fsub_b5-01.S", + "rv32i_m/Zfh/src/fsub_b7-01.S", + "rv32i_m/Zfh/src/fsub_b8-01.S", + "rv32i_m/Zfh/src/fsh-align-01.S" + }; + + string arch32zfaf[] = '{ + `RISCVARCHTEST, + "rv32i_m/F_Zfa/src/fleq_b1-01.S", + "rv32i_m/F_Zfa/src/fleq_b19-01.S", + "rv32i_m/F_Zfa/src/fli.s-01.S", + "rv32i_m/F_Zfa/src/fltq_b1-01.S", + "rv32i_m/F_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fltq_b1-01.S", // these D tests are more comprehensive and seem they should replace the F tests. Applies to all F tests duplicated in D + "rv32i_m/D_Zfa/src/fltq_b19-01.S", + "rv32i_m/F_Zfa/src/fminm_b1-01.S", + "rv32i_m/F_Zfa/src/fminm_b19-01.S", + "rv32i_m/F_Zfa/src/fmaxm_b1-01.S", + "rv32i_m/F_Zfa/src/fmaxm_b19-01.S" +/* "rv32i_m/F_Zfa/src/fround_b1-01.S" */ + }; + + string arch32zfad[] = '{ + `RISCVARCHTEST, + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b24-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b27-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b28-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b29-01.S", + "rv32i_m/D_Zfa/src/fleq_b1-01.S", + "rv32i_m/D_Zfa/src/fleq_b19-01.S", + "rv32i_m/D_Zfa/src/fleq.d_b1-01.S", + "rv32i_m/D_Zfa/src/fleq.d_b19-01.S", + "rv32i_m/D_Zfa/src/fli.d-01.S", + "rv32i_m/D_Zfa/src/fltq_b1-01.S", + "rv32i_m/D_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fltq.d_b1-01.S", + "rv32i_m/D_Zfa/src/fltq.d_b19-01.S", + "rv32i_m/D_Zfa/src/fminm_b1-01.S", + "rv32i_m/D_Zfa/src/fminm_b19-01.S", + "rv32i_m/D_Zfa/src/fminm.d_b1-01.S", + "rv32i_m/D_Zfa/src/fminm.d_b19-01.S", + "rv32i_m/D_Zfa/src/fmaxm_b1-01.S", + "rv32i_m/D_Zfa/src/fmaxm_b19-01.S", + "rv32i_m/D_Zfa/src/fmaxm.d_b1-01.S", + "rv32i_m/D_Zfa/src/fmaxm.d_b19-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b1-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b22-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b23-01.S", + 
"rv32i_m/D_Zfa/src/fmvh.x.d_b24-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b27-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b28-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b29-01.S" +/* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ + }; + + string arch64zfaf[] = '{ + `RISCVARCHTEST, + "rv64i_m/F_Zfa/src/fleq_b1-01.S", + "rv64i_m/F_Zfa/src/fleq_b19-01.S", + "rv64i_m/F_Zfa/src/fli.s-01.S", + "rv64i_m/F_Zfa/src/fltq_b1-01.S", + "rv64i_m/F_Zfa/src/fltq_b19-01.S", + "rv64i_m/F_Zfa/src/fminm_b1-01.S", + "rv64i_m/F_Zfa/src/fminm_b19-01.S", + "rv64i_m/F_Zfa/src/fmaxm_b1-01.S", + "rv64i_m/F_Zfa/src/fmaxm_b19-01.S" +/* "rv64i_m/F_Zfa/src/fround_b1-01.S" */ + }; + + string arch64zfad[] = '{ + `RISCVARCHTEST, + "rv64i_m/D_Zfa/src/fleq_b1-01.S", + "rv64i_m/D_Zfa/src/fleq_b19-01.S", + "rv64i_m/D_Zfa/src/fli.d-01.S", + "rv64i_m/D_Zfa/src/fltq_b1-01.S", + "rv64i_m/D_Zfa/src/fltq_b19-01.S", + "rv64i_m/D_Zfa/src/fminm_b1-01.S", + "rv64i_m/D_Zfa/src/fminm_b19-01.S", + "rv64i_m/D_Zfa/src/fmaxm_b1-01.S", + "rv64i_m/D_Zfa/src/fmaxm_b19-01.S" +/* "rv64i_m/D_Zfa/src/fround_b1-01.S" */ + }; + string arch32d_fma[] = '{ `RISCVARCHTEST, //"rv32i_m/D/src/fmadd.d_b15-01.S", diff --git a/tests/coverage/csrwrites.S b/tests/coverage/csrwrites.S index 63ee00c38..ce5639bd7 100644 --- a/tests/coverage/csrwrites.S +++ b/tests/coverage/csrwrites.S @@ -37,4 +37,31 @@ main: csrrw t1, menvcfg, t0 csrrw t2, senvcfg, t0 + # testing FIOM with different privelege modes + # setting environment config (to both 1 and 0) in each privelege mode + csrsi menvcfg, 1 + li a0, 1 + ecall # enter supervisor mode + + li a0, 0 + ecall # enter user mode + + li a0, 1 + ecall # enter supervisor mode + + csrsi senvcfg, 1 + li a0, 0 + ecall # enter user mode + + li a0, 3 + ecall # enter machine mode + csrci menvcfg, 1 + + li a0, 1 + ecall # enter supervisor mode + + li a0, 0 + ecall # enter user mode + + j done diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index aa9c8b50b..39b3a8aeb 100644 --- a/tests/coverage/priv.S +++ 
b/tests/coverage/priv.S @@ -297,7 +297,32 @@ sretdone: wfi - j done + + + # Test uncovered privdec instructions + li a0, 3 + ecall + # exercise sfence.inval.ir instruction + .word 0x18100073 + + # exercise sret with rs1 not 0 + .word 0x102F8073 + + + # cover mret when mpp = 3 and mprv = 1 + li a0, 3 + ecall # enter machine mode + bseti t0, zero, 17 + csrs mstatus, t0 # set MPRV + li t1, 0x00001800 + csrs mstatus, t1 # set MPP=3 + la t1, finished + csrr t0, mepc + csrw mepc, t1 # set mepc for mret to jump to + mret + + +finished: j done diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index 61b556932..5450f64df 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -115,6 +115,10 @@ class spike(pluginTemplate): self.isa += '_Zicond' if "Zicboz" in ispec["ISA"]: self.isa += '_Zicboz' + if "Zfa" in ispec["ISA"]: + self.isa += '_Zfa' + if "Zfh" in ispec["ISA"]: + self.isa += '_Zfh' if "Zca" in ispec["ISA"]: self.isa += '_Zca' if "Zcb" in ispec["ISA"]: diff --git a/tests/riscof/spike/spike_rv32gc_isa.yaml b/tests/riscof/spike/spike_rv32gc_isa.yaml index ae314fa76..c2c95fbf4 100644 --- a/tests/riscof/spike/spike_rv32gc_isa.yaml +++ b/tests/riscof/spike/spike_rv32gc_isa.yaml @@ -1,6 +1,6 @@ hart_ids: [0] hart0: - ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zba_Zbb_Zbc_Zbs + ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # ISA: RV32IMAFDCZicsr_Zicboz_Zifencei_Zca_Zba_Zbb_Zbc_Zbs # _Zbkb_Zcb physical_addr_sz: 32 User_Spec_Version: '2.3' diff --git a/tests/riscof/spike/spike_rv64gc_isa.yaml b/tests/riscof/spike/spike_rv64gc_isa.yaml index df5e7cb2b..4374ad07c 100644 --- a/tests/riscof/spike/spike_rv64gc_isa.yaml +++ b/tests/riscof/spike/spike_rv64gc_isa.yaml @@ -2,7 +2,7 @@ hart_ids: [0] hart0: # ISA: RV64IMAFDCSUZicsr_Zicboz_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb # ISA: RV64IMAFDCSUZicsr_Zifencei_Zca_Zcb_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb - ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb 
+ ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb physical_addr_sz: 56 User_Spec_Version: '2.3' supported_xlen: [64] diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S index b9c82c92d..3d0abc6a0 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S @@ -607,6 +607,7 @@ SETUP_PLIC .4byte delay1, 0x0000001, write32_test # reset delay1 register .4byte cs_mode, 0x00000000, write32_test # reset cs_mode .4byte tx_mark, 0x00000001, write32_test # set transmit watermark to 1 (any entry turns mark off) +.4byte sck_div, 0x00000100, write32_test # lower SPI clock rate so read32_tests trigger at correct times #.4byte ie, 0x00000000, write32_test # enable transmit interrupt .4byte ip, 0x00000001, read32_test # tx watermark interupt should be pending .4byte 0x0, 0x00000000, readmip_test diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S index 266b0e74f..11aebe333 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S @@ -608,6 +608,7 @@ SETUP_PLIC .8byte delay1, 0x0000001, write32_test # reset delay1 register .8byte cs_mode, 0x00000000, write32_test # reset cs_mode +.8byte sck_div, 0x00000100, write32_test # lower SPI clock rate so reads are done at correct time when ICACHE not supported .8byte tx_mark, 0x00000001, write32_test # set transmit watermark to 1 (any entry turns mark off) #.8byte ie, 0x00000000, write32_test # enable transmit interrupt .8byte ip, 0x00000001, read32_test # tx watermark interupt should be pending